Skip to content

Commit

Permalink
man page
Browse files Browse the repository at this point in the history
  • Loading branch information
camwebb committed Nov 25, 2018
1 parent a8617b4 commit 398ce29
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 43 deletions.
29 changes: 18 additions & 11 deletions aregex.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
/*
* aregex.c - Gawk extension to access the TRE approximate regex.
* Copyright (C) 2018 Cam Webb
* Distributed under the Gnu Public Licence v3
* Copyright (C) 2018 Cam Webb, <[email protected]>
* Distributed under the GNU Public Licence v3
*/

#include "common.h"
#include <tre/tre.h>
#include <tre/tre.h> // From the TRE regex lib. https://laurikari.net/tre/

static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG)
{
Expand All @@ -19,18 +19,19 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG)
lintwarn(ext_id, _("aregex() called with >3 arguments"));
}

// set the cost
// Set the default max cost
int defcost = 2;
if (get_argument(2, AWK_NUMBER, &incost)) defcost = incost.num_value ;

// If the string arguments are read
// If the string arguments are read...
if ((get_argument(0, AWK_STRING, &re)) && \
(get_argument(1, AWK_STRING, &str))) {

// Compile RE
regex_t preg;
tre_regcomp(&preg, re.str_value.str, REG_EXTENDED);


// Set approx grep params
regaparams_t params = { 0 };
params.cost_ins = 1;
params.cost_del = 1;
Expand All @@ -42,29 +43,34 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG)
params.max_err = defcost;

regamatch_t match;
match.nmatch = 0;
match.pmatch = 0;
match.nmatch = 0; // No partial match arrays needed
match.pmatch = 0; // - ditto -

int treret = 0;
int rval = 0;


// Do the approx regexp
treret = tre_regaexec(&preg, str.str_value.str, &match, params, 0);

if (treret == REG_NOMATCH) rval = -1;
else if (treret == REG_ESPACE) {
rval = -999;
warning(ext_id, _("aregex: TRE error: temporary memory could not be allocated\n to complete the matching operation."));
warning(ext_id, \
_("aregex: TRE err.: mem. insufficient to complete the match."));
}
else rval = match.cost;

// Return: cost (Levenshtein distance) if success, -1 if no match,
// -999 if memory error

return make_number(rval, result);
}

// On string argument failure:
return make_null_string(result);
}

// Boilerplate:
// Gawkextlib boilerplate:

static awk_ext_func_t func_table[] = \
{
Expand All @@ -81,3 +87,4 @@ static awk_bool_t (*init_func)(void) = init_my_module;
static const char *ext_version = PACKAGE_STRING;

dl_load_func(func_table, fmatch, "")

47 changes: 15 additions & 32 deletions doc/aregex.3am
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.TH AREGEX 3am "Nov 24 2018" "Free Software Foundation" "GNU Awk Extension Modules"
.SH NAME
aregex \- access approximate (fuzzy) regex matching from TRE library
\fBaregex\fR \- Approximate (fuzzy) regular expression or plain string matching using the TRE library
.SH SYNOPSIS
.ft CW
@load "aregex"
Expand All @@ -14,45 +14,28 @@ extension adds a function named
.BR aregex()
as follows:
.TP
.B aregex()
This function takes an integer argument and returns the result from calling
the
.IR aregex (3)
C library function. If the argument is not numeric, it will return an empty string.
\fBaregex(\fIr \fB, \fIs \fR[\fB, \fIc\fR]\fB)\fR
.TP
This function takes two mandatory string arguments, and an optional third integer argument. \fIr \fRis an extended regular expression or plain string to be matched against string \fIs\fR. The regex (\fIr\fR) is bounded by double-quotes, not by the usual \fIgawk\fR slashes. \fIc\fR is the optional maximum total cost of insertions, deletions and substitutions (each of cost = 1) allowed for a match. The default value of \fIc\fR is 2.
.TP
The function returns an integer: (i) \fI0\fR or \fI>0\fR on a successful match, with the value being the Levenshtein distance between string \fIs\fR and the closest string expanded from the regex \fIr\fR, (ii) \fI-1\fR for a failure to match (i.e., total cost of approximate match > \fIc\fR), or (iii) \fI-999\fR for an error of the TRE library.
.TP
Place plain text (or regex) between `^' and `$' in \fIr\fR to match two full strings or whole words (i.e., so that \fIr\fR does not just act as a substring).
.\" .SH NOTES
.\" .SH BUGS
.SH EXAMPLE
.ft CW
.nf
@load "aregex"
\&...
printf "The mapped value of 11 is %s\en", aregex(11)
print "The distance between 'abcdef' and 'abdex' is " \\
aregex("abcdef", "abdex", 3)
.fi
.ft R
.SH "SEE ALSO"
.BR aregex (3)
.SH AUTHOR
Cam Webb
.SH COPYING PERMISSIONS
Copyright \(co 2018,
Free Software Foundation, Inc.
.PP
Permission is granted to make and distribute verbatim copies of
this manual page provided the copyright notice and this permission
notice are preserved on all copies.
.ig
Permission is granted to process this file through troff and print the
results, provided the printed document carries copying permission
notice identical to this one except for the removal of this paragraph
(this paragraph not being relevant to the printed manual page).
..
.PP
Permission is granted to copy and distribute modified versions of this
manual page under the conditions for verbatim copying, provided that
the entire resulting derived work is distributed under the terms of a
permission notice identical to this one.
The Gawk extension lib: https://sourceforge.net/projects/gawkextlib/
.PP
Permission is granted to copy and distribute translations of this
manual page into another language, under the above conditions for
modified versions, except that this permission notice may be stated in
a translation approved by the Foundation.
TRE library: https://laurikari.net/tre/
.SH AUTHORS
Cam Webb <[email protected]>, @laurikari for the TRE library, the \fIgawkextlib\fR authors
.\" .SH COPYING PERMISSIONS

0 comments on commit 398ce29

Please sign in to comment.