From 398ce29199e8e50ea7d09a212bbdf31ffba5b282 Mon Sep 17 00:00:00 2001 From: Cam Webb Date: Sat, 24 Nov 2018 17:46:16 -0900 Subject: [PATCH] man page --- aregex.c | 29 ++++++++++++++++++----------- doc/aregex.3am | 47 +++++++++++++++-------------------------------- 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/aregex.c b/aregex.c index ddbe8fa..f77d5bb 100644 --- a/aregex.c +++ b/aregex.c @@ -1,11 +1,11 @@ /* * aregex.c - Gawk extension to access the TRE approximate regex. - * Copyright (C) 2018 Cam Webb - * Distributed under the Gnu Pulbic Licence v3 + * Copyright (C) 2018 Cam Webb, + * Distributed under the GNU General Public License v3 */ #include "common.h" -#include +#include // From the TRE regex lib. https://laurikari.net/tre/ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG) { @@ -19,18 +19,19 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG) lintwarn(ext_id, _("aregex() called with >3 arguments")); } - // set the cost + // Set the default max cost int defcost = 2; if (get_argument(2, AWK_NUMBER, &incost)) defcost = incost.num_value ; - // If the string arguments are read + // If the string arguments are read... 
if ((get_argument(0, AWK_STRING, &re)) && \ (get_argument(1, AWK_STRING, &str))) { // Compile RE regex_t preg; tre_regcomp(&preg, re.str_value.str, REG_EXTENDED); - + + // Set approx grep params regaparams_t params = { 0 }; params.cost_ins = 1; params.cost_del = 1; @@ -42,21 +43,26 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG) params.max_err = defcost; regamatch_t match; - match.nmatch = 0; - match.pmatch = 0; + match.nmatch = 0; // No partial match arrays needed + match.pmatch = 0; // - ditto - int treret = 0; int rval = 0; - + + // Do the approx regexp treret = tre_regaexec(&preg, str.str_value.str, &match, params, 0); if (treret == REG_NOMATCH) rval = -1; else if (treret == REG_ESPACE) { rval = -999; - warning(ext_id, _("aregex: TRE error: temporary memory could not be allocated\n to complete the matching operation.")); + warning(ext_id, \ + _("aregex: TRE err.: mem. insufficient to complete the match.")); } else rval = match.cost; + // Return: cost (Levenshtein distance) if success, -1 if no match, + // -999 if memory error + return make_number(rval, result); } @@ -64,7 +70,7 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG) return make_null_string(result); } -// Boilerplate: +// Gawkextlib boilerplate: static awk_ext_func_t func_table[] = \ { @@ -81,3 +87,4 @@ static awk_bool_t (*init_func)(void) = init_my_module; static const char *ext_version = PACKAGE_STRING; dl_load_func(func_table, fmatch, "") + diff --git a/doc/aregex.3am b/doc/aregex.3am index fded895..afb1f0d 100644 --- a/doc/aregex.3am +++ b/doc/aregex.3am @@ -1,6 +1,6 @@ .TH AREGEX 3am "Nov 24 2018" "Free Software Foundation" "GNU Awk Extension Modules" .SH NAME -aregex \- access approximate (fuzzy) regex matching from TRE library +\fBaregex\fR \- Approximate (fuzzy) regular expression or plain string matching using the TRE library .SH SYNOPSIS .ft CW @load "aregex" @@ -14,11 +14,13 @@ extension adds a function named .BR aregex() as 
follows: .TP -.B aregex() -This function takes an integer argument and returns the result from calling -the -.IR aregex (3) -C library function. If the argument is not numeric, it will return an empty string. +\fBaregex(\fIr \fB, \fIs \fR[\fB, \fIc\fR]\fB)\fR +.TP +This function takes two mandatory string arguments, and an optional third integer argument. \fIr \fRis an extended regular expression or plain string to be matched against string \fIs\fR. The regex (\fIr\fR) is bounded by double-quotes, not by the usual \fIgawk\fR slashes. \fIc\fR is an optional total cost of additions, deletions and substitutions (each of cost = 1). Default value of \fIc\fR is 2. +.TP +The function returns an integer: (i) \fI0\fR or \fI>0\fR on a successful match, with the value being the Levenshtein distance between string \fIs\fR and the closest string expanded from the regex \fIr\fR, (ii) \fI-1\fR for a failure to match (i.e., total cost of approximate match > \fIc\fR), or (iii) \fI-999\fR for an error of the TRE library. +.TP +Place plain text (or regex) between `^' and `$' in \fIr\fR to match two full strings or whole words (i.e., so that \fIr\fR does not just act as a substring). .\" .SH NOTES .\" .SH BUGS .SH EXAMPLE @@ -26,33 +28,14 @@ C library function. If the argument is not numeric, it will return an empty str .nf @load "aregex" \&... -printf "The mapped value of 11 is %s\en", aregex(11) +print "The distance between 'abcdef' and 'abdex' is " \\ + aregex("abcdef", "abdex", 3) .fi .ft R .SH "SEE ALSO" -.BR aregex (3) -.SH AUTHOR -Cam Webb -.SH COPYING PERMISSIONS -Copyright \(co 2018, -Free Software Foundation, Inc. -.PP -Permission is granted to make and distribute verbatim copies of -this manual page provided the copyright notice and this permission -notice are preserved on all copies. 
-.ig -Permission is granted to process this file through troff and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual page). -.. -.PP -Permission is granted to copy and distribute modified versions of this -manual page under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. +The Gawk extension lib: https://sourceforge.net/projects/gawkextlib/ .PP -Permission is granted to copy and distribute translations of this -manual page into another language, under the above conditions for -modified versions, except that this permission notice may be stated in -a translation approved by the Foundation. +TRE library: https://laurikari.net/tre/ +.SH AUTHORS +Cam Webb , @laurikari for the TRE library, the \fIgawkextlib\fR authors +.\" .SH COPYING PERMISSIONS