Skip to content

Commit

Permalink
reduced files to barebones
Browse files Browse the repository at this point in the history
  • Loading branch information
camwebb committed Nov 26, 2018
1 parent 398ce29 commit 61f02bd
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 30 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the aregex gawk extension as a shared object.
# The -l options come AFTER the source file: the linker processes its inputs
# in command-line order, so libraries named before aregex.c can be discarded
# (e.g. when the toolchain defaults to --as-needed), which leaves the TRE
# symbols unresolved when gawk loads aregex.so.
aregex.so: aregex.c
	gcc -shared -Wall -g -O2 -fPIC -o aregex.so aregex.c -lgawkextlib -ltre
51 changes: 30 additions & 21 deletions aregex.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,44 @@
* Distributed under the GNU General Public License v3
*/

#include "common.h"
#include <tre/tre.h> // From the TRE regex lib. https://laurikari.net/tre/
// Minimal headers:
#include <stdio.h>
#include <sys/stat.h>
#include <gawkapi.h>
#include <tre/tre.h>

static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG)
// Gawkextlib boilerplate:
static const gawk_api_t *api; /* for convenience macros to work */
static awk_ext_id_t ext_id;
int plugin_is_GPL_compatible;

// Main aregex() function definition
static awk_value_t * do_aregex(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
// Variables for reading awk function's arguments
awk_value_t re;
awk_value_t str;
awk_value_t incost;

// Check args if gawk called with lint
if (do_lint) {
if (nargs > 3)
lintwarn(ext_id, _("aregex() called with >3 arguments"));
lintwarn(ext_id, "aregex() called with >3 arguments");
}

// Set the default max cost
// Set the default max cost, and test for 3rd argument
int defcost = 2;
if (get_argument(2, AWK_NUMBER, &incost)) defcost = incost.num_value ;

// If the string arguments are read...
// If the string arguments (1st and 2nd) are read...
if ((get_argument(0, AWK_STRING, &re)) && \
(get_argument(1, AWK_STRING, &str))) {

// Compile RE
// Compile regex
regex_t preg;
tre_regcomp(&preg, re.str_value.str, REG_EXTENDED);

// Set approx grep params
// Set approx aregex params
regaparams_t params = { 0 };
params.cost_ins = 1;
params.cost_del = 1;
Expand All @@ -41,27 +51,30 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG)
params.max_ins = defcost;
params.max_subst = defcost;
params.max_err = defcost;


// Create structure for details of match
regamatch_t match;
match.nmatch = 0; // No partial match arrays needed
match.pmatch = 0; // - ditto -


// Return values
int treret = 0;
int rval = 0;

// Do the approx regexp
treret = tre_regaexec(&preg, str.str_value.str, &match, params, 0);

// Set the do_aregex() return value depending on tre_regaexec() return:
// Return cost (Levenshtein distance) if success, -1 if no match,
// -999 if memory error
if (treret == REG_NOMATCH) rval = -1;
else if (treret == REG_ESPACE) {
rval = -999;
warning(ext_id, \
_("aregex: TRE err.: mem. insufficient to complete the match."));
"aregex: TRE err.: mem. insufficient to complete the match.");
// TO DO?: update_ERRNO_int(). But which ERRNO?
}
else rval = match.cost;

// Return: cost (Levenshtein distance) if success, -1 if no match,
// -999 if memory error

return make_number(rval, result);
}
Expand All @@ -70,21 +83,17 @@ static awk_value_t * do_aregex(int nargs, awk_value_t *result API_FINFO_ARG)
return make_null_string(result);
}


// Gawkextlib boilerplate:

static awk_ext_func_t func_table[] = \
{
{ "aregex", do_aregex, 3, 2, awk_false, NULL },
};

static awk_bool_t init_my_module(void)
{
GAWKEXTLIB_COMMON_INIT
return awk_true;
}
static awk_bool_t (*init_func)(void) = NULL;

static awk_bool_t (*init_func)(void) = init_my_module;
static const char *ext_version = PACKAGE_STRING;
static const char *ext_version = "0.1";

dl_load_func(func_table, fmatch, "")

8 changes: 0 additions & 8 deletions test/aregex.awk

This file was deleted.

1 change: 0 additions & 1 deletion test/aregex.ok

This file was deleted.

18 changes: 18 additions & 0 deletions test_aregex.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
@load "./aregex.so"

# Manual smoke test for the aregex() extension function.
# Each case prints the regex, the subject string, the maximum cost passed
# as the third argument, and whatever aregex() returns for that combination.
BEGIN {
    subject = "abcdefghi"
    max_cost = 2

    # Case 1
    pattern = "^bcdefghij$"
    dist = aregex(pattern, subject, max_cost)
    print "RE1: '" pattern "', string: '" subject "', cost: " max_cost ", match dist: " dist

    # Case 2
    pattern = "^xcdefghij$"
    dist = aregex(pattern, subject, max_cost)
    print "RE2: '" pattern "', string: '" subject "', cost: " max_cost ", match dist: " dist

    # Case 3: same pattern as case 2, but called with two surplus arguments
    # so that running under `gawk --lint` produces a warning.
    dist = aregex(pattern, subject, max_cost, "extra1", "extra2")
    print "RE3: '" pattern "', string: '" subject "', cost: " max_cost ", match dist: " dist
}

0 comments on commit 61f02bd

Please sign in to comment.