Skip to content

Commit

Permalink
Merge pull request #464 from katef/sv/re_strings-endids
Browse files Browse the repository at this point in the history
Add endid support to `re_strings`.
  • Loading branch information
katef authored May 8, 2024
2 parents 25659c2 + 7d9b862 commit f945fda
Show file tree
Hide file tree
Showing 15 changed files with 228 additions and 11 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ SUBDIR += tests/pcre-flags
SUBDIR += tests/pcre-repeat
SUBDIR += tests/pred
SUBDIR += tests/re_literal
SUBDIR += tests/re_strings
SUBDIR += tests/reverse
SUBDIR += tests/trim
SUBDIR += tests/union
Expand Down
6 changes: 6 additions & 0 deletions include/fsm/fsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ fsm_setend(struct fsm *fsm, fsm_state_t state, int end);
int
fsm_setendid(struct fsm *fsm, fsm_end_id_t id);

/*
 * Associate a numeric end ID with one specific end state of an fsm,
 * identified by end_state.
 *
 * Returns 1 on success, 0 on error (allocation failure).
 */
int
fsm_setendidstate(struct fsm *fsm, fsm_state_t end_state, fsm_end_id_t id);

/* Get the end IDs associated with an end state, if any.
* If id_buf has enough cells to store all the end IDs (according
* to id_buf_count) then they are written into id_buf[] and
Expand Down
6 changes: 3 additions & 3 deletions include/re/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#ifndef RE_STRINGS_H
#define RE_STRINGS_H

struct fsm;
#include <fsm/fsm.h>
struct fsm_options;

struct re_strings;
Expand Down Expand Up @@ -42,10 +42,10 @@ void
re_strings_free(struct re_strings *g);

int
re_strings_add_raw(struct re_strings *g, const void *p, size_t n);
re_strings_add_raw(struct re_strings *g, const void *p, size_t n, const fsm_end_id_t *endid);

int
re_strings_add_str(struct re_strings *g, const char *s);
re_strings_add_str(struct re_strings *g, const char *s, const fsm_end_id_t *endid);

struct fsm *
re_strings_build(struct re_strings *g,
Expand Down
10 changes: 10 additions & 0 deletions src/libfsm/endids.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ fsm_setendid(struct fsm *fsm, fsm_end_id_t id)
return 1;
}

/*
 * Attach the end id `id` to the state `end_state` of `fsm`.
 *
 * Thin wrapper over fsm_endid_set(): the only result reported as a
 * failure (0) is an allocation failure; every other result yields 1.
 */
int
fsm_setendidstate(struct fsm *fsm, fsm_state_t end_state, fsm_end_id_t id)
{
	const enum fsm_endid_set_res outcome = fsm_endid_set(fsm, end_state, id);

	return outcome == FSM_ENDID_SET_ERROR_ALLOC_FAIL ? 0 : 1;
}

enum fsm_getendids_res
fsm_getendids(const struct fsm *fsm, fsm_state_t end_state,
size_t id_buf_count, fsm_end_id_t *id_buf,
Expand Down
1 change: 1 addition & 0 deletions src/libfsm/libfsm.syms
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ fsm_getendids
fsm_setendid
fsm_mapendids
fsm_increndids
fsm_setendidstate

fsm_countedges
fsm_countstates
Expand Down
26 changes: 24 additions & 2 deletions src/libre/ac.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <ctype.h>

#include <fsm/fsm.h>
#include <adt/stateset.h>

#include "ac.h"

Expand All @@ -21,6 +22,9 @@ enum { POOL_BLOCK_SIZE = 256 };
struct trie_state {
struct trie_state *children[256];
struct trie_state *fail;
/* use a state set as an endid set */
struct state_set *endids;

fsm_state_t st;
unsigned int index;
unsigned int output:1;
Expand Down Expand Up @@ -73,6 +77,7 @@ newstate(struct trie_graph *g)
st->index = ++g->nstates;

st->output = 0;
st->endids = NULL;

return st;
}
Expand All @@ -86,6 +91,10 @@ cleanup_pool(struct trie_graph *g)
p = g->pool;
g->pool = p->next;

for (size_t i = 0; i < p->n; i++) {
state_set_free(p->states[i].endids);
}

free(p->states);
free(p);
}
Expand Down Expand Up @@ -126,7 +135,7 @@ trie_create(void)
}

struct trie_graph *
trie_add_word(struct trie_graph *g, const char *w, size_t n)
trie_add_word(struct trie_graph *g, const char *w, size_t n, const fsm_end_id_t *endid)
{
struct trie_state *st;
size_t i;
Expand Down Expand Up @@ -159,6 +168,9 @@ trie_add_word(struct trie_graph *g, const char *w, size_t n)
g->depth = n;
}

if (endid != NULL) {
state_set_add(&st->endids, NULL, (fsm_state_t)*endid);
}
return g;
}

Expand Down Expand Up @@ -278,7 +290,7 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm,
assert(fsm != NULL);
assert(q != NULL);

if (ts->output && have_end) {
if (ts->output && have_end && state_set_empty(ts->endids)) {
*q = single_end;
return 1;
}
Expand Down Expand Up @@ -315,6 +327,16 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm,

if (ts->output) {
fsm_setend(fsm, st, 1);

struct state_iter si;
fsm_state_t state;
state_set_reset(ts->endids, &si);
while (state_set_next(&si, &state)) {
fsm_end_id_t endid = (fsm_end_id_t)state;
if (!fsm_setendidstate(fsm, st, endid)) {
return 0;
}
}
}

*q = st;
Expand Down
5 changes: 4 additions & 1 deletion src/libre/ac.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef AC_H
#define AC_H

#include "fsm/fsm.h"

struct fsm;
struct fsm_state;
struct fsm_options;
Expand All @@ -20,7 +22,8 @@ void
trie_free(struct trie_graph *g);

struct trie_graph *
trie_add_word(struct trie_graph *g, const char *w, size_t n);
trie_add_word(struct trie_graph *g, const char *w, size_t n,
const fsm_end_id_t *endid);

int
trie_add_failure_edges(struct trie_graph *g);
Expand Down
10 changes: 5 additions & 5 deletions src/libre/re_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ re_strings(const struct fsm_options *opt, const char *a[], size_t n,
}

for (i = 0; i < n; i++) {
if (!re_strings_add_str(g, a[i])) {
if (!re_strings_add_str(g, a[i], NULL)) {
goto error;
}
}
Expand Down Expand Up @@ -64,20 +64,20 @@ re_strings_free(struct re_strings *g)
}

int
re_strings_add_raw(struct re_strings *g, const void *p, size_t n)
re_strings_add_raw(struct re_strings *g, const void *p, size_t n, const fsm_end_id_t *endid)
{
assert(p != NULL);
assert(n > 0);

return trie_add_word((struct trie_graph *) g, p, n) != NULL;
return trie_add_word((struct trie_graph *) g, p, n, endid) != NULL;
}

int
re_strings_add_str(struct re_strings *g, const char *s)
re_strings_add_str(struct re_strings *g, const char *s, const fsm_end_id_t *endid)
{
assert(s != NULL);

return re_strings_add_raw(g, s, strlen(s));
return re_strings_add_raw(g, s, strlen(s), endid);
}

struct fsm *
Expand Down
26 changes: 26 additions & 0 deletions tests/re_strings/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build and run the re_strings tests.
#
# Every tests/re_strings/re_strings<n>.c is linked together with the shared
# testutil.o into a run<n> binary; running it records PASS or FAIL in res<n>.
.include "../../share/mk/top.mk"

# The list of test sources is discovered at parse time via the shell.
TEST.tests/re_strings != ls -1 tests/re_strings/re_strings*.c
TEST_SRCDIR.tests/re_strings = tests/re_strings
TEST_OUTDIR.tests/re_strings = ${BUILD}/tests/re_strings

# :T:R strips the directory and the .c suffix, :C removes the "re_strings"
# prefix, so ${n} is just the per-test suffix (1, 2, ...).
.for n in ${TEST.tests/re_strings:T:R:C/^re_strings//}
test:: ${TEST_OUTDIR.tests/re_strings}/res${n}
SRC += ${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c
# -UNDEBUG undefines NDEBUG for this source so its assert() calls stay active.
CFLAGS.${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c = -UNDEBUG

${TEST_OUTDIR.tests/re_strings}/run${n}: ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o
${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/re_strings}/run${n} ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a

${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o: tests/re_strings/testutil.h

# The test binary's stdout is redirected to stderr; only PASS/FAIL goes to res<n>.
${TEST_OUTDIR.tests/re_strings}/res${n}: ${TEST_OUTDIR.tests/re_strings}/run${n}
( ${TEST_OUTDIR.tests/re_strings}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/re_strings}/res${n}

# Relink when either static library changes.
.for lib in ${LIB:Mlibfsm} ${LIB:Mlibre}
${TEST_OUTDIR.tests/re_strings}/run${n}: ${BUILD}/lib/${lib:R}.a
.endfor
.endfor

# testutil.o is shared by all tests and built by this explicit rule.
# NOTE(review): this rule uses plain ${CFLAGS}, without the -UNDEBUG applied to
# the re_strings<n>.c sources above -- confirm that assert() in testutil.c is
# not compiled out in NDEBUG builds.
${TEST_OUTDIR.tests/re_strings}/testutil.o: tests/re_strings/testutil.c
${CC} ${CFLAGS} -c -o ${TEST_OUTDIR.tests/re_strings}/testutil.o tests/re_strings/testutil.c
21 changes: 21 additions & 0 deletions tests/re_strings/re_strings1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "testutil.h"

/* Test inputs: all nine two-character strings over {a, b, c}, so every
 * input shares a first character with two others.
 * The list is terminated by NULL. */
const char *strings[] = {
"aa",
"ab",
"ac",
"ba",
"bb",
"bc",
"ca",
"cb",
"cc",
NULL,
};

/* Test driver: run the shared check over this file's strings[] table and
 * use its result as the process exit status. */
int main(int argc, char **argv)
{
	/* command-line arguments are not used */
	(void)argv;
	(void)argc;

	const int status = run_test(strings);
	return status;
}
17 changes: 17 additions & 0 deletions tests/re_strings/re_strings2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "testutil.h"

/* Test inputs: the same string added three times, between two unique
 * strings. The list is terminated by NULL. */
const char *strings[] = {
"first",
"duplicate",
"duplicate",
"duplicate",
"last",
NULL,
};

/* Test driver: run the shared check over this file's strings[] table and
 * use its result as the process exit status. */
int main(int argc, char **argv)
{
	/* command-line arguments are not used */
	(void)argv;
	(void)argc;

	const int status = run_test(strings);
	return status;
}
15 changes: 15 additions & 0 deletions tests/re_strings/re_strings3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "testutil.h"

/* Test inputs: nothing but the same string added three times.
 * The list is terminated by NULL. */
const char *strings[] = {
"duplicate",
"duplicate",
"duplicate",
NULL,
};

/* Test driver: run the shared check over this file's strings[] table and
 * use its result as the process exit status. */
int main(int argc, char **argv)
{
	/* command-line arguments are not used */
	(void)argv;
	(void)argc;

	const int status = run_test(strings);
	return status;
}
13 changes: 13 additions & 0 deletions tests/re_strings/re_strings4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "testutil.h"

/* Test inputs: no strings at all; the list holds only the NULL terminator. */
const char *strings[] = {
/* empty */
NULL,
};

/* Test driver: run the shared check over this file's strings[] table and
 * use its result as the process exit status. */
int main(int argc, char **argv)
{
	/* command-line arguments are not used */
	(void)argv;
	(void)argc;

	const int status = run_test(strings);
	return status;
}
71 changes: 71 additions & 0 deletions tests/re_strings/testutil.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "testutil.h"

#include <stdbool.h>
#include <assert.h>

#include "fsm/fsm.h"
#include "fsm/options.h"

#include "re/re.h"
#include "re/strings.h"

static struct fsm_options opt;

#define MAX_INPUTS 100
static fsm_end_id_t id_buf[MAX_INPUTS];

int
run_test(const char **strings)
{
struct re_strings *s = re_strings_new();
assert(s != NULL);

fsm_end_id_t id = 0;
const char **input = strings;
while (*input != NULL) {
if (!re_strings_add_str(s, *input, &id)) {
assert(!"re_strings_add_str");
}

input++;
id++;
assert(id < MAX_INPUTS);
}

const int flags = 0; /* not anchored */

struct fsm *fsm = re_strings_build(s, &opt, flags);
assert(fsm != NULL);

/* Each literal string input should match, and the set of
* matching endids should include the expected one. */
id = 0;
input = strings;
while (*input != NULL) {
fsm_state_t end;
const char **string = input;
const int res = fsm_exec(fsm, fsm_sgetc, string, &end, NULL);
assert(res > 0); /* match */

size_t written;
enum fsm_getendids_res eres = fsm_getendids(fsm, end,
MAX_INPUTS, id_buf, &written);
assert(eres == FSM_GETENDIDS_FOUND);
bool found = false;
for (size_t i = 0; i < written; i++) {
if (id_buf[i] == id) {
found = true;
break;
}
}
assert(found);

input++;
id++;
}

re_strings_free(s);
fsm_free(fsm);

return EXIT_SUCCESS;
}
11 changes: 11 additions & 0 deletions tests/re_strings/testutil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef TESTUTIL_H
#define TESTUTIL_H

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Shared body of the re_strings tests.
 *
 * `strings` is a NULL-terminated array of strings. Each one is added to a
 * re_strings set with its position in the array as its end id; the built
 * fsm is then run against every string to check that it matches and that
 * the end state reached carries that id.
 *
 * Returns EXIT_SUCCESS when all checks pass.
 */
int
run_test(const char **strings);

#endif

0 comments on commit f945fda

Please sign in to comment.