Skip to content

Commit

Permalink
String#split RegExp separators, general improvements to String
Browse files Browse the repository at this point in the history
- Adds partial support for RegExp separators. Splicing is not yet in,
  and there seem to be issues with some patterns (see the FIXMEs)
- Fixes incorrect behavior with String#split when separator is matched
  at last segment.
- More work on edge cases with Infinity and other large numbers.
  • Loading branch information
ndreynolds committed Apr 7, 2013
1 parent fb042e3 commit d6f03f3
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 169 deletions.
4 changes: 2 additions & 2 deletions src/eval.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* eval.c -- AST-walking interpreter
*
* Copyright (c) 2012 Nick Reynolds
* Copyright (c) 2012-2013 Nick Reynolds
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
Expand Down Expand Up @@ -33,7 +33,7 @@ fh_eval(js_val *ctx, ast_node *node)
switch(node->type) {
case NODE_BOOL: return JSBOOL(node->val);
case NODE_STR: return JSSTR(node->sval);
case NODE_REGEXP: return JSREGEXP(node->sval);
case NODE_REGEXP: return JSRE(node->sval);
case NODE_NUM: return JSNUM(node->val);
case NODE_NULL: return JSNULL();
case NODE_FUNC: return JSFUNC(node);
Expand Down
20 changes: 18 additions & 2 deletions src/flathead.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,10 @@ fh_new_regexp(char *re)
}

// Store the inner pattern
fh_set(val, "source", JSSTR(fh_str_slice(re, 1, i)));
fh_set_class(val, "RegExp");
if (i > 1)
fh_set(val, "source", JSSTR(fh_str_slice(re, 1, i)));

fh_set_class(val, "RegExp");
return val;
}

Expand Down Expand Up @@ -323,6 +324,21 @@ fh_to_number(js_val *val)
return val;
}

/* Convert a value to an integral number, following the ECMAScript ToInteger
 * operation: sign(number) * floor(abs(number)).
 *
 *   - NaN converts to 0.
 *   - +/-Infinity and +/-0 are returned unchanged (as the converted number).
 *   - Every other value is truncated toward zero.
 *
 * The truncation is done entirely in floating point. Going through the
 * integer `abs()` or an integer temporary would mangle (or overflow on)
 * values outside the range of the integer type, e.g. 1e30. */
js_val *
fh_to_int(js_val *val)
{
    val = fh_to_number(val);
    if (IS_NAN(val))
        return JSNUM(0);
    if (IS_INF(val) || val->number.val == 0)
        return val;

    double num = val->number.val;
    // floor() rounds toward -Infinity, so negate around it for negatives.
    double truncated = num < 0 ? -floor(-num) : floor(num);
    return JSNUM(truncated);
}

js_val *
fh_to_int32(js_val *val)
{
Expand Down
9 changes: 7 additions & 2 deletions src/flathead.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* flathead.h -- Core types, constructors, casting, and debug.
*
* Copyright (c) 2012 Nick Reynolds
* Copyright (c) 2012-2013 Nick Reynolds
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
Expand Down Expand Up @@ -49,7 +49,7 @@
#define JSARR() fh_new_array()
#define JSFUNC(x) fh_new_function(x)
#define JSNFUNC(x,n) fh_new_native_function(x,n)
#define JSREGEXP(x) fh_new_regexp(x)
#define JSRE(x) fh_new_regexp(x)
#define JSNUMKEY(x) fh_cast(JSNUM((x)), T_STRING)

#define IS_STR(x) ((x)->type == T_STRING)
Expand All @@ -69,6 +69,10 @@
#define TO_BOOL(x) fh_cast((x),T_BOOLEAN)
#define TO_OBJ(x) fh_cast((x),T_OBJECT)

#define TO_INT(x) fh_to_int(x)
#define TO_INT32(x) fh_to_int32(x)
#define TO_UINT32(x) fh_to_uint32(x)

#define ARG(args,n) fh_get_arg((args), (n))
#define ARGLEN(args) fh_arg_len(args)

Expand Down Expand Up @@ -253,6 +257,7 @@ js_val * fh_try_get_proto(char *);
bool fh_is_callable(js_val *);
js_val * fh_to_primitive(js_val *, js_type);
js_val * fh_to_number(js_val *);
js_val * fh_to_int(js_val *);
js_val * fh_to_int32(js_val *);
js_val * fh_to_uint32(js_val *);
js_val * fh_to_string(js_val *);
Expand Down
34 changes: 29 additions & 5 deletions src/regexp.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* regexp.c -- PCRE wrapper
*
* Copyright (c) 2012 Nick Reynolds
* Copyright (c) 2012-2013 Nick Reynolds
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
Expand All @@ -27,8 +27,9 @@
const int regexp_vector_len = 30;


// Gateway to the PCRE library. When compiled with fh_no_regexp, this function
// is still available, but will throw an error when called.
/* Gateway to the PCRE library. Compiles a regular expression pattern and
* returns matches. When compiled with fh_no_regexp, this function is still
* available, but will throw an error when called. */
int *
fh_regexp(char *str, char *pattern, int *count, int offset, bool caseless)
{
Expand All @@ -43,7 +44,8 @@ fh_regexp(char *str, char *pattern, int *count, int offset, bool caseless)

pcre *regexp = pcre_compile(pattern, options, &error, &error_offset, NULL);
if (!regexp)
fh_error(NULL, E_SYNTAX, "Regular expression is not valid");
fh_error(NULL, E_SYNTAX, "Invalid Regular Expression:\n %s at offset %d",
error, error_offset);

rc = pcre_exec(regexp, NULL, str, strlen(str), offset, 0,
output_vector, regexp_vector_len);
Expand All @@ -61,6 +63,28 @@ fh_regexp(char *str, char *pattern, int *count, int offset, bool caseless)
return output_vector;
#else
fh_error(NULL, E_ERROR, "Regular expressions are not available");
return NULL;
UNREACHABLE();
#endif
}

/* Get the number of capturing subpatterns in the regular expression. This is
 * used to get the `NCapturingParens` value used in parts of the ECMA spec.
 *
 * The pattern is compiled with PCRE_JAVASCRIPT_COMPAT; an invalid pattern is
 * reported through fh_error() as a syntax error. The compiled pattern is
 * released before returning. If PCRE cannot report the capture count, 0 is
 * returned.
 *
 * When compiled with fh_no_regexp, this function is still available, but
 * reports an error when called. */
int
fh_regexp_ncaptures(char *pattern)
{
#ifndef fh_no_regexp
    const char *error;
    int error_offset;
    int options = PCRE_JAVASCRIPT_COMPAT;
    int captures = 0;

    pcre *regexp = pcre_compile(pattern, options, &error, &error_offset, NULL);
    if (!regexp)
        fh_error(NULL, E_SYNTAX, "Invalid Regular Expression:\n %s at offset %d",
                 error, error_offset);

    // pcre_fullinfo returns 0 on success; don't trust `captures` otherwise.
    if (pcre_fullinfo(regexp, NULL, PCRE_INFO_CAPTURECOUNT, &captures) != 0)
        captures = 0;

    // The compiled pattern is only needed for the query above.
    pcre_free(regexp);
    return captures;
#else
    fh_error(NULL, E_ERROR, "Regular expressions are not available");
    UNREACHABLE();
#endif
}
3 changes: 2 additions & 1 deletion src/regexp.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* regexp.h -- PCRE wrapper
*
* Copyright (c) 2012 Nick Reynolds
* Copyright (c) 2012-2013 Nick Reynolds
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
Expand All @@ -22,5 +22,6 @@
#define REGEXP_H

int * fh_regexp(char *, char *, int *, int, bool);
int fh_regexp_ncaptures(char *);

#endif
118 changes: 84 additions & 34 deletions src/runtime/lib/String.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ str_proto_index_of(js_val *instance, js_args *args, eval_state *state)
char *needle = search_str->string.ptr;
js_val *from = ARG(args, 1);

int i = IS_NUM(from) ? from->number.val : 0;
int match = 0;
long i = TO_INT(from)->number.val;
long match = 0;

int needle_len = strlen(needle);
int haystack_len = strlen(haystack);
long needle_len = strlen(needle);
long haystack_len = strlen(haystack);

// Searching for an empty string returns the fromIndex when less than
// the instance string length, and the instance string length otherwise.
Expand Down Expand Up @@ -249,12 +249,16 @@ str_proto_search(js_val *instance, js_args *args, eval_state *state)
char *str = TO_STR(instance)->string.ptr;
js_val *regexp = ARG(args, 0);

if (!IS_REGEXP(regexp)) {
js_val *tmp = TO_STR(regexp);
regexp = JSRE("");
fh_set(regexp, "source", tmp);
}

char *pattern = fh_get(regexp, "source")->string.ptr;
bool caseless = fh_get_proto(regexp, "ignoreCase")->boolean.val;

int count, *matches = fh_regexp(str, pattern, &count, 0, caseless);


if (!matches)
return JSNUM(-1);

Expand Down Expand Up @@ -289,9 +293,11 @@ str_proto_split(js_val *instance, js_args *args, eval_state *state)
js_val *sep_arg = ARG(args, 0);
js_val *limit_arg = ARG(args, 1);

instance = TO_STR(instance);
js_val *arr = JSARR();

int limit = IS_UNDEF(limit_arg) ? pow(2, 32) - 1 : TO_NUM(limit_arg)->number.val;
unsigned long limit = IS_UNDEF(limit_arg) ?
pow(2, 32) - 1 : TO_UINT32(limit_arg)->number.val;

if (limit == 0)
return arr;
Expand All @@ -301,21 +307,24 @@ str_proto_split(js_val *instance, js_args *args, eval_state *state)
fh_set_len(arr, 1);
return arr;
}
else if (IS_REGEXP(sep_arg)) {
fh_error(state, E_ERROR, "RegExp separators are not yet implemented");
}

char *split; // store the split substring
char *sep = sep_arg->string.ptr; // separator string
char *str = instance->string.ptr; // instance string
int len = instance->string.length; // instance string length
int match = 0; // number of chars of sep matched
int start = 0; // start index to split from
int index = 0; // result array index
int i; // instance string index
int n; // strlen

for (i = 0, n = strlen(str); i < n; i++) {
else if (IS_REGEXP(sep_arg))
return str_regexp_splitter(instance->string.ptr, sep_arg, limit);
else
sep_arg = TO_STR(sep_arg);

char *split; // store the split substring
char *str = instance->string.ptr; // instance string
char *sep = sep_arg->string.ptr; // separator string
int len = instance->string.length; // instance string length
int match = 0; // number of chars of sep matched
int start = 0; // start index to split from
int index = 0; // result array index
int n = strlen(str); // len of str
int i; // instance string index
bool matched_last = false; //

for (i = 0; i < n; i++) {
matched_last = false;
if (str[i] == sep[match])
match++;
else
Expand All @@ -324,16 +333,23 @@ str_proto_split(js_val *instance, js_args *args, eval_state *state)
if (match == (int)strlen(sep)) {
split = fh_str_slice(str, start, i - strlen(sep) + 1);
fh_set(arr, JSNUMKEY(index++)->string.ptr, JSSTR(split));
free(split);
start = i + 1;
match = 0;
matched_last = true;
if (--limit == 0) break;
}
}

// Move the remaining string (possibly all of it) into the array.
if (limit > 0 && start != len) {
split = fh_str_slice(str, start, len);
fh_set(arr, JSNUMKEY(index++)->string.ptr, JSSTR(split));
if (limit > 0) {
if (start != len) {
split = fh_str_slice(str, start, len);
fh_set(arr, JSNUMKEY(index++)->string.ptr, JSSTR(split));
free(split);
}
else if (matched_last && strlen(sep))
fh_set(arr, JSNUMKEY(index++)->string.ptr, JSSTR(""));
}

fh_set_len(arr, index);
Expand All @@ -344,27 +360,27 @@ str_proto_split(js_val *instance, js_args *args, eval_state *state)
js_val *
str_proto_substr(js_val *instance, js_args *args, eval_state *state)
{
int strlen = instance->string.length;
int start = TO_NUM(ARG(args, 0))->number.val;
int length = IS_UNDEF(ARG(args, 1)) ? strlen : TO_NUM(ARG(args, 1))->number.val;
long slen = instance->string.length;
long start = TO_INT(ARG(args, 0))->number.val;
long length = IS_UNDEF(ARG(args, 1)) ? slen : TO_INT(ARG(args, 1))->number.val;

if (start < 0)
start = MAX(start + strlen, 0);
start = MAX(start + slen, 0);

if (MIN(MAX(length, 0), strlen - start) <= 0)
if (MIN(MAX(length, 0), slen - start) <= 0)
return JSSTR("");

int end = MIN(strlen, start + length);
int end = MIN(slen, start + length);
return JSSTR(fh_str_slice(instance->string.ptr, start, end));
}

// String.prototype.substring(start[, end])
js_val *
str_proto_substring(js_val *instance, js_args *args, eval_state *state)
{
int len = instance->string.length;
int start = TO_NUM(ARG(args, 0))->number.val;
int end = IS_UNDEF(ARG(args, 1)) ? len : TO_NUM(ARG(args, 1))->number.val;
long len = instance->string.length;
long start = TO_INT(ARG(args, 0))->number.val;
int end = IS_UNDEF(ARG(args, 1)) ? len : TO_INT(ARG(args, 1))->number.val;

start = MIN(MAX(start, 0), len);
end = MIN(MAX(end, 0), len);
Expand Down Expand Up @@ -482,6 +498,40 @@ str_splice(char *str, char *rep, int start, int end)
return new;
}

/* Split `str` on every match of the RegExp object `regexp` and return the
 * pieces as a new array. Follows the loop of String.prototype.split in the
 * ECMAScript spec, except that captured groups are not yet spliced into the
 * result.
 *
 *   str    - the string to split
 *   regexp - RegExp object; its "source" and "ignoreCase" properties are read
 *   limit  - maximum number of elements to produce. The caller holds this as
 *            an unsigned 32-bit quantity, so it is reinterpreted as unsigned
 *            here (a "no limit" value of 2^32 - 1 typically arrives as -1).
 *            NOTE(review): assumes the caller has already handled limit == 0.
 *
 * A match that is empty and sits at the start of the pending segment does not
 * produce an element; the search simply resumes one character later. This
 * keeps patterns that can match the empty string from looping forever. */
js_val *
str_regexp_splitter(char *str, js_val *regexp, int limit)
{
    char *source = TO_STR(fh_get_proto(regexp, "source"))->string.ptr;
    bool caseless = TO_BOOL(fh_get_proto(regexp, "ignoreCase"))->boolean.val;
    // int ncaps = fh_regexp_ncaptures(source);
    js_val *arr = JSARR();
    unsigned lim = (unsigned)limit;   // see the note on `limit` above
    unsigned len = strlen(str);
    unsigned seg = 0;                 // start of the segment not yet emitted
    unsigned pos = 0;                 // offset the next search starts from
    unsigned j = 0;                   // next result array index
    bool matched_last = false;        // has any separator been consumed?
    int count, *matches;
    char *tmp;

    while (pos < len) {
        count = 0;
        matches = fh_regexp(str, source, &count, pos, caseless);
        if (!matches || count <= 0) {
            // No further separator; the tail is handled below.
            free(matches);
            break;
        }

        unsigned m_start = matches[0];
        unsigned m_end = matches[1];
        free(matches);

        // Empty match at the segment start: skip a character and retry.
        if (m_end == seg) {
            pos = m_start + 1;
            continue;
        }

        tmp = fh_str_slice(str, seg, m_start);
        fh_set(arr, JSNUMKEY(j++)->string.ptr, JSSTR(tmp));
        free(tmp);   // freed right after JSSTR, as str_proto_split does

        if (j == lim) {
            fh_set_len(arr, j);
            return arr;
        }

        seg = pos = m_end;
        matched_last = true;
    }

    // Move the remaining string (possibly all of it) into the array. When a
    // separator ended exactly at the end of the string, the remainder is "".
    if (seg < len) {
        tmp = fh_str_slice(str, seg, len);
        fh_set(arr, JSNUMKEY(j++)->string.ptr, JSSTR(tmp));
        free(tmp);
    }
    else if (matched_last)
        fh_set(arr, JSNUMKEY(j++)->string.ptr, JSSTR(""));

    fh_set_len(arr, j);
    return arr;
}

js_val *
bootstrap_string()
{
Expand Down
1 change: 1 addition & 0 deletions src/runtime/lib/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ js_val * str_proto_trim_right(js_val *, js_args *, eval_state *);
js_val * str_proto_value_of(js_val *, js_args *, eval_state *);

char * str_splice(char *, char *, int, int);
js_val * str_regexp_splitter(char *, js_val *, int);

js_val * bootstrap_string(void);

Expand Down
Loading

0 comments on commit d6f03f3

Please sign in to comment.