Skip to content

Commit

Permalink
Enhance and cleanup lexer-parser interface (#107)
Browse files Browse the repository at this point in the history
This commit separates lex operations, allowing developers to control the
behavior of preprocessor aliasing per call. It also replaces the global
'preproc_aliasing' flag with an explicit 'aliasing' parameter.
Additionally, the commit refines control flow functions related to
preprocessor directives: it consolidates if_elif_skip_lines and
ifdef_else_skip_lines into a single function. This change is due to
their similar functionalities, and it now encompasses all possible
combinations of control flow. In the previous design, the former did not
cover #else, and the latter overlooked #elif. Furthermore, this commit
improves the naming of lex-related functions.
  • Loading branch information
ChAoSUnItY authored Jan 17, 2024
1 parent 4b83bb2 commit 33a7a90
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 97 deletions.
110 changes: 56 additions & 54 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,6 @@ int skip_newline = 1;

int preproc_match;

/* Allow replacing identifiers with alias value if alias exists. This is
* disabled in certain cases, e.g. #undef.
*/
int preproc_aliasing = 1;

/* Point to the first character after where the macro has been called. It is
* needed when returning from the macro body.
*/
Expand Down Expand Up @@ -174,44 +169,15 @@ char read_char(int is_skip_space)
return next_char;
}

/* Get the alias name from a defined() directive,
 * i.e., get __arm__ from defined(__arm__).
 *
 * alias_name: destination buffer; receives the NUL-terminated name. The
 *             caller must provide room for the name plus the terminator.
 * src:        text expected to begin with the 8-character prefix "defined(".
 *
 * If src ends before the prefix is complete, alias_name is set to the empty
 * string. Copying stops at ')' or at the end of src, whichever comes first,
 * so a missing ')' does not cause a read past the terminator.
 */
void read_alias_name_from_defined(char *alias_name, char *src)
{
    int i;

    alias_name[0] = 0;

    /* Make sure all 8 prefix characters exist before skipping over them. */
    for (i = 0; i < 8; i++) {
        if (!src[i])
            return;
    }

    src = src + 8; /* skip defined( */
    i = 0;
    while (src[i] && src[i] != ')') {
        alias_name[i] = src[i];
        i++;
    }
    alias_name[i] = 0;
}

/* Returns the source character located `offset` positions away from the
 * current read index, without changing the read index.
 */
char peek_char(int offset)
{
    int pos = source_idx + offset;
    return SOURCE[pos];
}

/* Sets preproc_match to 1 when the alias named by the current token exists.
 *
 * defined: when non-zero, token_str holds a defined(NAME) form and NAME is
 *          extracted first; otherwise token_str itself is the alias name.
 *
 * preproc_match is left untouched when no alias is found.
 */
void chk_def(int defined)
{
    char name_buf[MAX_TOKEN_LEN];
    char *lookup = token_str;

    if (defined) {
        read_alias_name_from_defined(name_buf, token_str);
        lookup = name_buf;
    }

    if (find_alias(lookup))
        preproc_match = 1;
}

token_t get_next_token()
/* Lexes the next token and returns its token type. Parameter `aliasing`
* controls preprocessor aliasing on identifier tokens: pass 0 to disable it.
*/
token_t lex_token_internal(int aliasing)
{
token_str[0] = 0;

Expand Down Expand Up @@ -257,7 +223,7 @@ token_t get_next_token()
read_char(0);
if (next_char == '/') {
read_char(1);
return get_next_token();
return lex_token_internal(aliasing);
}
}
} while (next_char);
Expand Down Expand Up @@ -549,7 +515,7 @@ token_t get_next_token()
if (!strcmp(token_str, "continue"))
return T_continue;

if (preproc_aliasing) {
if (aliasing) {
alias = find_alias(token_str);
if (alias) {
token_t t = is_numeric(alias) ? T_numeric : T_string;
Expand All @@ -570,7 +536,7 @@ token_t get_next_token()
next_char = SOURCE[source_idx];
} else
next_char = read_char(1);
return get_next_token();
return lex_token_internal(aliasing);
}

if (next_char == 0)
Expand All @@ -582,30 +548,45 @@ token_t get_next_token()
return T_eof;
}

/* Lexes the next token with preprocessor aliasing enabled and returns its
 * token type. Call `lex_token_internal` directly to lex with aliasing
 * disabled.
 */
token_t lex_token()
{
    token_t type = lex_token_internal(1);
    return type;
}

/* Skip the content. We only need the index where the macro body begins. */
void skip_macro_body()
{
while (!is_newline(next_char))
next_token = get_next_token();
next_token = lex_token();

skip_newline = 1;
next_token = get_next_token();
next_token = lex_token();
}

int lex_accept(token_t token)
/* Accepts next token if token types are matched. */
int lex_accept_internal(token_t token, int aliasing)
{
if (next_token == token) {
/* FIXME: this is a hack, fix aggressive aliasing first */
if (token == T_cppd_ifdef)
preproc_aliasing = 0;
next_token = get_next_token();
if (token == T_cppd_ifdef)
preproc_aliasing = 1;
next_token = lex_token_internal(aliasing);
return 1;
}

return 0;
}

/* Accepts the next token when its type matches `token`, lexing the token
 * that follows with aliasing enabled. Returns 1 on a match and 0 otherwise.
 * Use `lex_accept_internal` to disable aliasing on the following token.
 */
int lex_accept(token_t token)
{
    int accepted = lex_accept_internal(token, 1);
    return accepted;
}

/* Peeks at the next token and copies the token's literal to value if the
* token types match.
*/
int lex_peek(token_t token, char *value)
{
if (next_token == token) {
Expand All @@ -617,17 +598,38 @@ int lex_peek(token_t token, char *value)
return 0;
}

void lex_ident(token_t token, char *value)
/* Strictly match next token with given token type and copy token's
* literal to value.
*/
void lex_ident_internal(token_t token, char *value, int aliasing)
{
if (next_token != token)
error("Unexpected token");
strcpy(value, token_str);
next_token = get_next_token();
next_token = lex_token_internal(aliasing);
}

void lex_expect(token_t token)
/* Requires the next token to be of type `token` and copies its literal into
 * `value`; the token that follows is lexed with aliasing enabled. Use
 * `lex_ident_internal` to disable aliasing on the following token.
 */
void lex_ident(token_t token, char *value)
{
    int aliasing = 1;
    lex_ident_internal(token, value, aliasing);
}

/* Strictly match next token with given token type. */
void lex_expect_internal(token_t token, int aliasing)
{
if (next_token != token)
error("Unexpected token");
next_token = get_next_token();
next_token = lex_token_internal(aliasing);
}

/* Requires the next token to be of type `token`; the token that follows is
 * lexed with aliasing enabled. Use `lex_expect_internal` to disable aliasing
 * on the following token.
 */
void lex_expect(token_t token)
{
    int aliasing = 1;
    lex_expect_internal(token, aliasing);
}
67 changes: 24 additions & 43 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ int get_size(var_t *var, type_t *type)
* whitespace */
void skip_line(int invalidate)
{
/* FIXME: Comments cause the current validation to fail. */
skip_whitespace();
do {
if (invalidate && !is_whitespace(peek_char(0)) &&
Expand All @@ -75,28 +76,14 @@ void skip_line(int invalidate)
} while (read_char(0) != '\n');
}

/* Consumes source text one line at a time, copying each consumed line into
 * token_str, and stops once the character read after a line is '#' and the
 * character one position past the read index is not 'd'. Surrounding
 * whitespace is skipped before each line and once more on exit.
 */
void if_elif_skip_lines()
{
    char lookahead;
    int len;

    while (1) {
        skip_whitespace();

        /* Copy the remainder of the current line into token_str. */
        len = 0;
        do {
            token_str[len] = next_char;
            len++;
        } while (read_char(0) != '\n');
        token_str[len] = 0;

        read_char(1);
        lookahead = peek_char(1);

        /* Stop at a '#' that is not followed by 'd'. */
        if (next_char == '#' && lookahead != 'd')
            break;
    }
    skip_whitespace();
}

void ifdef_else_skip_lines()
/* Skips lines while the preprocessor match is false; stops once the next
* token is `T_cppd_elif`, `T_cppd_else` or `T_cppd_endif`.
*/
void cppd_control_flow_skip_lines()
{
while (!lex_peek(T_cppd_else, NULL) && !lex_peek(T_cppd_endif, NULL)) {
next_token = get_next_token();
while (!lex_peek(T_cppd_elif, NULL) && !lex_peek(T_cppd_else, NULL) &&
!lex_peek(T_cppd_endif, NULL)) {
next_token = lex_token();
}
skip_whitespace();
}
Expand All @@ -111,12 +98,10 @@ void read_defined_macro()
{
char lookup_alias[MAX_TOKEN_LEN];

preproc_aliasing = 0; /* to prevent aggressive aliasing */
lex_expect(T_identifier); /* defined */
lex_expect(T_open_bracket);
lex_expect_internal(T_open_bracket, 0);
lex_ident(T_identifier, lookup_alias);
lex_expect(T_close_bracket);
preproc_aliasing = 1;

check_def(lookup_alias);
}
Expand Down Expand Up @@ -169,10 +154,8 @@ int read_preproc_directive()
if (lex_peek(T_cppd_undef, token)) {
char alias[MAX_VAR_LEN];

preproc_aliasing = 0;
lex_expect(T_cppd_undef);
lex_expect_internal(T_cppd_undef, 0);
lex_peek(T_identifier, alias);
preproc_aliasing = 1;
lex_expect(T_identifier);

remove_alias(alias);
Expand Down Expand Up @@ -201,7 +184,7 @@ int read_preproc_directive()
return 1;
}

if_elif_skip_lines();
cppd_control_flow_skip_lines();
} else {
/* TODO: parse and evaluate constant expression here */
}
Expand All @@ -210,7 +193,7 @@ int read_preproc_directive()
if (lex_accept(T_cppd_elif)) {
if (preproc_match) {
while (!lex_peek(T_cppd_endif, NULL)) {
next_token = get_next_token();
next_token = lex_token();
}
return 1;
}
Expand All @@ -223,7 +206,7 @@ int read_preproc_directive()
return 1;
}

if_elif_skip_lines();
cppd_control_flow_skip_lines();
} else {
/* TODO: parse and evaluate constant expression here */
}
Expand All @@ -240,16 +223,15 @@ int read_preproc_directive()
return 1;
}

/* skip lines until #else or #endif */
ifdef_else_skip_lines();
cppd_control_flow_skip_lines();
return 1;
}
if (lex_accept(T_cppd_endif)) {
preproc_match = 0;
skip_whitespace();
return 1;
}
if (lex_accept(T_cppd_ifdef)) {
if (lex_accept_internal(T_cppd_ifdef, 0)) {
preproc_match = 0;
lex_ident(T_identifier, token);
check_def(token);
Expand All @@ -259,8 +241,7 @@ int read_preproc_directive()
return 1;
}

/* skip lines until #else or #endif */
ifdef_else_skip_lines();
cppd_control_flow_skip_lines();
return 1;
}

Expand Down Expand Up @@ -677,12 +658,12 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
for (i = 0; i < remainder; i++) {
source_idx = macro->params[macro->num_params - remainder + i];
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
read_expr(parent, bb);
}
source_idx = t;
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
} else if (mac) {
if (parent->macro)
error("Nested macro is not yet supported");
Expand All @@ -695,7 +676,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
while (!lex_peek(T_close_bracket, NULL)) {
mac->params[mac->num_params++] = source_idx;
do {
next_token = get_next_token();
next_token = lex_token();
} while (next_token != T_comma &&
next_token != T_close_bracket);
}
Expand All @@ -717,11 +698,11 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
int t = source_idx;
source_idx = macro_param_idx;
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
read_expr(parent, bb);
source_idx = t;
next_char = SOURCE[source_idx];
next_token = get_next_token();
next_token = lex_token();
} else if (con) {
ph1_ir = add_ph1_ir(OP_load_constant);
vd = require_var(parent);
Expand Down Expand Up @@ -1599,15 +1580,15 @@ void eval_ternary_imm(int cond, char *token)
{
if (cond == 0) {
while (next_token != T_colon) {
next_token = get_next_token();
next_token = lex_token();
}
lex_accept(T_colon);
read_global_assignment(token);
} else {
read_global_assignment(token);
lex_expect(T_colon);
while (!lex_peek(T_semicolon, NULL)) {
next_token = get_next_token();
next_token = lex_token();
}
}
}
Expand Down Expand Up @@ -2453,7 +2434,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
while (!lex_peek(T_close_bracket, NULL)) {
mac->params[mac->num_params++] = source_idx;
do {
next_token = get_next_token();
next_token = lex_token();
} while (next_token != T_comma && next_token != T_close_bracket);
}
/* move `source_idx` to the macro body */
Expand Down

0 comments on commit 33a7a90

Please sign in to comment.