Skip to content

Commit

Permalink
Correctly wrap double quoted strings
Browse files Browse the repository at this point in the history
Also make long plain strings quoted if they go over the width limit

Signed-off-by: Pantelis Antoniou <[email protected]>
  • Loading branch information
pantoniou committed Jan 22, 2025
1 parent 03bc8a2 commit 9ddcfb5
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 64 deletions.
83 changes: 59 additions & 24 deletions src/lib/fy-emit.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,7 @@ void fy_emit_token_write_plain(struct fy_emitter *emit, struct fy_token *fyt, in
fy_emit_output_accum(emit, wtype, &emit->ea);
emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
} else
fy_emit_accum_utf8_put(&emit->ea, c);

Expand All @@ -798,17 +799,21 @@ void fy_emit_token_write_plain(struct fy_emitter *emit, struct fy_token *fyt, in
if (!breaks) {
fy_emit_output_accum(emit, wtype, &emit->ea);
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
}

emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);

breaks = true;

} else {

if (breaks)
if (breaks) {
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
}

fy_emit_accum_utf8_put(&emit->ea, c);

Expand Down Expand Up @@ -857,6 +862,7 @@ void fy_emit_token_write_alias(struct fy_emitter *emit, struct fy_token *fyt, in

void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, int flags, int indent, char qc)
{
const struct fy_token_analysis *ta;
bool allow_breaks, spaces, breaks;
int c, i, w, digit;
enum fy_emitter_write_type wtype;
Expand All @@ -869,6 +875,7 @@ void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, i
uint32_t hi_surrogate, lo_surrogate;
uint8_t non_utf8[4];
size_t non_utf8_len, k;
int emit_width;

wtype = qc == '\'' ?
((flags & DDNF_SIMPLE_SCALAR_KEY) ?
Expand All @@ -889,9 +896,15 @@ void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, i
*/
target_style = qc == '"' ? FYAS_DOUBLE_QUOTED : FYAS_SINGLE_QUOTED;

allow_breaks = !(flags & DDNF_SIMPLE) && !fy_emit_is_json_mode(emit) && !fy_emit_is_oneline(emit);
emit_width = fy_emit_width(emit);

ta = fy_token_text_analyze(fyt);

/* simple case of direct output (large amount of cases) */
str = fy_token_get_direct_output(fyt, &len);
if (str && fy_token_atom_style(fyt) == target_style) {
if (str && fy_token_atom_style(fyt) == target_style &&
(ta->flags & FYTTAF_DIRECT_OUTPUT) && emit->column + ta->maxcol < emit_width) {
fy_emit_write(emit, wtype, str, len);
goto out;
}
Expand All @@ -901,8 +914,6 @@ void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, i
if (!atom)
goto out;

allow_breaks = !(flags & DDNF_SIMPLE) && !fy_emit_is_json_mode(emit) && !fy_emit_is_oneline(emit);

spaces = false;
breaks = false;

Expand Down Expand Up @@ -931,7 +942,7 @@ void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, i

if (fy_is_space(c) || (qc == '\'' && fy_is_ws(c))) {
should_indent = allow_breaks && !spaces &&
fy_emit_accum_column(&emit->ea) > fy_emit_width(emit);
fy_emit_accum_column(&emit->ea) >= emit_width;

if (should_indent &&
((qc == '\'' && fy_is_ws(fy_atom_iter_utf8_peek(&iter))) ||
Expand All @@ -943,6 +954,7 @@ void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, i

emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
} else
fy_emit_accum_utf8_put(&emit->ea, c);

Expand All @@ -959,17 +971,30 @@ void fy_emit_token_write_quoted(struct fy_emitter *emit, struct fy_token *fyt, i
if (!breaks) {
fy_emit_output_accum(emit, wtype, &emit->ea);
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
}

emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);

breaks = true;
} else {
/* output run */
if (breaks) {
fy_emit_output_accum(emit, wtype, &emit->ea);
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);

} else if (qc == '"' && allow_breaks && fy_emit_accum_column(&emit->ea) >= emit_width) {

fy_emit_output_accum(emit, wtype, &emit->ea);

fy_emit_putc(emit, wtype, '\\');

emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
}

/* escape */
Expand Down Expand Up @@ -1179,6 +1204,7 @@ void fy_emit_token_write_literal(struct fy_emitter *emit, struct fy_token *fyt,

if (breaks) {
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
breaks = false;
}

Expand Down Expand Up @@ -1232,8 +1258,10 @@ void fy_emit_token_write_folded(struct fy_emitter *emit, struct fy_token *fyt, i
fy_emit_output_accum(emit, fyewt_literal_scalar, &emit->ea);
/* do not output a newline (indent) if at the end or
* this is a leading spaces line */
if (!fy_is_z(fy_atom_iter_utf8_peek(&iter)) && !leading_spaces)
if (!fy_is_z(fy_atom_iter_utf8_peek(&iter)) && !leading_spaces) {
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
}
}

/* count the number of consecutive breaks */
Expand All @@ -1252,6 +1280,7 @@ void fy_emit_token_write_folded(struct fy_emitter *emit, struct fy_token *fyt, i
while (nrbreaks-- > nrbreakslim) {
emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
}

breaks = true;
Expand All @@ -1261,6 +1290,7 @@ void fy_emit_token_write_folded(struct fy_emitter *emit, struct fy_token *fyt, i
/* if we had a break, output an indent */
if (breaks) {
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);

/* if this line starts with whitespace we need to know */
leading_spaces = fy_is_ws(c);
Expand All @@ -1272,6 +1302,7 @@ void fy_emit_token_write_folded(struct fy_emitter *emit, struct fy_token *fyt, i
fy_emit_output_accum(emit, fyewt_folded_scalar, &emit->ea);
emit->flags &= ~FYEF_INDENTATION;
fy_emit_write_indent(emit, indent);
fy_emit_output_col_sync(emit, &emit->ea);
} else
fy_emit_accum_utf8_put(&emit->ea, c);

Expand All @@ -1288,9 +1319,9 @@ fy_emit_token_scalar_style(struct fy_emitter *emit, struct fy_token *fyt,
int flags, int indent, enum fy_node_style style,
struct fy_token *fyt_tag)
{
const struct fy_token_analysis *ta = NULL;
bool json, flow, is_null_scalar, is_json_plain;
struct fy_atom *atom;
int aflags = -1;
const char *tag;
size_t tag_len;

Expand Down Expand Up @@ -1337,61 +1368,63 @@ fy_emit_token_scalar_style(struct fy_emitter *emit, struct fy_token *fyt,
return FYNS_DOUBLE_QUOTED;
}

aflags = fy_token_text_analyze(fyt);
ta = fy_token_text_analyze(fyt);

if (flow && (style == FYNS_ANY || style == FYNS_LITERAL || style == FYNS_FOLDED)) {

/* if there's a linebreak, use double quoted style */
if (aflags & FYTTAF_HAS_ANY_LB) {
if (ta->flags & FYTTAF_HAS_ANY_LB) {
style = FYNS_DOUBLE_QUOTED;
goto out;
}

if (!(aflags & FYTTAF_HAS_NON_PRINT)) {
if (!(ta->flags & FYTTAF_HAS_NON_PRINT)) {
style = FYNS_SINGLE_QUOTED;
goto out;
}

/* anything not empty is double quoted here */
style = !(aflags & FYTTAF_EMPTY) ? FYNS_PLAIN : FYNS_DOUBLE_QUOTED;
style = !(ta->flags & FYTTAF_EMPTY) ? FYNS_PLAIN : FYNS_DOUBLE_QUOTED;
}

/* try to pretify */
if (!flow && fy_emit_is_pretty_mode(emit) &&
(style == FYNS_ANY || style == FYNS_DOUBLE_QUOTED || style == FYNS_SINGLE_QUOTED)) {

/* any original style can be a plain, but contains linebreaks, do a literal */
if ((aflags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) {
if ((ta->flags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) {
style = FYNS_LITERAL;
goto out;
}

/* any style, can be just a plain, just make it so */
if (style == FYNS_ANY && (aflags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == FYTTAF_CAN_BE_PLAIN) {
if ((ta->flags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == FYTTAF_CAN_BE_PLAIN) {
style = FYNS_PLAIN;
goto out;
}

}

if (!flow && emit->source_json && fy_emit_is_dejson_mode(emit)) {
if (is_json_plain || (aflags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == FYTTAF_CAN_BE_PLAIN) {
if (is_json_plain || (ta->flags & (FYTTAF_CAN_BE_PLAIN | FYTTAF_HAS_LB)) == FYTTAF_CAN_BE_PLAIN) {
style = FYNS_PLAIN;
goto out;
}
}

out:
if (style == FYNS_ANY)
style = (aflags & FYTTAF_CAN_BE_PLAIN) ? FYNS_PLAIN : FYNS_DOUBLE_QUOTED;
style = (ta->flags & FYTTAF_CAN_BE_PLAIN) ? FYNS_PLAIN : FYNS_DOUBLE_QUOTED;

if (style == FYNS_PLAIN) {
/* plains in flow mode not being able to be plains
* - plain in block mode that can't be plain in flow mode
* - special handling for plains on start of line
*/
if ((flow && !(aflags & FYTTAF_CAN_BE_PLAIN_FLOW) && !(aflags & FYTTAF_CAN_BE_SIMPLE_KEY) && !is_null_scalar) ||
((aflags & FYTTAF_QUOTE_AT_0) && indent == 0))
if ((flow && !(ta->flags & FYTTAF_CAN_BE_PLAIN_FLOW) && !(ta->flags & FYTTAF_CAN_BE_SIMPLE_KEY) && !is_null_scalar) ||
((ta->flags & FYTTAF_QUOTE_AT_0) && indent == 0))
style = FYNS_DOUBLE_QUOTED;

if (style == FYNS_PLAIN && (emit->column + ta->maxspan) > fy_emit_width(emit))
style = FYNS_DOUBLE_QUOTED;
}

Expand Down Expand Up @@ -1687,7 +1720,8 @@ void fy_emit_mapping(struct fy_emitter *emit, struct fy_node *fyn, int flags, in
struct fy_node_pair *fynp, *fynpn, **fynpp = NULL;
struct fy_token *fyt_key, *fyt_value;
bool last, simple_key, used_malloc = false;
int aflags, i, count;
const struct fy_token_analysis *ta;
int i, count;
struct fy_emit_save_ctx sct, *sc = &sct;

memset(sc, 0, sizeof(*sc));
Expand Down Expand Up @@ -1759,9 +1793,9 @@ void fy_emit_mapping(struct fy_emitter *emit, struct fy_node *fyn, int flags, in
if (fynp->key) {
switch (fynp->key->type) {
case FYNT_SCALAR:
aflags = fy_token_text_analyze(fynp->key->scalar);
ta = fy_token_text_analyze(fynp->key->scalar);
simple_key = fy_emit_is_json_mode(emit) ||
!!(aflags & FYTTAF_CAN_BE_SIMPLE_KEY);
!!(ta->flags & FYTTAF_CAN_BE_SIMPLE_KEY);
break;
case FYNT_SEQUENCE:
simple_key = fy_node_list_empty(&fynp->key->sequence);
Expand Down Expand Up @@ -3266,7 +3300,8 @@ static int fy_emit_handle_mapping_key(struct fy_emitter *emit, struct fy_parser
struct fy_event *fye = &fyep->e;
struct fy_emit_save_ctx *sc = &emit->s_sc;
struct fy_token *fyt_key = NULL;
int ret, aflags;
const struct fy_token_analysis *ta;
int ret;
bool simple_key;

fy_emit_token_unref(emit, fyp, sc->fyt_last_key);
Expand Down Expand Up @@ -3301,8 +3336,8 @@ static int fy_emit_handle_mapping_key(struct fy_emitter *emit, struct fy_parser
break;
case FYET_SCALAR:
fyt_key = fye->scalar.value;
aflags = fy_token_text_analyze(fyt_key);
simple_key = !!(aflags & FYTTAF_CAN_BE_SIMPLE_KEY);
ta = fy_token_text_analyze(fyt_key);
simple_key = !!(ta->flags & FYTTAF_CAN_BE_SIMPLE_KEY);
break;
case FYET_SEQUENCE_START:
fyt_key = fye->sequence_start.sequence_start;
Expand Down
6 changes: 6 additions & 0 deletions src/lib/fy-emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,10 @@ fy_emit_output_accum(struct fy_emitter *emit, enum fy_emitter_write_type type, s
fy_emit_accum_reset(ea);
}

/*
 * Fold the emitter's current output column into the accumulator's
 * column counter: ea->col grows by emit->column.
 *
 * NOTE(review): call sites invoke this right after writing an indent,
 * presumably so that width checks made against the accumulator column
 * reflect the real position on the output line -- confirm that the
 * accumulator column has been reset before each call.
 */
static inline void
fy_emit_output_col_sync(struct fy_emitter *emit, struct fy_emit_accum *ea)
{
	ea->col = ea->col + emit->column;
}

#endif
Loading

0 comments on commit 9ddcfb5

Please sign in to comment.