Skip to content

Commit

Permalink
2korapxml: Print input and output filenames in warnings
Browse files: browse the repository at this point in the history
... as well as the line number, to make errors easier to localise.

Change-Id: I58b401a0432b1c21e273793b08cd76553be63a2d
  • Loading branch information
kupietz committed Jun 25, 2024
1 parent 187abd7 commit 67d8c43
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
4 changes: 2 additions & 2 deletions script/conllu2korapxml
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ foreach my $conllu_file (@conllu_files) {
} elsif ( !/^\s*$/ ) {
if ( !$docid || scalar @spansTo == 0 || scalar @spansFrom == 0 ) {
if ( !$docid ) {
$log->warn("WARNING: No valid input document: text_id (e.g. '# text_id = GOE_AGA.00000') missing");
$log->warn("WARNING: Invalid input in $conllu_file: text_id (e.g. '# text_id = GOE_AGA.00000') missing in line $. when writing to $outh");
}
if ( scalar @spansTo == 0 || scalar @spansFrom == 0 ) {
$log->warn("WARNING: No valid input document: token offsets missing");
$log->warn("WARNING: Invalid input in $conllu_file: token offsets missing in line $. when writing to $outh");
}

# Skip to next potentially valid document
Expand Down
5 changes: 2 additions & 3 deletions t/test.t
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,6 @@ like($zipcontent, qr/.*<f name="pos">NN|NN<\/f>.*/, "conllu2korapxml does not ig
script_runs([ 'script/conllu2korapxml', '-l', 'debug', 't/data/goe.ud.conllu' ], {stdout => \$zipcontent}, "Runs conllu2korap with UDPipe and unparsable comments");
script_stderr_like "Foundry:\\s+ud", "Found generator based foundry";
script_stderr_like "Ignored\\s+foundry\\s+name:\\s+base", "Ignore defined foundry";

$zipfile = "$test_tempdir/goe.ud.zip";
open($fh, ">", $zipfile) or fail("cannot open file $zipfile for writing");
print $fh $zipcontent;
Expand All @@ -210,8 +209,8 @@ like($zipcontent, qr@GOE/AGA/00000/ud/dependency\.xml@, "conllu2korapxml UDPipe

script_runs([ 'script/conllu2korapxml', 't/data/deu-deps.conllu' ], "Runs conllu2korap with UDPipe input");
script_stderr_unlike "fileparse(): need a valid pathname", "Ignore sent_id and newdoc id";
script_stderr_like "WARNING: No valid input document.*token offsets missing", "Warn on missing token offsets";
script_stderr_like qr@WARNING: No valid input document.*text.id .*missing@, "Warn on missing text ids";
script_stderr_like qr@WARNING: Invalid input in.*deu-deps.conllu.*token offsets missing.*in line \d+@, "Warn on missing token offsets";
script_stderr_like qr@WARNING: Invalid input in.*deu-deps.conllu.*text.id .*missing.*in line \d+@, "Warn on missing text ids";

script_runs([ 'script/korapxml2conllu', "t/data/nkjp.zip" ], "Runs korapxml2conllu on nkjp test data");
script_stderr_unlike("Use of uninitialized value", "Handles lonely docid parameters (line separated from layer elements)");
Expand Down

0 comments on commit 67d8c43

Please sign in to comment.