From 67d8c43cd0ec74481e4cf1e117a071f7fcc5a4bf Mon Sep 17 00:00:00 2001 From: Marc Kupietz Date: Tue, 25 Jun 2024 14:32:16 +0200 Subject: [PATCH] 2korapxml: Print input and output filenames in warnings ... as well as line number, to be able to localise errors. Change-Id: I58b401a0432b1c21e273793b08cd76553be63a2d --- script/conllu2korapxml | 4 ++-- t/test.t | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/script/conllu2korapxml b/script/conllu2korapxml index eb381c4..c8e4ec0 100755 --- a/script/conllu2korapxml +++ b/script/conllu2korapxml @@ -138,10 +138,10 @@ foreach my $conllu_file (@conllu_files) { } elsif ( !/^\s*$/ ) { if ( !$docid || scalar @spansTo == 0 || scalar @spansFrom == 0 ) { if ( !$docid ) { - $log->warn("WARNING: No valid input document: text_id (e.g. '# text_id = GOE_AGA.00000') missing"); + $log->warn("WARNING: Invalid input in $conllu_file: text_id (e.g. '# text_id = GOE_AGA.00000') missing in line $. when writing to $outh"); } if ( scalar @spansTo == 0 || scalar @spansFrom == 0 ) { - $log->warn("WARNING: No valid input document: token offsets missing"); + $log->warn("WARNING: Invalid input in $conllu_file: token offsets missing in line $. when writing to $outh"); } # Skip to next potentially valid document diff --git a/t/test.t b/t/test.t index 562b594..aa36e11 100644 --- a/t/test.t +++ b/t/test.t @@ -186,7 +186,6 @@ like($zipcontent, qr/.*NN|NN<\/f>.*/, "conllu2korapxml does not ig script_runs([ 'script/conllu2korapxml', '-l', 'debug', 't/data/goe.ud.conllu' ], {stdout => \$zipcontent}, "Runs conllu2korap with UDPipe and unparsable comments"); script_stderr_like "Foundry:\\s+ud", "Found generator based foundry"; script_stderr_like "Ignored\\s+foundry\\s+name:\\s+base", "Ignore defined foundry"; - $zipfile = "$test_tempdir/goe.ud.zip"; open($fh, ">", $zipfile) or fail("cannot open file $zipfile for writing"); print $fh $zipcontent; @@ -210,8 +209,8 @@ like($zipcontent, qr@GOE/AGA/00000/ud/dependency\.xml@, "conllu2korapxml UDPipe script_runs([ 'script/conllu2korapxml', 't/data/deu-deps.conllu' ], "Runs conllu2korap with UDPipe input"); script_stderr_unlike "fileparse(): need a valid pathname", "Ignore sent_id and newdoc id"; -script_stderr_like "WARNING: No valid input document.*token offsets missing", "Warn on missing token offsets"; -script_stderr_like qr@WARNING: No valid input document.*text.id .*missing@, "Warn on missing text ids"; +script_stderr_like qr@WARNING: Invalid input in.*deu-deps.conllu.*token offsets missing.*in line \d+@, "Warn on missing token offsets"; +script_stderr_like qr@WARNING: Invalid input in.*deu-deps.conllu.*text.id .*missing.*in line \d+@, "Warn on missing text ids"; script_runs([ 'script/korapxml2conllu', "t/data/nkjp.zip" ], "Runs korapxml2conllu on nkjp test data"); script_stderr_unlike("Use of uninitialized value", "Handles lonely docid parameters (line separated from layer elements)");