From d4e851e237e86dee07ab0e21eb5a91d900330f0e Mon Sep 17 00:00:00 2001 From: Richard Eckart de Castilho Date: Sat, 8 Mar 2025 14:17:29 +0100 Subject: [PATCH] #5333 - Export diff data in JSON format to facilitate external processing - Move agreement service to backend module - Added JSON export format to agreement page --- inception/inception-agreement/pom.xml | 18 +++ .../webanno/agreement}/AgreementService.java | 38 ++++-- .../agreement}/AgreementServiceImpl.java | 124 ++++++++++++------ .../config/AgreementAutoConfiguration.java | 11 ++ .../task/CalculatePairwiseAgreementTask.java | 8 ++ .../agreement}/AgreementServiceImplTest.java | 3 +- inception/inception-ui-agreement/pom.xml | 4 - .../config/AgreementUiAutoConfiguration.java | 13 -- .../ui/agreement/page/AgreementPage.html | 20 ++- .../ui/agreement/page/AgreementPage.java | 108 ++++++++++----- .../agreement/page/AgreementPage.properties | 4 +- 11 files changed, 242 insertions(+), 109 deletions(-) rename inception/{inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page => inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement}/AgreementService.java (60%) rename inception/{inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page => inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement}/AgreementServiceImpl.java (76%) rename inception/{inception-ui-agreement/src/test/java/de/tudarmstadt/ukp/inception/ui/agreement/page => inception-agreement/src/test/java/de/tudarmstadt/ukp/clarin/webanno/agreement}/AgreementServiceImplTest.java (98%) diff --git a/inception/inception-agreement/pom.xml b/inception/inception-agreement/pom.xml index 5bdf9d55336..edb8dcfddd6 100644 --- a/inception/inception-agreement/pom.xml +++ b/inception/inception-agreement/pom.xml @@ -39,6 +39,14 @@ org.apache.commons commons-math3 + + org.apache.commons + commons-csv + + + commons-io + commons-io + org.apache.uima @@ -102,6 +110,11 @@ org.dkpro.statistics dkpro-statistics-agreement + + + com.fasterxml.jackson.core + jackson-core + @@ -164,6 +177,11 @@ dkpro-core-io-conll-asl test + + org.dkpro.core + dkpro-core-api-ner-asl + test + org.dkpro.core dkpro-core-io-xmi-asl diff --git a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementService.java b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementService.java similarity index 60% rename from inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementService.java rename to inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementService.java index 8535d6d8c6d..0a2b65f65e0 100644 --- a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementService.java +++ b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementService.java @@ -15,8 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.ui.agreement.page; +package de.tudarmstadt.ukp.clarin.webanno.agreement; +import java.io.IOException; import java.io.OutputStream; import java.util.List; import java.util.Map; @@ -27,24 +28,33 @@ import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; -import de.tudarmstadt.ukp.clarin.webanno.security.model.User; public interface AgreementService { Map> getDocumentsToEvaluate(Project aProject, List aDocuments, DefaultAgreementTraits aTraits); - void exportPairwiseDiff(OutputStream aOut, AnnotationFeature aFeature, String aMeasure, - DefaultAgreementTraits aTraits, User aCurrentUser, List aDocuments, - String aAnnotator1, String aAnnotator2); + /** + * + * @param aOut + * target stream + * @param aLayer + * the layer to diff. + * @param aFeature + * the feature to diff. If this is null, then the diff is only based on positions. + * @param aTraits + * the diff settings + * @param aDocuments + * the documents to diff + * @param aAnnotators + * the annotators to diff + */ + void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeature aFeature, + DefaultAgreementTraits aTraits, List aDocuments, + List aAnnotators); - void exportPairwiseDiff(OutputStream aOut, AnnotationLayer aLayer, String aMeasure, - DefaultAgreementTraits aTraits, User aCurrentUser, List aDocuments, - String aAnnotator1, String aAnnotator2); - - void exportDiff(OutputStream aOut, AnnotationFeature aFeature, DefaultAgreementTraits aTraits, - User aCurrentUser, List aDocuments, List aAnnotators); - - void exportDiff(OutputStream aOut, AnnotationLayer aLayer, DefaultAgreementTraits aTraits, - User aCurrentUser, List aDocuments, List aAnnotators); + void exportSpanLayerDataAsJson(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeature aFeature, + DefaultAgreementTraits aTraits, List aDocuments, + List aAnnotators) + throws IOException; } diff --git a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementServiceImpl.java b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementServiceImpl.java similarity index 76% rename from inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementServiceImpl.java rename to inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementServiceImpl.java index 4dc8cc740e3..3db2fd3c476 100644 --- a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementServiceImpl.java +++ b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementServiceImpl.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.ui.agreement.page; +package de.tudarmstadt.ukp.clarin.webanno.agreement; import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.SHARED_READ_ONLY_ACCESS; import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasUpgradeMode.AUTO_CAS_UPGRADE; @@ -54,10 +54,12 @@ import org.apache.commons.io.output.CloseShieldOutputStream; import org.apache.uima.cas.CAS; import org.apache.uima.fit.util.FSUtil; +import org.apache.uima.jcas.tcas.Annotation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementUtils; +import com.fasterxml.jackson.core.JsonFactory; + import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.DefaultAgreementTraits; import de.tudarmstadt.ukp.clarin.webanno.agreement.results.coding.FullCodingAgreementResult; import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.session.CasStorageSession; @@ -69,7 +71,7 @@ import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; import de.tudarmstadt.ukp.clarin.webanno.model.Tag; import de.tudarmstadt.ukp.clarin.webanno.security.UserDao; -import de.tudarmstadt.ukp.clarin.webanno.security.model.User; +import de.tudarmstadt.ukp.inception.annotation.layer.span.SpanLayerSupport; import de.tudarmstadt.ukp.inception.documents.api.DocumentService; import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; import de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil; @@ -124,28 +126,74 @@ public Map> getDocumentsToEvaluate(Proj } @Override - public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, DefaultAgreementTraits traits, - User aCurrentUser, List aDocuments, List aAnnotators) + public void exportSpanLayerDataAsJson(OutputStream aOut, AnnotationLayer aLayer, + AnnotationFeature aFeature, DefaultAgreementTraits aTraits, + List aDocuments, List aAnnotators) + throws IOException { - exportDiff(aOut, aLayer, null, traits, aCurrentUser, aDocuments, aAnnotators); - } + if (!SpanLayerSupport.TYPE.equals(aLayer.getType())) { + throw new IllegalArgumentException( + "Only span layers supported but got [" + aLayer.getType() + "]"); + } - @Override - public void exportDiff(OutputStream aOut, AnnotationFeature aFeature, - DefaultAgreementTraits traits, User aCurrentUser, List aDocuments, - List aAnnotators) - { - exportDiff(aOut, aFeature.getLayer(), aFeature, traits, aCurrentUser, aDocuments, - aAnnotators); + var project = aLayer.getProject(); + + var allAnnDocs = getDocumentsToEvaluate(project, aDocuments, aTraits); + var docs = allAnnDocs.keySet().stream() // + .sorted(comparing(SourceDocument::getName)) // + .toList(); + + var featureName = aFeature != null ? aFeature.getName() : null; + + var jsonFactory = new JsonFactory(); + + var adapter = schemaService.getAdapter(aLayer); + + try (var jg = jsonFactory.createGenerator(CloseShieldOutputStream.wrap(aOut))) { + jg.useDefaultPrettyPrinter(); + + jg.writeStartArray(); + + for (var doc : docs) { + var annDocs = allAnnDocs.get(doc); + try (var session = CasStorageSession.openNested()) { + var casMap = loadCasForAnnotators(doc, annDocs, aAnnotators); + + for (var mapEntry : casMap.entrySet()) { + var dataOwner = mapEntry.getKey(); + var cas = mapEntry.getValue(); + + for (var ann : cas. select(adapter.getAnnotationTypeName())) { + jg.writeStartObject(); + jg.writeStringField("doc", doc.getName()); + jg.writeStringField("user", dataOwner); + jg.writeNumberField("begin", ann.getBegin()); + jg.writeNumberField("end", ann.getEnd()); + if (featureName != null) { + var label = adapter.renderFeatureValue(ann, featureName); + jg.writeStringField("label", label); + } + jg.writeEndObject(); + } + + jg.flush(); + } + } + } + + jg.writeEndArray(); + jg.flush(); + } } + @Override public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeature aFeature, - DefaultAgreementTraits traits, User aCurrentUser, List aDocuments, + DefaultAgreementTraits aTraits, List aDocuments, List aAnnotators) { var project = aLayer.getProject(); - var allAnnDocs = getDocumentsToEvaluate(project, aDocuments, traits); + var allAnnDocs = getDocumentsToEvaluate(project, aDocuments, aTraits); var docs = allAnnDocs.keySet().stream() // .sorted(comparing(SourceDocument::getName)) // .toList(); @@ -158,21 +206,18 @@ public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeat .collect(toCollection(LinkedHashSet::new)) : emptySet(); + var featureName = aFeature != null ? aFeature.getName() : null; + var countWritten = 0; for (var doc : docs) { + var annDocs = allAnnDocs.get(doc); try (var session = CasStorageSession.openNested()) { - var casMap = new LinkedHashMap(); - for (var annotator : aAnnotators) { - var maybeCas = loadCas(doc, annotator, allAnnDocs); - var cas = maybeCas.isPresent() ? maybeCas.get() : loadInitialCas(doc); - casMap.put(annotator, cas); - } + var casMap = loadCasForAnnotators(doc, annDocs, aAnnotators); var diff = doDiff(adapters, casMap); - var featureName = aFeature != null ? aFeature.getName() : null; var result = AgreementUtils.makeCodingStudy(diff, aLayer.getName(), featureName, - tagset, traits.isExcludeIncomplete(), casMap); + tagset, aTraits.isExcludeIncomplete(), casMap); try (var printer = new CSVPrinter( new OutputStreamWriter(CloseShieldOutputStream.wrap(aOut), UTF_8), @@ -190,26 +235,23 @@ public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeat } } - @Override - public void exportPairwiseDiff(OutputStream aOut, AnnotationLayer aLayer, String aMeasure, - DefaultAgreementTraits aTraits, User aCurrentUser, List aDocuments, - String aAnnotator1, String aAnnotator2) + private LinkedHashMap loadCasForAnnotators(SourceDocument aDocument, + List aAnnDocs, List aAnnotators) + throws IOException { - exportDiff(aOut, aLayer, null, aTraits, aCurrentUser, aDocuments, - asList(aAnnotator1, aAnnotator2)); - } + var casMap = new LinkedHashMap(); - @Override - public void exportPairwiseDiff(OutputStream aOut, AnnotationFeature aFeature, String aMeasure, - DefaultAgreementTraits aTraits, User aCurrentUser, List aDocuments, - String aAnnotator1, String aAnnotator2) - { - exportDiff(aOut, aFeature.getLayer(), aFeature, aTraits, aCurrentUser, aDocuments, - asList(aAnnotator1, aAnnotator2)); + for (var annotator : aAnnotators) { + var maybeCas = loadCas(aDocument, annotator, aAnnDocs); + var cas = maybeCas.isPresent() ? maybeCas.get() : loadInitialCas(aDocument); + casMap.put(annotator, cas); + } + + return casMap; } private Optional loadCas(SourceDocument aDocument, String aDataOwner, - Map> aAllAnnDocs) + List aAnnDocs) throws IOException { if (CURATION_USER.equals(aDataOwner)) { @@ -220,9 +262,7 @@ private Optional loadCas(SourceDocument aDocument, String aDataOwner, return loadCas(aDocument, aDataOwner); } - var annDocs = aAllAnnDocs.get(aDocument); - - if (annDocs.stream().noneMatch(annDoc -> aDataOwner.equals(annDoc.getUser()))) { + if (aAnnDocs.stream().noneMatch(annDoc -> aDataOwner.equals(annDoc.getUser()))) { return Optional.empty(); } diff --git a/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/config/AgreementAutoConfiguration.java b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/config/AgreementAutoConfiguration.java index e7dd4a2b6dc..d889cbc7bea 100644 --- a/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/config/AgreementAutoConfiguration.java +++ b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/config/AgreementAutoConfiguration.java @@ -19,14 +19,25 @@ import org.springframework.context.annotation.Bean; +import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementService; +import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementServiceImpl; import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.cohenkappa.CohenKappaAgreementMeasureSupport; import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.fleisskappa.FleissKappaAgreementMeasureSupport; import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.krippendorffalpha.KrippendorffAlphaAgreementMeasureSupport; import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.krippendorffalphaunitizing.KrippendorffAlphaUnitizingAgreementMeasureSupport; +import de.tudarmstadt.ukp.clarin.webanno.security.UserDao; +import de.tudarmstadt.ukp.inception.documents.api.DocumentService; import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; public class AgreementAutoConfiguration { + @Bean + public AgreementService agreementService(DocumentService aDocumentService, + AnnotationSchemaService aSchemaService, UserDao aUserService) + { + return new AgreementServiceImpl(aDocumentService, aSchemaService, aUserService); + } + @Bean public CohenKappaAgreementMeasureSupport cohenKappaAgreementMeasureSupport( AnnotationSchemaService aAnnotationService) diff --git a/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/task/CalculatePairwiseAgreementTask.java b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/task/CalculatePairwiseAgreementTask.java index c01473abe03..65ae50ef19b 100644 --- a/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/task/CalculatePairwiseAgreementTask.java +++ b/inception/inception-agreement/src/main/java/de/tudarmstadt/ukp/clarin/webanno/agreement/task/CalculatePairwiseAgreementTask.java @@ -100,11 +100,19 @@ public void execute() try (var session = CasStorageSession.openNested()) { for (int m = 0; m < annotators.size(); m++) { + if (getMonitor().isCancelled()) { + break; + } + var annotator1 = annotators.get(m); var maybeCas1 = LazyInitializer.> builder() .setInitializer(() -> loadCas(doc, annotator1, allAnnDocs)).get(); for (int n = 0; n < annotators.size(); n++) { + if (getMonitor().isCancelled()) { + break; + } + if (!(n < m)) { // Triangle matrix mirrored continue; diff --git a/inception/inception-ui-agreement/src/test/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementServiceImplTest.java b/inception/inception-agreement/src/test/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementServiceImplTest.java similarity index 98% rename from inception/inception-ui-agreement/src/test/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementServiceImplTest.java rename to inception/inception-agreement/src/test/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementServiceImplTest.java index f502dd8f377..45273bb7ffa 100644 --- a/inception/inception-ui-agreement/src/test/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementServiceImplTest.java +++ b/inception/inception-agreement/src/test/java/de/tudarmstadt/ukp/clarin/webanno/agreement/AgreementServiceImplTest.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package de.tudarmstadt.ukp.inception.ui.agreement.page; +package de.tudarmstadt.ukp.clarin.webanno.agreement; import static de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementUtils.makeCodingStudy; import static de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.doDiff; @@ -35,6 +35,7 @@ import org.apache.uima.fit.factory.CasFactory; import org.junit.jupiter.api.Test; +import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementServiceImpl; import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity; import de.tudarmstadt.ukp.inception.support.uima.AnnotationBuilder; diff --git a/inception/inception-ui-agreement/pom.xml b/inception/inception-ui-agreement/pom.xml index 7540812e0db..e3de8a49bad 100644 --- a/inception/inception-ui-agreement/pom.xml +++ b/inception/inception-ui-agreement/pom.xml @@ -34,10 +34,6 @@ org.apache.commons commons-lang3 - - org.apache.commons - commons-csv - commons-io commons-io diff --git a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/config/AgreementUiAutoConfiguration.java b/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/config/AgreementUiAutoConfiguration.java index 65cf1c2398d..49831136062 100644 --- a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/config/AgreementUiAutoConfiguration.java +++ b/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/config/AgreementUiAutoConfiguration.java @@ -17,22 +17,9 @@ */ package de.tudarmstadt.ukp.inception.ui.agreement.config; -import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import de.tudarmstadt.ukp.clarin.webanno.security.UserDao; -import de.tudarmstadt.ukp.inception.documents.api.DocumentService; -import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService; -import de.tudarmstadt.ukp.inception.ui.agreement.page.AgreementService; -import de.tudarmstadt.ukp.inception.ui.agreement.page.AgreementServiceImpl; - @Configuration public class AgreementUiAutoConfiguration { - @Bean - public AgreementService agreementService(DocumentService aDocumentService, - AnnotationSchemaService aSchemaService, UserDao aUserService) - { - return new AgreementServiceImpl(aDocumentService, aSchemaService, aUserService); - } } diff --git a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementPage.html b/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementPage.html index 677bcd4e83b..c08eead0104 100644 --- a/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementPage.html +++ b/inception/inception-ui-agreement/src/main/java/de/tudarmstadt/ukp/inception/ui/agreement/page/AgreementPage.html @@ -63,9 +63,23 @@