Skip to content

Commit

Permalink
#5333 - Export diff data in JSON format to facilitate external processing
Browse files Browse the repository at this point in the history

- Move agreement service to backend module
- Added JSON export format to agreement page
  • Loading branch information
reckart committed Mar 8, 2025
1 parent 1b47cf5 commit d4e851e
Show file tree
Hide file tree
Showing 11 changed files with 242 additions and 109 deletions.
18 changes: 18 additions & 0 deletions inception/inception-agreement/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>

<dependency>
<groupId>org.apache.uima</groupId>
Expand Down Expand Up @@ -102,6 +110,11 @@
<groupId>org.dkpro.statistics</groupId>
<artifactId>dkpro-statistics-agreement</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>

<!-- Spring dependencies -->
<dependency>
Expand Down Expand Up @@ -164,6 +177,11 @@
<artifactId>dkpro-core-io-conll-asl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-ner-asl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-io-xmi-asl</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.ui.agreement.page;
package de.tudarmstadt.ukp.clarin.webanno.agreement;

import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
Expand All @@ -27,24 +28,33 @@
import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer;
import de.tudarmstadt.ukp.clarin.webanno.model.Project;
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
import de.tudarmstadt.ukp.clarin.webanno.security.model.User;

public interface AgreementService
{
Map<SourceDocument, List<AnnotationDocument>> getDocumentsToEvaluate(Project aProject,
List<SourceDocument> aDocuments, DefaultAgreementTraits aTraits);

void exportPairwiseDiff(OutputStream aOut, AnnotationFeature aFeature, String aMeasure,
DefaultAgreementTraits aTraits, User aCurrentUser, List<SourceDocument> aDocuments,
String aAnnotator1, String aAnnotator2);
/**
* Exports the diff between the annotations of the given annotators on the given documents to
* the target stream.
*
* @param aOut
* target stream the diff is written to
* @param aLayer
* the layer to diff.
* @param aFeature
* the feature to diff. If this is null, then the diff is only based on positions.
* @param aTraits
* the diff settings
* @param aDocuments
* the documents to diff
* @param aAnnotators
* the annotators (data owners) whose annotations are diffed
*/
void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeature aFeature,
DefaultAgreementTraits aTraits, List<SourceDocument> aDocuments,
List<String> aAnnotators);

void exportPairwiseDiff(OutputStream aOut, AnnotationLayer aLayer, String aMeasure,
DefaultAgreementTraits aTraits, User aCurrentUser, List<SourceDocument> aDocuments,
String aAnnotator1, String aAnnotator2);

void exportDiff(OutputStream aOut, AnnotationFeature aFeature, DefaultAgreementTraits aTraits,
User aCurrentUser, List<SourceDocument> aDocuments, List<String> aAnnotators);

void exportDiff(OutputStream aOut, AnnotationLayer aLayer, DefaultAgreementTraits aTraits,
User aCurrentUser, List<SourceDocument> aDocuments, List<String> aAnnotators);
/**
* Exports the annotations of the given span layer as JSON to the target stream.
* <p>
* The implementation in this commit writes a single JSON array containing one object per
* annotation with the fields {@code doc} (document name), {@code user} (data owner),
* {@code begin} and {@code end}. A {@code label} field holding the rendered feature value is
* added only if a feature is given. Documents are processed sorted by name.
*
* @param aOut
* target stream
* @param aLayer
* the layer to export. The implementation in this commit rejects layers whose type is not
* the span layer type.
* @param aFeature
* the feature whose value is exported as the label. If this is null, then no label is
* written.
* @param aTraits
* the settings used to determine which documents are evaluated
* @param aDocuments
* the documents to export
* @param aAnnotators
* the annotators (data owners) whose annotations are exported
* @throws IOException
* if the data could not be loaded or written
* @throws IllegalArgumentException
* if the layer is not a span layer (as enforced by the implementation in this commit)
*/
void exportSpanLayerDataAsJson(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeature aFeature,
DefaultAgreementTraits aTraits, List<SourceDocument> aDocuments,
List<String> aAnnotators)
throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.ui.agreement.page;
package de.tudarmstadt.ukp.clarin.webanno.agreement;

import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasAccessMode.SHARED_READ_ONLY_ACCESS;
import static de.tudarmstadt.ukp.clarin.webanno.api.casstorage.CasUpgradeMode.AUTO_CAS_UPGRADE;
Expand Down Expand Up @@ -54,10 +54,12 @@
import org.apache.commons.io.output.CloseShieldOutputStream;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.util.FSUtil;
import org.apache.uima.jcas.tcas.Annotation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementUtils;
import com.fasterxml.jackson.core.JsonFactory;

import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.DefaultAgreementTraits;
import de.tudarmstadt.ukp.clarin.webanno.agreement.results.coding.FullCodingAgreementResult;
import de.tudarmstadt.ukp.clarin.webanno.api.casstorage.session.CasStorageSession;
Expand All @@ -69,7 +71,7 @@
import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument;
import de.tudarmstadt.ukp.clarin.webanno.model.Tag;
import de.tudarmstadt.ukp.clarin.webanno.security.UserDao;
import de.tudarmstadt.ukp.clarin.webanno.security.model.User;
import de.tudarmstadt.ukp.inception.annotation.layer.span.SpanLayerSupport;
import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
import de.tudarmstadt.ukp.inception.support.uima.WebAnnoCasUtil;
Expand Down Expand Up @@ -124,28 +126,74 @@ public Map<SourceDocument, List<AnnotationDocument>> getDocumentsToEvaluate(Proj
}

@Override
public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, DefaultAgreementTraits traits,
User aCurrentUser, List<SourceDocument> aDocuments, List<String> aAnnotators)
public void exportSpanLayerDataAsJson(OutputStream aOut, AnnotationLayer aLayer,
AnnotationFeature aFeature, DefaultAgreementTraits aTraits,
List<SourceDocument> aDocuments, List<String> aAnnotators)
throws IOException
{
exportDiff(aOut, aLayer, null, traits, aCurrentUser, aDocuments, aAnnotators);
}
if (!SpanLayerSupport.TYPE.equals(aLayer.getType())) {
throw new IllegalArgumentException(
"Only span layers supported but got [" + aLayer.getType() + "]");
}

@Override
public void exportDiff(OutputStream aOut, AnnotationFeature aFeature,
DefaultAgreementTraits traits, User aCurrentUser, List<SourceDocument> aDocuments,
List<String> aAnnotators)
{
exportDiff(aOut, aFeature.getLayer(), aFeature, traits, aCurrentUser, aDocuments,
aAnnotators);
var project = aLayer.getProject();

var allAnnDocs = getDocumentsToEvaluate(project, aDocuments, aTraits);
var docs = allAnnDocs.keySet().stream() //
.sorted(comparing(SourceDocument::getName)) //
.toList();

var featureName = aFeature != null ? aFeature.getName() : null;

var jsonFactory = new JsonFactory();

var adapter = schemaService.getAdapter(aLayer);

try (var jg = jsonFactory.createGenerator(CloseShieldOutputStream.wrap(aOut))) {
jg.useDefaultPrettyPrinter();

jg.writeStartArray();

for (var doc : docs) {
var annDocs = allAnnDocs.get(doc);
try (var session = CasStorageSession.openNested()) {
var casMap = loadCasForAnnotators(doc, annDocs, aAnnotators);

for (var mapEntry : casMap.entrySet()) {
var dataOwner = mapEntry.getKey();
var cas = mapEntry.getValue();

for (var ann : cas.<Annotation> select(adapter.getAnnotationTypeName())) {
jg.writeStartObject();
jg.writeStringField("doc", doc.getName());
jg.writeStringField("user", dataOwner);
jg.writeNumberField("begin", ann.getBegin());
jg.writeNumberField("end", ann.getEnd());
if (featureName != null) {
var label = adapter.renderFeatureValue(ann, featureName);
jg.writeStringField("label", label);
}
jg.writeEndObject();
}

jg.flush();
}
}
}

jg.writeEndArray();
jg.flush();
}
}

@Override
public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeature aFeature,
DefaultAgreementTraits traits, User aCurrentUser, List<SourceDocument> aDocuments,
DefaultAgreementTraits aTraits, List<SourceDocument> aDocuments,
List<String> aAnnotators)
{
var project = aLayer.getProject();
var allAnnDocs = getDocumentsToEvaluate(project, aDocuments, traits);

var allAnnDocs = getDocumentsToEvaluate(project, aDocuments, aTraits);
var docs = allAnnDocs.keySet().stream() //
.sorted(comparing(SourceDocument::getName)) //
.toList();
Expand All @@ -158,21 +206,18 @@ public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeat
.collect(toCollection(LinkedHashSet::new))
: emptySet();

var featureName = aFeature != null ? aFeature.getName() : null;

var countWritten = 0;
for (var doc : docs) {
var annDocs = allAnnDocs.get(doc);
try (var session = CasStorageSession.openNested()) {
var casMap = new LinkedHashMap<String, CAS>();
for (var annotator : aAnnotators) {
var maybeCas = loadCas(doc, annotator, allAnnDocs);
var cas = maybeCas.isPresent() ? maybeCas.get() : loadInitialCas(doc);
casMap.put(annotator, cas);
}
var casMap = loadCasForAnnotators(doc, annDocs, aAnnotators);

var diff = doDiff(adapters, casMap);

var featureName = aFeature != null ? aFeature.getName() : null;
var result = AgreementUtils.makeCodingStudy(diff, aLayer.getName(), featureName,
tagset, traits.isExcludeIncomplete(), casMap);
tagset, aTraits.isExcludeIncomplete(), casMap);

try (var printer = new CSVPrinter(
new OutputStreamWriter(CloseShieldOutputStream.wrap(aOut), UTF_8),
Expand All @@ -190,26 +235,23 @@ public void exportDiff(OutputStream aOut, AnnotationLayer aLayer, AnnotationFeat
}
}

@Override
public void exportPairwiseDiff(OutputStream aOut, AnnotationLayer aLayer, String aMeasure,
DefaultAgreementTraits aTraits, User aCurrentUser, List<SourceDocument> aDocuments,
String aAnnotator1, String aAnnotator2)
private LinkedHashMap<String, CAS> loadCasForAnnotators(SourceDocument aDocument,
List<AnnotationDocument> aAnnDocs, List<String> aAnnotators)
throws IOException
{
exportDiff(aOut, aLayer, null, aTraits, aCurrentUser, aDocuments,
asList(aAnnotator1, aAnnotator2));
}
var casMap = new LinkedHashMap<String, CAS>();

@Override
public void exportPairwiseDiff(OutputStream aOut, AnnotationFeature aFeature, String aMeasure,
DefaultAgreementTraits aTraits, User aCurrentUser, List<SourceDocument> aDocuments,
String aAnnotator1, String aAnnotator2)
{
exportDiff(aOut, aFeature.getLayer(), aFeature, aTraits, aCurrentUser, aDocuments,
asList(aAnnotator1, aAnnotator2));
for (var annotator : aAnnotators) {
var maybeCas = loadCas(aDocument, annotator, aAnnDocs);
var cas = maybeCas.isPresent() ? maybeCas.get() : loadInitialCas(aDocument);
casMap.put(annotator, cas);
}

return casMap;
}

private Optional<CAS> loadCas(SourceDocument aDocument, String aDataOwner,
Map<SourceDocument, List<AnnotationDocument>> aAllAnnDocs)
List<AnnotationDocument> aAnnDocs)
throws IOException
{
if (CURATION_USER.equals(aDataOwner)) {
Expand All @@ -220,9 +262,7 @@ private Optional<CAS> loadCas(SourceDocument aDocument, String aDataOwner,
return loadCas(aDocument, aDataOwner);
}

var annDocs = aAllAnnDocs.get(aDocument);

if (annDocs.stream().noneMatch(annDoc -> aDataOwner.equals(annDoc.getUser()))) {
if (aAnnDocs.stream().noneMatch(annDoc -> aDataOwner.equals(annDoc.getUser()))) {
return Optional.empty();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,25 @@

import org.springframework.context.annotation.Bean;

import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementService;
import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementServiceImpl;
import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.cohenkappa.CohenKappaAgreementMeasureSupport;
import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.fleisskappa.FleissKappaAgreementMeasureSupport;
import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.krippendorffalpha.KrippendorffAlphaAgreementMeasureSupport;
import de.tudarmstadt.ukp.clarin.webanno.agreement.measures.krippendorffalphaunitizing.KrippendorffAlphaUnitizingAgreementMeasureSupport;
import de.tudarmstadt.ukp.clarin.webanno.security.UserDao;
import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;

public class AgreementAutoConfiguration
{
/**
* Provides the {@link AgreementService} bean, implemented by {@link AgreementServiceImpl}.
*
* @param aDocumentService
* the document service passed to the implementation
* @param aSchemaService
* the annotation schema service passed to the implementation
* @param aUserService
* the user repository passed to the implementation
* @return a new {@link AgreementServiceImpl} instance
*/
@Bean
public AgreementService agreementService(DocumentService aDocumentService,
AnnotationSchemaService aSchemaService, UserDao aUserService)
{
return new AgreementServiceImpl(aDocumentService, aSchemaService, aUserService);
}

@Bean
public CohenKappaAgreementMeasureSupport cohenKappaAgreementMeasureSupport(
AnnotationSchemaService aAnnotationService)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,19 @@ public void execute()

try (var session = CasStorageSession.openNested()) {
for (int m = 0; m < annotators.size(); m++) {
if (getMonitor().isCancelled()) {
break;
}

var annotator1 = annotators.get(m);
var maybeCas1 = LazyInitializer.<Optional<CAS>> builder()
.setInitializer(() -> loadCas(doc, annotator1, allAnnDocs)).get();

for (int n = 0; n < annotators.size(); n++) {
if (getMonitor().isCancelled()) {
break;
}

if (!(n < m)) {
// Triangle matrix mirrored
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.ui.agreement.page;
package de.tudarmstadt.ukp.clarin.webanno.agreement;

import static de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementUtils.makeCodingStudy;
import static de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff.doDiff;
Expand All @@ -35,6 +35,7 @@
import org.apache.uima.fit.factory.CasFactory;
import org.junit.jupiter.api.Test;

import de.tudarmstadt.ukp.clarin.webanno.agreement.AgreementServiceImpl;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.inception.support.uima.AnnotationBuilder;

Expand Down
4 changes: 0 additions & 4 deletions inception/inception-ui-agreement/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,9 @@
*/
package de.tudarmstadt.ukp.inception.ui.agreement.config;

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import de.tudarmstadt.ukp.clarin.webanno.security.UserDao;
import de.tudarmstadt.ukp.inception.documents.api.DocumentService;
import de.tudarmstadt.ukp.inception.schema.api.AnnotationSchemaService;
import de.tudarmstadt.ukp.inception.ui.agreement.page.AgreementService;
import de.tudarmstadt.ukp.inception.ui.agreement.page.AgreementServiceImpl;

@Configuration
public class AgreementUiAutoConfiguration
{
@Bean
public AgreementService agreementService(DocumentService aDocumentService,
AnnotationSchemaService aSchemaService, UserDao aUserService)
{
return new AgreementServiceImpl(aDocumentService, aSchemaService, aUserService);
}
}
Loading

0 comments on commit d4e851e

Please sign in to comment.