Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development: Extract plagiarism detection service #7152

Merged
merged 29 commits into from
Sep 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6926ebf
extract plagiarism detection service
jakubriegel Sep 7, 2023
4051833
fix similarity for modeling exercises
jakubriegel Sep 7, 2023
fc8c70e
fix code tests
jakubriegel Sep 7, 2023
9dfc891
extract save logic to a method
jakubriegel Sep 7, 2023
9fe3bc8
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 7, 2023
c83eefb
remove unused field
jakubriegel Sep 7, 2023
c23635b
add doc
jakubriegel Sep 7, 2023
cbfa5f2
Merge remote-tracking branch 'origin/develop' into extract-plagiarism…
jakubriegel Sep 7, 2023
c80f975
add missing @param
jakubriegel Sep 7, 2023
c433abc
fix field removal
jakubriegel Sep 8, 2023
ce8c57a
Merge remote-tracking branch 'origin/develop' into extract-plagiarism…
jakubriegel Sep 9, 2023
521dee1
merge develop
jakubriegel Sep 10, 2023
2647011
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 11, 2023
3cf1869
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 13, 2023
650a501
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 14, 2023
b7da306
Merge branch 'develop' into extract-plagiarism-detection-service
MarkusPaulsen Sep 16, 2023
926e4e0
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 18, 2023
017da6c
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 18, 2023
1426eeb
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 20, 2023
9e030b9
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 20, 2023
aeb4ca6
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 22, 2023
1eaf3f1
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 23, 2023
0b2ab59
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 24, 2023
e8b1d0f
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 25, 2023
2eeae47
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 28, 2023
7497456
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 29, 2023
7ffdc1a
Plagiarism checks: Introduce min length parameter for programming exe…
jakubriegel Sep 30, 2023
88082cd
Merge branch 'develop' into extract-plagiarism-detection-service
jakubriegel Sep 30, 2023
c4688a3
convert PlagiarismDetectionConfig to record
jakubriegel Sep 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package de.tum.in.www1.artemis.domain.plagiarism;

/**
 * Stores configuration for plagiarism detection.
 * <p>
 * The three values are handed unchanged to the type-specific plagiarism detection services
 * (text, programming, modeling) when a check is triggered.
 *
 * @param similarityThreshold minimum similarity a comparison must reach to be reported;
 *                            presumably expressed as a percentage (0-100) - TODO confirm against the detection services
 * @param minimumScore        minimum score forwarded to the detection services; presumably the score a submission
 *                            needs to be considered - TODO confirm
 * @param minimumSize         minimum size forwarded to the detection services; presumably the size a submission
 *                            needs to be considered - TODO confirm
 */
public record PlagiarismDetectionConfig(float similarityThreshold, int minimumScore, int minimumSize) {
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
Expand All @@ -19,6 +20,7 @@

import de.tum.in.www1.artemis.domain.DomainObject;
import de.tum.in.www1.artemis.domain.ProgrammingExercise;
import de.tum.in.www1.artemis.domain.VcsRepositoryUrl;
import de.tum.in.www1.artemis.domain.hestia.ProgrammingExerciseGitDiffEntry;
import de.tum.in.www1.artemis.domain.hestia.ProgrammingExerciseGitDiffReport;
import de.tum.in.www1.artemis.domain.participation.SolutionProgrammingExerciseParticipation;
Expand Down Expand Up @@ -158,6 +160,33 @@ public ProgrammingExerciseGitDiffReport getOrCreateReportOfExercise(ProgrammingE
}
}

/**
 * Diffs two already checked out repositories against each other and adds up the line counts of all diff entries.
 * <p>
 * The working tree of repository B is used as the old side of the diff and the working tree of repository A as the new side.
 * If the diff cannot be computed, the error is logged and {@link Integer#MAX_VALUE} is returned instead of an exception being thrown.
 *
 * @param urlRepoA       url of the first repo to compare
 * @param localPathRepoA local path to the checked out instance of the first repo to compare
 * @param urlRepoB       url of the second repo to compare
 * @param localPathRepoB local path to the checked out instance of the second repo to compare
 * @return cumulative number of lines in the git diff of the given repositories, or {@link Integer#MAX_VALUE} if the diff failed
 */
public int calculateNumberOfDiffLinesBetweenRepos(VcsRepositoryUrl urlRepoA, Path localPathRepoA, VcsRepositoryUrl urlRepoB, Path localPathRepoB) {
    final var firstRepository = gitService.getExistingCheckedOutRepositoryByLocalPath(localPathRepoA, urlRepoA);
    final var secondRepository = gitService.getExistingCheckedOutRepositoryByLocalPath(localPathRepoB, urlRepoB);

    final var firstWorkingTree = new FileTreeIterator(firstRepository);
    final var secondWorkingTree = new FileTreeIterator(secondRepository);

    try (var rawDiffOutput = new ByteArrayOutputStream(); var git = Git.wrap(secondRepository)) {
        // old side = repository B, new side = repository A
        git.diff().setOldTree(secondWorkingTree).setNewTree(firstWorkingTree).setOutputStream(rawDiffOutput).call();

        // removals are counted into the entry line count as well (absolute line count mode)
        int totalDiffLines = 0;
        for (ProgrammingExerciseGitDiffEntry diffEntry : extractDiffEntries(rawDiffOutput.toString(), true)) {
            totalDiffLines += diffEntry.getLineCount();
        }
        return totalDiffLines;
    }
    catch (IOException | GitAPIException e) {
        log.error("Error calculating number of diff lines between repositories: urlRepoA={}, urlRepoB={}.", urlRepoA, urlRepoB, e);
        return Integer.MAX_VALUE;
    }
}

/**
* Creates a new ProgrammingExerciseGitDiffReport for an exercise.
* It will take the git-diff between the template and solution repositories and return all changes.
Expand All @@ -183,7 +212,7 @@ private ProgrammingExerciseGitDiffReport generateReport(TemplateProgrammingExerc
try (ByteArrayOutputStream diffOutputStream = new ByteArrayOutputStream(); Git git = Git.wrap(templateRepo)) {
git.diff().setOldTree(oldTreeParser).setNewTree(newTreeParser).setOutputStream(diffOutputStream).call();
var diff = diffOutputStream.toString();
var programmingExerciseGitDiffEntries = extractDiffEntries(diff);
var programmingExerciseGitDiffEntries = extractDiffEntries(diff, false);
var report = new ProgrammingExerciseGitDiffReport();
for (ProgrammingExerciseGitDiffEntry gitDiffEntry : programmingExerciseGitDiffEntries) {
gitDiffEntry.setGitDiffReport(report);
Expand All @@ -199,7 +228,7 @@ private ProgrammingExerciseGitDiffReport generateReport(TemplateProgrammingExerc
* @param diff The raw git-diff output
* @return The extracted ProgrammingExerciseGitDiffEntries
*/
private List<ProgrammingExerciseGitDiffEntry> extractDiffEntries(String diff) {
private List<ProgrammingExerciseGitDiffEntry> extractDiffEntries(String diff, boolean useAbsoluteLineCount) {
var lines = diff.split("\n");
var parserState = new ParserState();

Expand All @@ -216,7 +245,7 @@ private List<ProgrammingExerciseGitDiffEntry> extractDiffEntries(String diff) {
else if (!parserState.deactivateCodeReading) {
switch (line.charAt(0)) {
case '+' -> handleAddition(parserState);
case '-' -> handleRemoval(parserState);
case '-' -> handleRemoval(parserState, useAbsoluteLineCount);
case ' ' -> handleUnchanged(parserState);
default -> parserState.deactivateCodeReading = true;
}
Expand Down Expand Up @@ -262,7 +291,7 @@ private void handleUnchanged(ParserState parserState) {
parserState.currentPreviousLineCount++;
}

private void handleRemoval(ParserState parserState) {
private void handleRemoval(ParserState parserState, boolean useAbsoluteLineCount) {
var entry = parserState.currentEntry;
if (!parserState.lastLineRemoveOperation && !entry.isEmpty()) {
parserState.entries.add(entry);
Expand All @@ -274,7 +303,16 @@ private void handleRemoval(ParserState parserState) {
entry.setPreviousLineCount(0);
entry.setPreviousStartLine(parserState.currentPreviousLineCount);
}
entry.setPreviousLineCount(entry.getPreviousLineCount() + 1);
if (useAbsoluteLineCount) {
if (parserState.currentEntry.getLineCount() == null) {
parserState.currentEntry.setLineCount(0);
parserState.currentEntry.setStartLine(parserState.currentLineCount);
}
parserState.currentEntry.setLineCount(parserState.currentEntry.getLineCount() + 1);
}
else {
entry.setPreviousLineCount(entry.getPreviousLineCount() + 1);
}

parserState.currentEntry = entry;
parserState.lastLineRemoveOperation = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public ModelingPlagiarismResult checkPlagiarism(List<ModelingSubmission> modelin
final double similarity = model1.similarity(model2);
log.debug("Compare result {} with {}: {}", i, j, similarity);

if (similarity < minimumSimilarity) {
if (similarity * 100 < minimumSimilarity) {
// ignore comparison results with too small similarity
continue;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package de.tum.in.www1.artemis.service.plagiarism;

import java.io.File;
import java.io.IOException;
import java.util.Optional;

import org.jvnet.hk2.annotations.Service;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import de.jplag.exceptions.ExitException;
import de.tum.in.www1.artemis.domain.ProgrammingExercise;
import de.tum.in.www1.artemis.domain.TextExercise;
import de.tum.in.www1.artemis.domain.modeling.ModelingExercise;
import de.tum.in.www1.artemis.domain.plagiarism.PlagiarismDetectionConfig;
import de.tum.in.www1.artemis.domain.plagiarism.PlagiarismResult;
import de.tum.in.www1.artemis.domain.plagiarism.modeling.ModelingPlagiarismResult;
import de.tum.in.www1.artemis.domain.plagiarism.text.TextPlagiarismResult;
import de.tum.in.www1.artemis.repository.plagiarism.PlagiarismResultRepository;
import de.tum.in.www1.artemis.service.programming.ProgrammingLanguageFeatureService;

/**
 * Service for triggering plagiarism checks.
 * <p>
 * Delegates the actual detection to the exercise-type specific services (text, programming, modeling) and,
 * for text and modeling exercises, trims and persists the result before preparing it for the client.
 */
// NOTE(review): per this file's imports, @Service resolves to org.jvnet.hk2.annotations.Service, which is not Spring's
// stereotype annotation. Bean registration therefore relies on @Component alone - presumably
// org.springframework.stereotype.Service was intended; confirm and fix the import.
@Service
@Component
public class PlagiarismDetectionService {

    private static final Logger log = LoggerFactory.getLogger(PlagiarismDetectionService.class);

    private final TextPlagiarismDetectionService textPlagiarismDetectionService;

    private final Optional<ProgrammingLanguageFeatureService> programmingLanguageFeatureService;

    private final ProgrammingPlagiarismDetectionService programmingPlagiarismDetectionService;

    private final ModelingPlagiarismDetectionService modelingPlagiarismDetectionService;

    private final PlagiarismResultRepository plagiarismResultRepository;

    public PlagiarismDetectionService(TextPlagiarismDetectionService textPlagiarismDetectionService, Optional<ProgrammingLanguageFeatureService> programmingLanguageFeatureService,
            ProgrammingPlagiarismDetectionService programmingPlagiarismDetectionService, ModelingPlagiarismDetectionService modelingPlagiarismDetectionService,
            PlagiarismResultRepository plagiarismResultRepository) {
        this.textPlagiarismDetectionService = textPlagiarismDetectionService;
        this.programmingLanguageFeatureService = programmingLanguageFeatureService;
        this.programmingPlagiarismDetectionService = programmingPlagiarismDetectionService;
        this.modelingPlagiarismDetectionService = modelingPlagiarismDetectionService;
        this.plagiarismResultRepository = plagiarismResultRepository;
    }

    /**
     * Check plagiarism in given text exercise.
     * The result is limited, saved (replacing the previous result) and prepared for the client before it is returned.
     *
     * @param exercise exercise to check plagiarism
     * @param config   configuration for plagiarism detection
     * @return result of plagiarism checks
     * @throws ExitException if the underlying text plagiarism check fails with it
     */
    public TextPlagiarismResult checkTextExercise(TextExercise exercise, PlagiarismDetectionConfig config) throws ExitException {
        var plagiarismResult = textPlagiarismDetectionService.checkPlagiarism(exercise, config.similarityThreshold(), config.minimumScore(), config.minimumSize());
        log.info("Finished textPlagiarismDetectionService.checkPlagiarism for exercise {} with {} comparisons", exercise.getId(), plagiarismResult.getComparisons().size());

        trimAndSavePlagiarismResult(plagiarismResult);
        return plagiarismResult;
    }

    /**
     * Check plagiarism in given programming exercise.
     * <p>
     * Unlike the text and modeling checks, this method does not trim or save the result itself; it only prepares the
     * result for the client and attaches the exercise. (Presumably the programming detection service persists the
     * result on its own - verify against {@code ProgrammingPlagiarismDetectionService}.)
     *
     * @param exercise exercise to check plagiarism
     * @param config   configuration for plagiarism detection
     * @return result of plagiarism checks
     * @throws ExitException                                                  if the underlying programming plagiarism check fails with it
     * @throws IOException                                                    if the underlying programming plagiarism check fails with it
     * @throws ProgrammingLanguageNotSupportedForPlagiarismDetectionException if plagiarism checks are not supported for the programming language of the exercise
     */
    public TextPlagiarismResult checkProgrammingExercise(ProgrammingExercise exercise, PlagiarismDetectionConfig config)
            throws ExitException, IOException, ProgrammingLanguageNotSupportedForPlagiarismDetectionException {
        checkProgrammingLanguageSupport(exercise);

        var plagiarismResult = programmingPlagiarismDetectionService.checkPlagiarism(exercise.getId(), config.similarityThreshold(), config.minimumScore(), config.minimumSize());
        log.info("Finished programmingPlagiarismDetectionService.checkPlagiarism call for {} comparisons", plagiarismResult.getComparisons().size());

        plagiarismResultRepository.prepareResultForClient(plagiarismResult);

        // make sure that participation is included in the exercise
        plagiarismResult.setExercise(exercise);
        return plagiarismResult;
    }

    /**
     * Check plagiarism in given programming exercise and output a JPlag report.
     *
     * @param exercise exercise to check plagiarism
     * @param config   configuration for plagiarism detection
     * @return JPlag report of plagiarism checks
     * @throws ProgrammingLanguageNotSupportedForPlagiarismDetectionException if plagiarism checks are not supported for the programming language of the exercise
     */
    public File checkProgrammingExerciseWithJplagReport(ProgrammingExercise exercise, PlagiarismDetectionConfig config)
            throws ProgrammingLanguageNotSupportedForPlagiarismDetectionException {
        checkProgrammingLanguageSupport(exercise);
        return programmingPlagiarismDetectionService.checkPlagiarismWithJPlagReport(exercise.getId(), config.similarityThreshold(), config.minimumScore(), config.minimumSize());
    }

    /**
     * Check plagiarism in given modeling exercise.
     * The result is limited, saved (replacing the previous result) and prepared for the client before it is returned.
     *
     * @param exercise exercise to check plagiarism
     * @param config   configuration for plagiarism detection
     * @return result of plagiarism checks
     */
    public ModelingPlagiarismResult checkModelingExercise(ModelingExercise exercise, PlagiarismDetectionConfig config) {
        // NOTE(review): minimumSize is passed before minimumScore here, the reverse of the text/programming calls -
        // presumably this matches the modeling service's parameter order; confirm against its signature.
        var plagiarismResult = modelingPlagiarismDetectionService.checkPlagiarism(exercise, config.similarityThreshold(), config.minimumSize(), config.minimumScore());
        log.info("Finished modelingPlagiarismDetectionService.checkPlagiarism call for {} comparisons", plagiarismResult.getComparisons().size());

        trimAndSavePlagiarismResult(plagiarismResult);
        return plagiarismResult;
    }

    /**
     * Sorts and limits the given result, stores it while removing the previous one, and prepares it for the client.
     *
     * @param plagiarismResult result to trim, persist and prepare
     */
    private void trimAndSavePlagiarismResult(PlagiarismResult<?> plagiarismResult) {
        // Limit the amount temporarily because of database issues
        plagiarismResult.sortAndLimit(100);
        plagiarismResultRepository.savePlagiarismResultAndRemovePrevious(plagiarismResult);

        plagiarismResultRepository.prepareResultForClient(plagiarismResult);
    }

    /**
     * Verifies that plagiarism checks are supported for the programming language of the given exercise.
     * Fails with a {@link java.util.NoSuchElementException} if no {@code ProgrammingLanguageFeatureService} is available.
     *
     * @param exercise programming exercise whose language is checked
     * @throws ProgrammingLanguageNotSupportedForPlagiarismDetectionException if the language feature reports that plagiarism checks are not supported
     */
    private void checkProgrammingLanguageSupport(ProgrammingExercise exercise) throws ProgrammingLanguageNotSupportedForPlagiarismDetectionException {
        var language = exercise.getProgrammingLanguage();
        var programmingLanguageFeature = programmingLanguageFeatureService.orElseThrow().getProgrammingLanguageFeatures(language);
        if (!programmingLanguageFeature.plagiarismCheckSupported()) {
            throw new ProgrammingLanguageNotSupportedForPlagiarismDetectionException(language);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package de.tum.in.www1.artemis.service.plagiarism;

import de.tum.in.www1.artemis.domain.enumeration.ProgrammingLanguage;

/**
 * Checked exception signalling that plagiarism checks were requested for a programming language
 * for which they are not supported.
 */
public class ProgrammingLanguageNotSupportedForPlagiarismDetectionException extends Exception {

    private static final String MESSAGE_PREFIX = "Artemis does not support plagiarism checks for the programming language ";

    /**
     * Creates the exception with a message naming the unsupported language.
     *
     * @param language programming language for which plagiarism checks are not supported
     */
    ProgrammingLanguageNotSupportedForPlagiarismDetectionException(ProgrammingLanguage language) {
        super(MESSAGE_PREFIX + language);
    }
}
Loading