diff --git a/CHANGELOG.md b/CHANGELOG.md index bb1b831..59d4968 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ # Changelog +## [0.4] - March 10, 2020 + +### Changed [⚠️ Breaking Changes] +- `getShareScores` & `getInsights` have been decoupled from the `check` class; they are now the standalone functions `share_scores` & `insights` and have to be imported separately. +- Minor changes in the `analyze.py` module. + + ## [0.3] - Jan 1, 2020 ### Added diff --git a/demo.py b/demo.py index 05d0a91..15851cb 100644 --- a/demo.py +++ b/demo.py @@ -1,26 +1,30 @@ """Usage example""" import os import pprint -from plagcheck import plagcheck +from plagcheck.plagcheck import check, insights, share_scores from dotenv import load_dotenv load_dotenv() -language = "python" +language = "java" userid = os.environ["USER_ID"] -moss = plagcheck.check(language, userid) +moss = check(language, userid) -moss.addFilesByWildCard("testfiles/test_python*.py") +moss.addFilesByWildCard("testfiles/test_java*.java") # or moss.addFile("testfiles/test_python.py") moss.submit() print(moss.getHomePage()) -pprint.pprint(moss.getResults()) -# print frequency of each shared solution -pprint.pprint(moss.getShareScores()) + +result = moss.getResults() + +pprint.pprint(result) + # print potential distributor-culprit relationships -pprint.pprint(moss.getInsights()) +pprint.pprint(insights(result)) +# print frequency of each shared solution +pprint.pprint(share_scores(result)) diff --git a/docs/changelog.md b/docs/changelog.md index 3ebae28..5b56bd5 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,13 @@ # Changelog +## [0.4] - March 10, 2020 + +### Changed [⚠️ Breaking Changes] +- `getShareScores` & `getInsights` have been decoupled from the `check` class; they are now the standalone functions `share_scores` & `insights` and have to be imported separately. +- Minor changes in the `analyze.py` module. 
+ + ## [0.3] - Jan 1, 2020 ### Added diff --git a/docs/insights.md b/docs/insights.md new file mode 100644 index 0000000..47bba54 --- /dev/null +++ b/docs/insights.md @@ -0,0 +1,51 @@ +# Insights + +PlagCheck provides algorithmic analysis of Moss results. + +### Terminologies + +### 1. Node +Nodes are the results returned by Moss, i.e. every +individual file. + +### 2. Tags +Tags are the roles which a file serves, i.e. a tag marks a file as +a potential distributor, a potential culprit, or +both. + +### 3. M-group +m-groups (moss-groups) are groups of solutions which have similar code. +For example, a student who solves a programming problem may share their +solution with 3 of their friends; that is a single m-group with 4 nodes. + +For example, if you run [demo.py](https://github.com/codeclassroom/PlagCheck/blob/master/demo.py), `insights()` will return the following data: +```python + +{'DCtoC Paths': [('testfiles/test_java5.java', 'testfiles/test_java2.java'), + ('testfiles/test_java4.java', 'testfiles/test_java2.java')], + 'DtoC Paths': [('testfiles/test_java3.java', 'testfiles/test_java2.java'), + ('testfiles/test_java3.java', 'testfiles/test_java.java'), + ('testfiles/test_java7.java', 'testfiles/test_java6.java')], + 'DtoDC Paths': [('testfiles/test_java3.java', 'testfiles/test_java5.java'), + ('testfiles/test_java3.java', 'testfiles/test_java4.java')]} + +``` + +This analysis can be visualized as the following _Disconnected Directed Graph_: + +![moss results](https://drive.google.com/uc?export=view&id=1Lc8obgjihfo7EGimn300mTtqfmHK0Zem) + +We assign Tags to every individual Node. + +1. D - Distributor +Student(s) who distributed their +code in a group. +2. C - Culprit +Student(s) who copied the shared +code. +3. DC - Both a Distributor & Culprit + +In the graph depicted above, there are 2 unique _m-groups_. + +1. Group 1 : [1, 2, 3, 4, 5] +2.
Group 2 : [7, 6] \ No newline at end of file diff --git a/docs/installation.md b/docs/installation.md index 8d4f9f1..82c173d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -2,16 +2,24 @@ Installing plagcheck is pretty simple, just run -`pip install plagcheck` +```bash +pip install plagcheck +``` Install a specific verison -`pip install plagcheck==0.2` +```bash +pip install plagcheck==0.4 +``` or directly from GitHub if you cannot wait to test new features -`pip install git+https://github.com/codeclassroom/PlagCheck.git` +```bash +pip install git+https://github.com/codeclassroom/PlagCheck.git +``` -If you have already installed it and want to update +If you have an old version, update it using -`pip install --upgrade plagcheck` \ No newline at end of file +```bash +pip install --upgrade plagcheck +``` \ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md index 37d864c..5fbf539 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,6 +1,6 @@ # Usage -plagcheck provides the following classes: +plagcheck provides the following classes & functions: ### check(files, lang, user_id) @@ -16,29 +16,33 @@ plagcheck provides the following classes: """Usage example""" import os import pprint -from plagcheck import plagcheck +from plagcheck.plagcheck import check, insights, share_scores from dotenv import load_dotenv load_dotenv() -language = "python" +language = "java" userid = os.environ["USER_ID"] -moss = plagcheck.check(language, userid) +moss = check(language, userid) -moss.addFilesByWildCard("testfiles/test_python*.py") +moss.addFilesByWildCard("testfiles/test_java*.java") # or moss.addFile("testfiles/test_python.py") moss.submit() print(moss.getHomePage()) -pprint.pprint(moss.getResults()) -# print frequency of each shared solution -pprint.pprint(moss.getShareScores()) + +result = moss.getResults() + +pprint.pprint(result) + # print potential distributor-culprit relationships -pprint.pprint(moss.getInsights()) +pprint.pprint(insights(result)) 
+# print frequency of each shared solution +pprint.pprint(share_scores(result)) ``` @@ -72,18 +76,6 @@ c.getHomePage() ```python c.getResults() -""" -[ - { - "file1":"filename1.py", - "file2":"filename2.py", - "percentage": 34, - "no_of_lines_matched": 3, - "lines_matched":[["2-3", "10-11"]] - }, -.... -] -""" ``` @@ -162,14 +154,16 @@ program code that also appears in the base file is not counted in matches. code for an assignment. Multiple Base files are allowed. - You should use a base file if it is convenient; base files improve results, but are not usually necessary for obtaining useful information. -### 7. getShareScores() -**Parameters** : `None`
+
+ +### share_scores() +**Parameters** : `Moss Results`(returned by `getResults()`)
**Return Type** : `Dict`
+**Description**: Share Score is a utility which returns the frequency of every individual file in the Moss results.
**Demo**: ```python -c.getShareScores() +print(share_scores(moss_data)) # Will return """ @@ -179,4 +173,15 @@ c.getShareScores() """ ``` Share Score is basically the frequency of each file appearing in Moss Results. -i.e Higher the frequency, the more is that solution "shared" by different files. \ No newline at end of file +i.e Higher the frequency, the more is that solution "shared" by different files. + +### insights() +**Parameters** : `Moss Results`(returned by `getResults()`)
+**Return Type** : `Dict`
+**Description**: See [Insights](insights.md).
+**Demo**: +```python + +print(insights(moss_data)) + +``` \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index bf53446..3c658a7 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,6 +8,7 @@ nav: - Documentation: index.md - Installation: installation.md - Usage: usage.md + - PlagCheck Insights: insights.md - Moss: moss.md - Changelog: changelog.md - About: about.md diff --git a/plagcheck/__init__.py b/plagcheck/__init__.py index 2a6b821..eba4447 100644 --- a/plagcheck/__init__.py +++ b/plagcheck/__init__.py @@ -1,2 +1,2 @@ """The MOSS interface package for CodeClassroom""" -from plagcheck.plagcheck import check +from plagcheck.plagcheck import check, insights, share_scores diff --git a/plagcheck/analyze.py b/plagcheck/analyze.py index 7bbb368..acd9cef 100644 --- a/plagcheck/analyze.py +++ b/plagcheck/analyze.py @@ -28,7 +28,7 @@ def __init__(self): self.nodes = [] self.nodeCount = 0 - def relatesTo(self, P1, P2, node1, node2): + def relate(self, P1, P2, node1, node2): """Set a path between two file nodes""" node_obj_dict = {} diff --git a/plagcheck/plagcheck.py b/plagcheck/plagcheck.py index 1b7b485..7298f9c 100644 --- a/plagcheck/plagcheck.py +++ b/plagcheck/plagcheck.py @@ -43,6 +43,45 @@ def request(url: str): return req.decode("utf-8") +def share_scores(moss_data: dict) -> dict: + """Share Score Insights""" + similar_code_files = [] + for result in moss_data: + similar_code_files.append(result["file1"]) + similar_code_files.append(result["file2"]) + + # frequency of files which are similar + share_score = collections.Counter(similar_code_files) + + return dict(share_score) + + +def insights(moss_data: dict) -> dict: + """Analysis for Moss""" + mg = Mgroups() + similar_code_files = set() + insights = {} + + for r in moss_data: + similar_code_files.add(r["file1"]) + similar_code_files.add(r["file2"]) + + mg.createNodes(similar_code_files) + + for r in moss_data: + mg.relate( + r["percentage_file1"], r["percentage_file2"], r["file1"], r["file2"] + ) + 
+ mg.set_tags() + + insights["DtoC Paths"] = mg.d2c() + insights["DtoDC Paths"] = mg.d2dc() + insights["DCtoC Paths"] = mg.dc2c() + + return insights + + class check: """ Args: @@ -133,40 +172,3 @@ def getResults(self) -> Tuple[str, Results]: """Return the result as a list of dictionary""" return self.moss_results - - def getShareScores(self): - """Share Score Insights""" - similar_code_files = [] - for result in self.moss_results: - similar_code_files.append(result["file1"]) - similar_code_files.append(result["file2"]) - - # frequency of files which are similar - share_score = collections.Counter(similar_code_files) - - return dict(share_score) - - def getInsights(self): - """Analysis for Moss""" - mg = Mgroups() - similar_code_files = set() - insights = {} - - for r in self.moss_results: - similar_code_files.add(r["file1"]) - similar_code_files.add(r["file2"]) - - mg.createNodes(similar_code_files) - - for r in self.moss_results: - mg.relatesTo( - r["percentage_file1"], r["percentage_file2"], r["file1"], r["file2"] - ) - - mg.set_tags() - - insights["DtoC Paths"] = mg.d2c() - insights["DtoDC Paths"] = mg.d2dc() - insights["DCtoC Paths"] = mg.dc2c() - - return insights diff --git a/plagcheck/plagcheck_test.py b/plagcheck/plagcheck_test.py index bebd86c..1110aef 100644 --- a/plagcheck/plagcheck_test.py +++ b/plagcheck/plagcheck_test.py @@ -1,5 +1,6 @@ """Tests for the MOSS interface package for CodeClassroom""" from plagcheck import analyze, plagcheck +from plagcheck.plagcheck import insights, share_scores def test_check(): @@ -11,15 +12,15 @@ def test_check(): temp.addFile("testfiles/test_python2.py") temp.submit() results = temp.getResults() - insights = temp.getInsights() - share_scores = temp.getShareScores() + moss_insights = insights(results) + moss_share_scores = share_scores(results) - assert share_scores == { + assert moss_share_scores == { "testfiles/test_python.py": 1, "testfiles/test_python2.py": 1, } - assert insights == {"DCtoC Paths": [], "DtoC 
Paths": [], "DtoDC Paths": []} + assert moss_insights == {"DCtoC Paths": [], "DtoC Paths": [], "DtoDC Paths": []} assert results == [ { @@ -48,8 +49,8 @@ def test_Mgroups(): mg = analyze.Mgroups() mg.createNodes({"1", "2", "3"}) - mg.relatesTo(45, 88, "3", "1") - mg.relatesTo(46, 90, "3", "2") + mg.relate(45, 88, "3", "1") + mg.relate(46, 90, "3", "2") mg.set_tags() diff --git a/setup.py b/setup.py index 47b6241..67ff46f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="plagcheck", - version="0.3", + version="0.4", license="MIT", author="Bhupesh Varshney", author_email="varshneybhupesh@gmail.com", @@ -13,9 +13,9 @@ keywords="moss plagiarism analysis cheat mosspy", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/codeclassroom/PlagCheck", + url="https://codeclassroom.github.io/PlagCheck/", project_urls={ - "Documentation": "https://github.com/codeclassroom/PlagCheck/blob/master/docs/docs.md", + "Documentation": "https://plagcheck.readthedocs.io/en/latest/?badge=latest", "Source Code": "https://github.com/codeclassroom/PlagCheck", "Funding": "https://www.patreon.com/bePatron?u=18082750", "Say Thanks!": "https://github.com/codeclassroom/PlagCheck/issues/new?assignees=&labels=&template=---say-thank-you.md&title=",