[IMP] cleans up code and removes all unused or unimportant files

SAP · Aug 30, 2024 · e0e4944 · e0e4944
1 parent b56718c
commit e0e4944
Show file tree

Hide file tree

Showing 10 changed files with 9 additions and 650 deletions.
diff --git a/prospector/evaluation/README.md b/prospector/evaluation/README.md
@@ -1,6 +1,6 @@
 # Evaluate Prospector
 
-This folder contains the scripts used for evaluating Prospector's reports (created and used in Summer 2024). The folder is structured as follows:
+This folder contains the scripts used for evaluating Prospector's reports, together with the data they need (created and used in Summer 2024). The folder is structured as follows:
 
 1. **Data** folder: contains input data, Prospector reports and results of the analysis of the Prospector reports.
 2. **Scripts**: The scripts used for running Prospector on a batch of CVEs, and for analysing the created reports.
@@ -24,19 +24,6 @@ them in a Redis Queue, from which the `prospector_worker` container fetches jobs
 
 You can set the number of workers in `docker/worker/etc_supervisor_confd_rqworker.conf.j2`.
 
-## Command Line Options
-
-All scripts are called from `main.py`, depending on the CL flags that are set. The following flags can be set:
-
-1. `-i`: Sets the filename of the file in the input data path.
-2. `-c`: Allows you to select a subset of CVEs, instead of all CVEs from the input data (eg. `-c CVE-2020-1925, CVE-2018-1234`)
-3. `-e`: For *execute*, dispatches jobs for all CVEs from the input data (or the subset if `-c` is set) to the Redis Queue (`dispatch_jobs.py`).
-4. `-a`: Analyses the reports created by Prospector (`analysis.py`)
-5. `-a -s`: Analyses the statistics part of the Prospector reports (eg. to analyse execution times, `analyse_statistics.py`)
-6. `-a --flow`: Creates a JSON file showing how the reports change categories between two different executions.
-7. `-eq`: For *empty queue*, to empty the jobs left on the queue.
-8. `-co`: For *count*, to count how many of the CVEs in the input data have a corresponding report.
-
 ## Configuration File
 
 The configuration file has two parts to it: a main part and a Prospector settings part, which is a copy of a part of the original Prospector `config.yaml` file.

diff --git a/prospector/evaluation/analyse.py b/prospector/evaluation/analyse.py
@@ -80,7 +80,7 @@ def analyse_prospector_reports(filename: str, selected_cves: str):
     # Keep track of the CVEs where there is no report file
     reports_not_found = []
 
-    #### Data to insert into table
+    # Data to insert into table
     if BATCH in ["regular", "old_code"]:
         results = {
             "high": [],
@@ -620,7 +620,7 @@ def generate_checkmarks_table(input_dataset: str, selected_cves):
 
         rule_checks = {rule: "" for rule in all_rules}
         for r in matched_rules:
-            rule_checks[r] = "\checkmark"
+            rule_checks[r] = "\checkmark"  # noqa: W605
 
         row.extend([rule_checks[r] for r in all_rules])
         row.extend([str(overall_exectime), str(llm_exectime)])
@@ -785,9 +785,7 @@ def generate_sankey_diagram(file1: str, file2: str, file3: str):
         height=800,
     )
 
-    output_file = (
-        ANALYSIS_RESULTS_PATH + f"sankey-{file1}-{file2}-{file3}.png"
-    )
+    output_file = ANALYSIS_RESULTS_PATH + f"sankey-{file1}-{file2}-{file3}.png"
     # Save as PNG
     write_image(fig, output_file)
     print(f"Sankey diagram saved to {output_file}")
diff --git a/prospector/evaluation/analyse_statistics.py b/prospector/evaluation/analyse_statistics.py
@@ -63,16 +63,6 @@ def analyse_statistics(filename: str):  # noqa: C901
     avg_cc_time = sum(cc_times) / len(cc_times)
     avg_total_cc_time = sum(total_cc_times) / len(total_cc_times)
 
-    # How many commits was the commit classification rule applied to?
-    for itm in dataset:
-        filepath = PROSPECTOR_REPORTS_PATH_HOST + filename + f"/{itm[0]}.json"
-        try:
-            cc_num_commits = _get_cc_num_commits(filepath)
-            break
-
-        except FileNotFoundError:
-            continue
-
     execution_data = {
         "timestamp": datetime.now().strftime("%H:%M:%S"),
         "total_files_found": len(repo_times),

diff --git a/prospector/evaluation/cloning_repos.py b/prospector/evaluation/cloning_repos.py
diff --git a/prospector/evaluation/compare.py b/prospector/evaluation/compare.py