From 8d3bd5ba373072f0f2e577685488a582c6eec532 Mon Sep 17 00:00:00 2001 From: Juan Caballero Date: Mon, 27 Feb 2023 11:55:10 +0100 Subject: [PATCH] Added support for OPSWAT MetaDefender reports, contributed by #35 --- README.md | 35 ++++++++++++++++++++++------------ avclass/labeler.py | 31 ++++++++++++++++++++++++++++-- examples/opswat_md_sample.json | 1 + pyproject.toml | 2 +- 4 files changed, 54 insertions(+), 15 deletions(-) create mode 100644 examples/opswat_md_sample.json diff --git a/README.md b/README.md index 1b18ad9..d46d0ef 100644 --- a/README.md +++ b/README.md @@ -153,10 +153,20 @@ clean the command line options. ## Input formats -AVClass supports three input JSONL formats +AVClass supports four input JSONL formats (i.e., one JSON object per line). -1. VirusTotal v2 API reports, +1. VirusTotal v3 API reports, +where each line in the input *file* should be the full JSON of a +VirusTotal API version 3 response with a *File* object report, +e.g., obtained by querying https://www.virustotal.com/api/v3/files/{hash} +There is an example VirusTotal v3 input file in examples/vtv3_sample.json + +```shell +avclass -f examples/vtv3_sample.json -o output.txt +``` + +2. VirusTotal v2 API reports, where each line in the input *file* should be the full JSON of a VirusTotal v2 API response to the */file/report* endpoint, e.g., obtained by querying https://www.virustotal.com/vtapi/v2/file/report?apikey={apikey}&resource={hash} @@ -166,17 +176,17 @@ There is an example VirusTotal v2 input file in examples/vtv2_sample.json avclass -f examples/vtv2_sample.json -o output.txt ``` -2. VirusTotal v3 API reports, -where each line in the input *file* should be the full JSON of a -VirusTotal API version 3 response with a *File* object report, -e.g., obtained by querying https://www.virustotal.com/api/v3/files/{hash} -There is an example VirusTotal v3 input file in examples/vtv3_sample.json +3. 
OPSWAT MetaDefender reports, +where each line in the input *file* should be the full JSON +obtained from OPSWAT MetaDefender. +There is an example OPSWAT MetaDefender input file in +examples/opswat_md_sample.json ```shell -avclass -f examples/vtv3_sample.json -o output.txt +avclass -f examples/opswat_md_sample.json -o output.txt ``` -3. Simplified format, +4. Simplified format, where each line in the input *file* should be a JSON with (at least) these fields: {md5, sha1, sha256, av_labels}. @@ -200,11 +210,12 @@ For example, you can provide as input the four test files (each of a different format) in the examples directory: ```shell -avclass -f examples/vtv3_sample.json -f examples/vtv2_sample.json -f examples/malheurReference_lb.json -o output.txt +avclass -f examples/vtv3_sample.json -f examples/vtv2_sample.json -f examples/malheurReference_lb.json -f examples/opswat_md_sample.json -o output.txt ``` -output.txt will have 3134 lines: 3130 samples from malheurReference_lb.json, -3 samples from vtv2_sample.json and one from vtv3_sample.json. +output.txt will have 3135 lines: 3130 samples from malheurReference_lb.json, +3 samples from vtv2_sample.json, 1 sample from vtv3_sample.json, and +1 sample from opswat_md_sample.json. You can also provide as input a directory with the -d option and AVClass will process all files in that directory. 
@staticmethod
def get_sample_info_opswat_md(vt_rep):
    """Parse sample information from an OPSWAT MetaDefender report.

    The parameter keeps the name ``vt_rep`` for compatibility with callers,
    but it holds the decoded JSON object of an OPSWAT MetaDefender report.
    Hashes are read from ``file_info`` and per-engine results from
    ``scan_results.scan_details``.

    This function is also used to probe whether an arbitrary report is in
    the MetaDefender format, so every structural mismatch must yield
    ``None`` rather than an exception.

    Returns:
        ``SampleInfo(md5, sha1, sha256, label_pairs, [])`` where
        ``label_pairs`` is a list of ``(engine_name, label)`` tuples, or
        ``None`` if the report does not have the expected layout.
    """
    try:
        scans = vt_rep["scan_results"]["scan_details"]
        file_info = vt_rep["file_info"]
        md5 = file_info["md5"]
        sha1 = file_info["sha1"]
        sha256 = file_info["sha256"]
    except (KeyError, TypeError):
        # KeyError: a required field is absent.
        # TypeError: the report (or a nested value) is not a mapping,
        # e.g. a list, a string or null.
        return None
    if not isinstance(scans, dict):
        return None

    # Build the lookup set once instead of scanning string.printable for
    # every character of every label.
    printable = frozenset(string.printable)

    # Obtain labels from scan results
    label_pairs = []
    for av, res in scans.items():
        # Only result code 1 counts as a detection. Other codes may still
        # carry text in "threat_found" that is not a malware label (the
        # sample report has an engine with code 3 whose "threat_found" is
        # an error message about a password-protected archive).
        if not isinstance(res, dict) or res.get("scan_result_i") != 1:
            continue
        label = res.get("threat_found")
        if not isinstance(label, str):
            # Missing or null label: nothing to record for this engine.
            continue
        clean_label = "".join(ch for ch in label if ch in printable).strip()
        label_pairs.append((av, clean_label))

    return SampleInfo(md5, sha1, sha256, label_pairs, [])
251124, "file_type": "application/vnd.rar", "file_type_description": "WinRAR Compressed Archive", "process_info": {"blocked_reason": "Encrypted Archive", "progress_percentage": 100, "result": "Blocked", "verdicts": ["Encrypted Archive"]}, "progress_percentage": 100, "scan_all_result_a": "Encrypted Archive", "scan_all_result_i": 12, "scanned_with": 29}, {"data_id": "220373b076e74ab09ae49b9879617b9b", "detected_by": 2, "display_name": "elp.bat", "file_size": 670, "file_type": "text/plain", "file_type_description": "ASCII Text", "process_info": {"blocked_reason": "Infected", "progress_percentage": 100, "result": "Blocked", "verdicts": ["Infected"]}, "progress_percentage": 100, "scan_all_result_a": "Infected", "scan_all_result_i": 1, "scanned_with": 29}, {"data_id": "b43c3a5ba47249a6b99632d9fb0563c5", "detected_by": 1, "display_name": "extraPFZ.exe", "file_size": 564896, "file_type": "application/x-dosexec", "file_type_description": "Executable File", "process_info": {"blocked_reason": "Infected", "progress_percentage": 100, "result": "Blocked", "verdicts": ["Infected"]}, "progress_percentage": 100, "scan_all_result_a": "Infected", "scan_all_result_i": 1, "scanned_with": 29}, {"data_id": "662a175c7783459b9afa28b1d33d1379", "detected_by": 0, "display_name": "svideo.vbs", "file_size": 81, "file_type": "text/plain", "file_type_description": "ASCII Text", "process_info": {"blocked_reason": "", "progress_percentage": 100, "result": "Allowed", "verdicts": ["No Threat Detected"]}, "progress_percentage": 100, "scan_all_result_a": "No Threat Detected", "scan_all_result_i": 0, "scanned_with": 29}], "first_index": 0, "page_size": 50, "total_extracted_files": 4, "worst_data_id": "49f8ca95f24a45ce9b7feb41b484e165"}, "file_info": {"display_name": "2c6110a76dda8da49195052fa561ab8b8278c02df400124e46d26d2df228b70b", "file_size": 988643, "file_type": "application/vnd.microsoft.portable-executable", "file_type_description": "Self-extracting Executable File", "md5": 
"33ca3e86d783234092e52369e1b6bb83", "sha1": "653ab54e15b01473943cd897ded24f742b0193c5", "sha256": "2c6110a76dda8da49195052fa561ab8b8278c02df400124e46d26d2df228b70b", "upload_timestamp": "2021-01-29T22:53:45.604Z"}, "process_info": {"blocked_reason": "Infected", "file_type_skipped_scan": false, "post_processing": {"actions_failed": "", "actions_ran": "", "converted_destination": "", "converted_to": "", "copy_move_destination": ""}, "processing_time": 20516, "profile": "File process", "progress_percentage": 100, "queue_time": 1219, "result": "Blocked", "user_agent": "", "username": "", "verdicts": ["Infected"]}, "scan_results": {"data_id": "49f8ca95f24a45ce9b7feb41b484e165", "last_file_scanned": "reedmi.cvl", "progress_percentage": 100, "scan_all_result_a": "Infected", "scan_all_result_i": 1, "scan_details": {"AegisLab": {"def_time": "2021-01-29T12:48:00.000Z", "eng_id": "aegislab_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 9, "threat_found": "", "wait_time": 1366}, "Ahnlab": {"def_time": "2021-01-30T00:00:00.000Z", "eng_id": "ahnlab_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 24, "threat_found": "Malware/Win32.Generic", "wait_time": 1351}, "Antiy": {"def_time": "2021-01-29T15:48:00.000Z", "eng_id": "antiy_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 20, "threat_found": "", "wait_time": 1355}, "Avira": {"def_time": "2021-01-29T00:00:00.000Z", "eng_id": "avira_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 1, "threat_found": "TR/Drop.Agent.xlojg", "wait_time": 1374}, "BitDefender": {"def_time": "2021-01-29T13:19:00.000Z", "eng_id": "bitdefender_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 140, "threat_found": "Trojan.Dropper.ZME", "wait_time": 1501}, "ByteHero": {"def_time": "2021-01-27T00:00:00.000Z", "eng_id": "bytehero_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 680, "threat_found": "", "wait_time": 1352}, "ClamAV": {"def_time": 
"2021-01-28T07:28:06.000Z", "eng_id": "clamav_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 1125, "threat_found": "", "wait_time": 1438}, "Comodo": {"def_time": "2021-01-29T05:05:50.000Z", "eng_id": "comodo_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 26, "threat_found": "Malware", "wait_time": 1349}, "Cyren": {"def_time": "2021-01-29T14:35:00.000Z", "eng_id": "cyren_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 94, "threat_found": "", "wait_time": 1547}, "ESET": {"def_time": "2021-01-29T00:00:00.000Z", "eng_id": "eset_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 50, "threat_found": "", "wait_time": 1544}, "Emsisoft": {"def_time": "2021-01-29T12:07:00.000Z", "eng_id": "emsisoft_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 1202, "threat_found": "Trojan.Dropper.ZME (B)", "wait_time": 1502}, "Filseclab": {"def_time": "2021-01-27T23:08:00.000Z", "eng_id": "filseclab_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 411, "threat_found": "", "wait_time": 1527}, "Huorong": {"def_time": "2021-01-29T09:24:00.000Z", "eng_id": "huorong_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 260, "threat_found": "", "wait_time": 1584}, "Ikarus": {"def_time": "2021-01-29T13:13:30.000Z", "eng_id": "ikarus_1_windows", "location": "local", "scan_result_i": 3, "scan_time": 235, "threat_found": "The archive is password protected or the given password is invalid.", "wait_time": 1594}, "K7": {"def_time": "2021-01-29T11:16:00.000Z", "eng_id": "k7_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 12, "threat_found": "Trojan ( 005631561 )", "wait_time": 1363}, "McAfee": {"def_time": "2021-01-29T00:00:00.000Z", "eng_id": "mcafee_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 61, "threat_found": "RDN/Dridex", "wait_time": 1549}, "NANOAV": {"def_time": "2021-01-29T11:38:00.000Z", "eng_id": "nano_1_windows", "location": "local", 
"scan_result_i": 1, "scan_time": 28, "threat_found": "Trojan.Win32.Dridex.icipbk", "wait_time": 1519}, "NetGate": {"def_time": "2021-01-24T04:10:00.000Z", "eng_id": "netgate_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 64, "threat_found": "", "wait_time": 1561}, "Quick Heal": {"def_time": "2021-01-29T06:52:00.000Z", "eng_id": "quickheal_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 51, "threat_found": "Backdoor.Dridex", "wait_time": 1559}, "Sophos": {"def_time": "2021-01-29T00:12:00.000Z", "eng_id": "sophos_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 238, "threat_found": "", "wait_time": 1591}, "Symantec": {"def_time": "2021-01-29T00:00:00.000Z", "eng_id": "symantec_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 21, "threat_found": "", "wait_time": 1464}, "TACHYON": {"def_time": "2021-01-29T00:00:00.000Z", "eng_id": "nprotect_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 76, "threat_found": "", "wait_time": 1549}, "TrendMicro": {"def_time": "2021-01-27T20:22:00.000Z", "eng_id": "trendmicro_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 1388, "threat_found": "", "wait_time": 1441}, "TrendMicro House Call": {"def_time": "2021-01-28T22:14:00.000Z", "eng_id": "trendmicrohousecall_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 1281, "threat_found": "", "wait_time": 1454}, "Vir.IT eXplorer": {"def_time": "2021-01-29T12:10:00.000Z", "eng_id": "viritexplorer_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 72, "threat_found": "", "wait_time": 1569}, "VirusBlokAda": {"def_time": "2021-01-29T08:04:00.000Z", "eng_id": "virusblokada_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 492, "threat_found": "", "wait_time": 1493}, "Windows Defender": {"def_time": "2021-01-29T07:07:36.000Z", "eng_id": "windowsdefender_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 760, "threat_found": "", 
"wait_time": 1334}, "Xvirus Personal Guard": {"def_time": "2021-01-28T05:47:00.000Z", "eng_id": "xviruspersonalguard_1_windows", "location": "local", "scan_result_i": 1, "scan_time": 825, "threat_found": "Suspicious:NewThreat.179", "wait_time": 1363}, "Zillya!": {"def_time": "2021-01-28T07:07:00.000Z", "eng_id": "zillya_1_windows", "location": "local", "scan_result_i": 0, "scan_time": 10, "threat_found": "", "wait_time": 1475}}, "start_time": "2021-01-29T22:53:46.823Z", "total_avs": 29, "total_time": 20516}, "vulnerability_info": {}, "yara_info": {}} diff --git a/pyproject.toml b/pyproject.toml index 7e02937..a5a1773 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "avclass-malicialab" -version = "2.7.0" +version = "2.7.1" description = "AVClass is a Python package and command line tool to tag / label malware samples." readme = "README.md" authors = [{ name = "MaliciaLab" }]