Skip to content

Commit

Permalink
Ignore non-JSON output in detect-it-easy output
Browse files Browse the repository at this point in the history
The `diec` binary emits non-JSON text alongside the JSON payload when run in JSON mode, which causes parsing to fail. For example:

```text
[!] Heuristic scan is disabled. Use --heuristicscan to enable
{
    "detects": [
        {
            "filetype": "PE64",
            "info": "",
            "offset": "0",
            "parentfilepart": "Header",
            "size": "3488048",
            "values": [
                {
                    "info": "",
                    "name": "Microsoft Linker",
                    "string": "Linker: Microsoft Linker(14.22.27905)",
                    "type": "Linker",
                    "version": "14.22.27905"
                },
                {
                    "info": "C++",
                    "name": "Microsoft Visual C/C++",
                    "string": "Compiler: Microsoft Visual C/C++(19.22.27905)[C++]",
                    "type": "Compiler",
                    "version": "19.22.27905"
                },
                {
                    "info": "",
                    "name": "Visual Studio",
                    "string": "Tool: Visual Studio(2019, v16.2)",
                    "type": "Tool",
                    "version": "2019, v16.2"
                }
            ]
        }
    ]
}
```

There is a related issue noted against the DIE project:
horsicq/Detect-It-Easy#242
  • Loading branch information
josh-feather committed Jan 29, 2025
1 parent 989ffde commit c07a187
Showing 1 changed file with 22 additions and 11 deletions.
33 changes: 22 additions & 11 deletions lib/cuckoo/common/integrations/file_extra_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
import json
import logging
import os
import re
import shlex
import shutil
import signal
import subprocess

# from contextlib import suppress
from typing import DefaultDict, List, Optional, Set, Union
from typing import Any, DefaultDict, List, Optional, Set, Union

import pebble

Expand Down Expand Up @@ -262,27 +263,37 @@ def static_file_info(

def detect_it_easy_info(file_path: str):
if not path_exists(processing_conf.die.binary):
log.warning("detect-it-easy binary not found at path %s", processing_conf.die.binary)
return []

try:
result_json = subprocess.check_output(
die_output = subprocess.check_output(
[processing_conf.die.binary, "-j", file_path],
stderr=subprocess.STDOUT,
universal_newlines=True,
)

if "detects" not in result_json:
return []
def get_json() -> dict[str, Any]:
"""Get the JSON element from the detect it easy output.
if "Invalid signature" in result_json and "{" in result_json:
start = result_json.find("{")
if start != -1:
result_json = result_json[start:]
This is required due to non-JSON output in JSON mode.
https://github.com/horsicq/Detect-It-Easy/issues/242
"""
matches = re.findall(r"\{.*\}", die_output, re.S)
return json.loads(matches[0]) if matches else {}

strings = [sub["string"] for block in json.loads(result_json).get("detects", []) for sub in block.get("values", [])]
def get_matches() -> list[str]:
"""Get the string values from the detect it easy output."""
return [sub["string"] for block in get_json().get("detects", []) for sub in block.get("values", [])]

if strings:
return strings
return [] if "detects" not in die_output else get_matches()
except subprocess.CalledProcessError as err:
log.error(
"Detect-It-Easy: Failed to execute cmd=`%s`, stdout=`%s`, stderr=`%s`",
shlex.join(err.cmd),
err.stdout,
err.stderr,
)
except json.decoder.JSONDecodeError as e:
log.debug("DIE results are not in json format: %s", str(e))
except Exception as e:
Expand Down

0 comments on commit c07a187

Please sign in to comment.