From f3e463b34bda305d64e2a2032465a1dd0cbedc42 Mon Sep 17 00:00:00 2001
From: Nazanin Donyapour <nazanin.donyapour@gmail.com>
Date: Fri, 5 Jul 2024 19:20:07 +0000
Subject: [PATCH] extract-data-csv plugin

---
 .../extract-data-csv-plugin/.bumpversion.cfg  |  29 ++++
 utils/extract-data-csv-plugin/.dockerignore   |   4 +
 utils/extract-data-csv-plugin/.gitattributes  |   1 +
 utils/extract-data-csv-plugin/.gitignore      |   1 +
 utils/extract-data-csv-plugin/CHANGELOG.md    |   5 +
 utils/extract-data-csv-plugin/Dockerfile      |  22 +++
 utils/extract-data-csv-plugin/README.md       |  18 +++
 utils/extract-data-csv-plugin/VERSION         |   1 +
 utils/extract-data-csv-plugin/build-docker.sh |   4 +
 .../extract_data_csv_0@1@0.cwl                | 141 ++++++++++++++++++
 utils/extract-data-csv-plugin/ict.yml         |  87 +++++++++++
 utils/extract-data-csv-plugin/pyproject.toml  |  31 ++++
 .../mm/utils/extract_data_csv/__init__.py     |   7 +
 .../mm/utils/extract_data_csv/__main__.py     |  73 +++++++++
 .../extract_data_csv/extract_data_csv.py      |  50 +++++++
 .../extract-data-csv-plugin/tests/__init__.py |   1 +
 .../tests/fake_sample_records.csv             |   3 +
 .../tests/test_extract_data_csv.py            |  49 ++++++
 18 files changed, 527 insertions(+)
 create mode 100644 utils/extract-data-csv-plugin/.bumpversion.cfg
 create mode 100644 utils/extract-data-csv-plugin/.dockerignore
 create mode 100644 utils/extract-data-csv-plugin/.gitattributes
 create mode 100644 utils/extract-data-csv-plugin/.gitignore
 create mode 100644 utils/extract-data-csv-plugin/CHANGELOG.md
 create mode 100644 utils/extract-data-csv-plugin/Dockerfile
 create mode 100644 utils/extract-data-csv-plugin/README.md
 create mode 100644 utils/extract-data-csv-plugin/VERSION
 create mode 100755 utils/extract-data-csv-plugin/build-docker.sh
 create mode 100644 utils/extract-data-csv-plugin/extract_data_csv_0@1@0.cwl
 create mode 100644 utils/extract-data-csv-plugin/ict.yml
 create mode 100644 utils/extract-data-csv-plugin/pyproject.toml
 create mode 100644 utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__init__.py
 create mode 100644 utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__main__.py
 create mode 100644 utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/extract_data_csv.py
 create mode 100644 utils/extract-data-csv-plugin/tests/__init__.py
 create mode 100644 utils/extract-data-csv-plugin/tests/fake_sample_records.csv
 create mode 100644 utils/extract-data-csv-plugin/tests/test_extract_data_csv.py
diff --git a/utils/extract-data-csv-plugin/.bumpversion.cfg b/utils/extract-data-csv-plugin/.bumpversion.cfg
new file mode 100644
index 00000000..f8274026
--- /dev/null
+++ b/utils/extract-data-csv-plugin/.bumpversion.cfg
@@ -0,0 +1,29 @@
+[bumpversion]
+current_version = 0.1.0
+commit = False
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
+serialize = 
+	{major}.{minor}.{patch}-{release}{dev}
+	{major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = _
+first_value = dev
+values = 
+	dev
+	_
+
+[bumpversion:part:dev]
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
+
+[bumpversion:file:VERSION]
+
+[bumpversion:file:README.md]
+
+[bumpversion:file:plugin.json]
+
+[bumpversion:file:src/polus/mm/utils/extract_data_csv/__init__.py]
diff --git a/utils/extract-data-csv-plugin/.dockerignore b/utils/extract-data-csv-plugin/.dockerignore
new file mode 100644
index 00000000..7c603f81
--- /dev/null
+++ b/utils/extract-data-csv-plugin/.dockerignore
@@ -0,0 +1,4 @@
+.venv
+out
+tests
+__pycache__
diff --git a/utils/extract-data-csv-plugin/.gitattributes b/utils/extract-data-csv-plugin/.gitattributes
new file mode 100644
index 00000000..87e654bb
--- /dev/null
+++ b/utils/extract-data-csv-plugin/.gitattributes
@@ -0,0 +1 @@
+*.csv filter=lfs diff=lfs merge=lfs -text
diff --git a/utils/extract-data-csv-plugin/.gitignore b/utils/extract-data-csv-plugin/.gitignore
new file mode 100644
index 00000000..c04bc49f
--- /dev/null
+++ b/utils/extract-data-csv-plugin/.gitignore
@@ -0,0 +1 @@
+poetry.lock
diff --git a/utils/extract-data-csv-plugin/CHANGELOG.md b/utils/extract-data-csv-plugin/CHANGELOG.md
new file mode 100644
index 00000000..b67793f7
--- /dev/null
+++ b/utils/extract-data-csv-plugin/CHANGELOG.md
@@ -0,0 +1,5 @@
+# CHANGELOG
+
+## 0.1.0
+
+Initial release.
diff --git a/utils/extract-data-csv-plugin/Dockerfile b/utils/extract-data-csv-plugin/Dockerfile
new file mode 100644
index 00000000..e8ac9249
--- /dev/null
+++ b/utils/extract-data-csv-plugin/Dockerfile
@@ -0,0 +1,22 @@
+FROM condaforge/mambaforge
+
+ENV EXEC_DIR="/opt/executables"
+ENV POLUS_LOG="INFO"
+RUN mkdir -p ${EXEC_DIR}
+
+
+# Work directory defined in the base container
+# WORKDIR ${EXEC_DIR}
+
+COPY pyproject.toml ${EXEC_DIR}
+COPY VERSION ${EXEC_DIR}
+COPY README.md ${EXEC_DIR}
+COPY CHANGELOG.md ${EXEC_DIR}
+
+# Install needed packages here
+
+COPY src ${EXEC_DIR}/src
+
+RUN pip3 install ${EXEC_DIR} --no-cache-dir
+# No ENTRYPOINT is set in this file, so CMD must be a complete command.
+CMD ["python", "-m", "polus.mm.utils.extract_data_csv", "--help"]
diff --git a/utils/extract-data-csv-plugin/README.md b/utils/extract-data-csv-plugin/README.md
new file mode 100644
index 00000000..2139717a
--- /dev/null
+++ b/utils/extract-data-csv-plugin/README.md
@@ -0,0 +1,18 @@
+# extract_data_csv (0.1.0)
+
+Extract data from a CSV file
+
+## Options
+
+This plugin takes 6 input arguments and 2 output arguments:
+
+| Name          | Description             | I/O    | Type   | Default |
+|---------------|-------------------------|--------|--------|---------|
+| input_csv_path | Path to the input csv file, Type: string, File type: input, Accepted formats: csv | Input | File | File |
+| query | query str to search the dataset, Type: string, File type: input, Accepted formats: txt | Input | string | string |
+| min_row | The row min index, Type: int | Input | int | int |
+| max_row | The row max index, Type: int | Input | int | int |
+| column_name | The name of the column to load data, Type: string, File type: input, Accepted formats: txt | Input | string | string |
+| output_txt_path | Path to the txt output file, Type: string, File type: output, Accepted formats: txt | Input | string | string |
+| output_txt_path | Path to the txt output file | Output | File | File |
+| output_data | The output data | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
diff --git a/utils/extract-data-csv-plugin/VERSION b/utils/extract-data-csv-plugin/VERSION
new file mode 100644
index 00000000..6e8bf73a
--- /dev/null
+++ b/utils/extract-data-csv-plugin/VERSION
@@ -0,0 +1 @@
+0.1.0
diff --git a/utils/extract-data-csv-plugin/build-docker.sh b/utils/extract-data-csv-plugin/build-docker.sh
new file mode 100755
index 00000000..1e818292
--- /dev/null
+++ b/utils/extract-data-csv-plugin/build-docker.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Build the plugin image, tagged with the contents of the VERSION file.
+version=$(<VERSION)
+docker build . -t polusai/extract-data-csv-tool:${version}
diff --git a/utils/extract-data-csv-plugin/extract_data_csv_0@1@0.cwl b/utils/extract-data-csv-plugin/extract_data_csv_0@1@0.cwl
new file mode 100644
index 00000000..b4dedff1
--- /dev/null
+++ b/utils/extract-data-csv-plugin/extract_data_csv_0@1@0.cwl
@@ -0,0 +1,141 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+
+class: CommandLineTool
+
+label: Extract data from a CSV file
+
+doc: |-
+  Extract data from a CSV file
+
+baseCommand: ["python",  "-m", "polus.mm.utils.extract_data_csv"]
+
+hints:
+  DockerRequirement:
+    dockerPull: polusai/extract-data-csv-tool@sha256:2ef6682340ba4e34b826f5028bc4377b523c635975777f50deaf6d7278baa344
+
+requirements:
+  InlineJavascriptRequirement: {}
+  # Enabling InitialWorkDirRequirement will stage the input csv file
+  InitialWorkDirRequirement:
+    listing:
+    - $(inputs.input_csv_path)
+
+inputs:
+  input_csv_path:
+    label: Path to the input csv file
+    doc: |-
+      Path to the input csv file
+      Type: string
+      File type: input
+      Accepted formats: csv
+    type: File
+    format: edam:format_3752
+    inputBinding:
+      prefix: --input_csv_path
+
+  query:
+    label: query str to search the dataset
+    doc: |-
+      query str to search the dataset
+      Type: string
+      File type: input
+      Accepted formats: txt
+    type: string?
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --query
+
+  min_row:
+    label: The row min index
+    doc: |-
+      The row min index
+      Type: int
+    type: int?
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --min_row
+    default: 1
+
+  max_row:
+    label: The row max index
+    doc: |-
+      The row max index
+      Type: int
+    type: int?
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --max_row
+    default: -1
+
+  column_name:
+    label: The name of the column to load data
+    doc: |-
+      The name of the column to load data
+      Type: string
+      File type: input
+      Accepted formats: txt
+    type: string
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --column_name
+
+  output_txt_path:
+    label: Path to the txt output file
+    doc: |-
+      Path to the txt output file
+      Type: string
+      File type: output
+      Accepted formats: txt
+    type: string
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --output_txt_path
+    default: system.log
+
+outputs:
+  output_txt_path:
+    label: Path to the txt output file
+    doc: |-
+      Path to the txt output file
+    type: File
+    outputBinding:
+      glob: $(inputs.output_txt_path)
+    format: edam:format_2330
+
+  output_data:
+    label: The output data
+    doc: |-
+      The output data
+    type:
+      type: array
+      items: string
+    outputBinding:
+      glob: $(inputs.output_txt_path)
+      loadContents: true
+      outputEval: |
+        ${
+            var lines = self[0].contents.split("\n");
+            // remove blank lines
+            lines = lines.filter(function(line) {return line.trim() !== '';});
+            var data = [];
+            for (var i = 0; i < lines.length; i++) {
+              // The format of the lines is as follows: data
+                var words = lines[i].split(",").map(function(item) {return item.trim();});
+                data.push(words[0]);
+
+              }
+            return data;
+        }
+
+
+$namespaces:
+  edam: https://edamontology.org/
+
+$schemas:
+- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
diff --git a/utils/extract-data-csv-plugin/ict.yml b/utils/extract-data-csv-plugin/ict.yml
new file mode 100644
index 00000000..e92736b7
--- /dev/null
+++ b/utils/extract-data-csv-plugin/ict.yml
@@ -0,0 +1,87 @@
+specVersion: "0.1.0"
+name: extract_data_csv
+version: 0.1.0
+container: extract-data-csv-plugin
+entrypoint:
+title: extract_data_csv
+description: Extract data from a CSV file
+author: Brandon Walker, Nazanin Donyapour
+contact: brandon.walker@axleinfo.com, nazanin.donyapour@axleinfo.com
+repository:
+documentation:
+citation:
+
+inputs:
+  - name: input_csv_path
+    required: true
+    description: Path to the input csv file, Type string, File type input, Accepted formats csv
+    type: File
+    format:
+      uri: edam:format_3752
+  - name: query
+    required: true
+    description: query str to search the dataset, Type string, File type input, Accepted formats txt
+    type: string
+    format:
+      uri: edam:format_2330
+  - name: min_row
+    required: true
+    description: The row min index, Type int
+    type: int
+    format:
+      uri: edam:format_2330
+  - name: max_row
+    required: true
+    description: The row max index, Type int
+    type: int
+    format:
+      uri: edam:format_2330
+  - name: column_name
+    required: true
+    description: The name of the column to load data, Type string, File type input, Accepted formats txt
+    type: string
+    format:
+      uri: edam:format_2330
+  - name: output_txt_path
+    required: true
+    description: Path to the txt output file, Type string, File type output, Accepted formats txt
+    type: string
+    defaultValue: system.log
+    format:
+      uri: edam:format_2330
+outputs:
+  - name: output_txt_path
+    required: true
+    description: Path to the txt output file
+    type: File
+    format:
+      uri: edam:format_2330
+  - name: output_data
+    required: true
+    description: The output data
+    type: {'type': 'array', 'items': 'string'}
+ui:
+  - key: inputs.input_csv_path
+    title: "input_csv_path: "
+    description: "Path to the input csv file, Type string, File type input, Accepted formats csv"
+    type: File
+  - key: inputs.query
+    title: "query: "
+    description: "query str to search the dataset, Type string, File type input, Accepted formats txt"
+    type: string
+  - key: inputs.min_row
+    title: "min_row: "
+    description: "The row min index, Type int"
+    type: int
+  - key: inputs.max_row
+    title: "max_row: "
+    description: "The row max index, Type int"
+    type: int
+  - key: inputs.column_name
+    title: "column_name: "
+    description: "The name of the column to load data, Type string, File type input, Accepted formats txt"
+    type: string
+  - key: inputs.output_txt_path
+    title: "output_txt_path: "
+    description: "Path to the txt output file, Type string, File type output, Accepted formats txt"
+    type: string
diff --git a/utils/extract-data-csv-plugin/pyproject.toml b/utils/extract-data-csv-plugin/pyproject.toml
new file mode 100644
index 00000000..0f6d4ca3
--- /dev/null
+++ b/utils/extract-data-csv-plugin/pyproject.toml
@@ -0,0 +1,31 @@
+[tool.poetry]
+name = "polus-mm-utils-extract-data-csv"
+version = "0.1.0"
+description = "Extract data from a CSV file"
+authors = ["Nazanin Donyapour <nazanin.donyapour@axleinfo.com>", "Brandon Walker <brandon.walker@axleinfo.com>"]
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.13"
+typer = "^0.7.0"
+sophios = "0.1.4"
+pandas = "2.2.2"
+
+[tool.poetry.group.dev.dependencies]
+bump2version = "^1.0.1"
+pytest = "^7.4"
+pytest-sugar = "^0.9.6"
+pre-commit = "^3.2.1"
+black = "^23.3.0"
+mypy = "^1.1.1"
+ruff = "^0.0.270"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+pythonpath = [
+  "."
+]
diff --git a/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__init__.py b/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__init__.py
new file mode 100644
index 00000000..b357d01e
--- /dev/null
+++ b/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__init__.py
@@ -0,0 +1,7 @@
+"""Extract a column from a CSV file and write it to a text file."""
+
+__version__ = "0.1.0"  # listed in .bumpversion.cfg; updated by bump2version
+
+from polus.mm.utils.extract_data_csv.extract_data_csv import (  # noqa # pylint: disable=unused-import
+    extract_data_csv,
+)
diff --git a/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__main__.py b/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__main__.py
new file mode 100644
index 00000000..17a9e5f1
--- /dev/null
+++ b/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/__main__.py
@@ -0,0 +1,73 @@
+"""Package entrypoint for the extract_data_csv package."""
+
+# Base packages
+import logging
+from os import environ
+
+import typer
+from polus.mm.utils.extract_data_csv.extract_data_csv import extract_data_csv
+
+logging.basicConfig(
+    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
+    datefmt="%d-%b-%y %H:%M:%S",
+)
+POLUS_LOG = getattr(logging, environ.get("POLUS_LOG", "INFO"))
+logger = logging.getLogger("polus.mm.utils.extract_data_csv.")
+logger.setLevel(POLUS_LOG)
+
+app = typer.Typer(help="extract_data_csv.")
+
+
+@app.command()
+def main(  # noqa: PLR0913
+    input_csv_path: str = typer.Option(
+        ...,
+        "--input_csv_path",
+        help="Path to the input csv file, Type string, File type input",
+    ),
+    query: str = typer.Option(
+        ...,
+        "--query",
+        help="query str to search the dataset, Type string, File type input",
+    ),
+    min_row: int = typer.Option(
+        ...,
+        "--min_row",
+        help="The row min inex, Type int",
+    ),
+    max_row: int = typer.Option(
+        ...,
+        "--max_row",
+        help="The row max inex, Type int",
+    ),
+    column_name: str = typer.Option(
+        ...,
+        "--column_name",
+        help="The name of the column to load data, Type string, File type input",
+    ),
+    output_txt_path: str = typer.Option(
+        ...,
+        "--output_txt_path",
+        help="Path to the txt datoutput file, Type string, File type output",
+    ),
+) -> None:
+    """extract_data_csv."""
+    logger.info(f"input_csv_path: {input_csv_path}")
+    logger.info(f"query: {query}")
+    logger.info(f"min_row: {min_row}")
+    logger.info(f"max_row: {max_row}")
+    logger.info(f"column_name: {column_name}")
+    logger.info(f"output_txt_path: {output_txt_path}")
+
+    extract_data_csv(
+        input_csv_path=input_csv_path,
+        query=query,
+        min_row=min_row,
+        max_row=max_row,
+        column_name=column_name,
+        output_txt_path=output_txt_path,
+    )
+
+
+if __name__ == "__main__":  # true when run via ``python -m`` (see CWL baseCommand)
+    app()
diff --git a/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/extract_data_csv.py b/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/extract_data_csv.py
new file mode 100644
index 00000000..41bcec27
--- /dev/null
+++ b/utils/extract-data-csv-plugin/src/polus/mm/utils/extract_data_csv/extract_data_csv.py
@@ -0,0 +1,50 @@
+"""Extract data from a CSV file."""
+from pathlib import Path
+
+import pandas
+
+
+def extract_data_csv(  # noqa: PLR0913
+    input_csv_path: str,
+    query: str,
+    column_name: str,
+    output_txt_path: str,
+    min_row: int = 1,
+    max_row: int = -1,
+) -> None:
+    """extract_data_csv.
+
+    Args:
+        input_csv_path: Path to the input csv file, Type string, File type input
+        query: query str to search the dataset, Type string, File type input
+        column_name: The name of the column to load data, Type string, File type input
+        output_txt_path: Path to the txt datoutput file, Type string, File type output
+        min_row: The row min inex, Type int
+        max_row: The row max inex, Type int
+    Returns:
+        None
+    """
+    df = pandas.read_csv(input_csv_path)
+
+    print(df.shape)  # noqa: T201
+    print(df.columns)  # noqa: T201g
+
+    if query:
+        df = df.query(query)
+        print(df)  # noqa: T201
+
+    # Remove rows with null value in thecolumn
+    df.dropna(subset=[column_name], inplace=True)
+
+    # Perform row slicing (if any)
+    if int(min_row) != 1 or int(max_row) != -1:
+        # We want to convert to zero-based indices and we also want
+        # the upper index to be inclusive (i.e. <=) so -1 lower index.
+        df = df[(int(min_row) - 1) : int(max_row)]
+        print(df)  # noqa: T201g
+
+    # Now restrict to the column we want
+    with Path.open(Path(output_txt_path), mode="w", encoding="utf-8") as f:
+        for sm in df[column_name].to_list():
+            # repr() preserves backslashes and strip quotes
+            f.write(f"{repr(sm)[1:-1]}\n")
diff --git a/utils/extract-data-csv-plugin/tests/__init__.py b/utils/extract-data-csv-plugin/tests/__init__.py
new file mode 100644
index 00000000..d8c0b330
--- /dev/null
+++ b/utils/extract-data-csv-plugin/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for extract_data_csv."""
diff --git a/utils/extract-data-csv-plugin/tests/fake_sample_records.csv b/utils/extract-data-csv-plugin/tests/fake_sample_records.csv
new file mode 100644
index 00000000..85918dd6
--- /dev/null
+++ b/utils/extract-data-csv-plugin/tests/fake_sample_records.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be7e03a84d85217bc644a92b740965be1d4361ad3162bc9f3c7932e638fbf5bc
+size 1296
diff --git a/utils/extract-data-csv-plugin/tests/test_extract_data_csv.py b/utils/extract-data-csv-plugin/tests/test_extract_data_csv.py
new file mode 100644
index 00000000..d4b58917
--- /dev/null
+++ b/utils/extract-data-csv-plugin/tests/test_extract_data_csv.py
@@ -0,0 +1,49 @@
+"""Tests for extract_data_csv."""
+from pathlib import Path
+
+from polus.mm.utils.extract_data_csv.extract_data_csv import extract_data_csv
+from sophios.api.pythonapi import Step
+from sophios.api.pythonapi import Workflow
+
+
+def test_extract_data_csv() -> None:
+    """Test extract_data_csv."""
+    input_csv_path = "fake_sample_records.csv"
+    input_csv_path = str(Path(__file__).resolve().parent / Path(input_csv_path))
+    query = ""
+    column_name = "Smiles"
+    output_txt_path = "smiles.txt"
+
+    extract_data_csv(input_csv_path, query, column_name, output_txt_path)
+
+    assert Path(output_txt_path).exists()
+
+
+def test_extract_data_csv_cwl() -> None:
+    """Test extract_data_csv CWL."""
+    cwl_file = Path("extract_data_csv_0@1@0.cwl")
+
+    # Create the step for the CWL file
+    extract_data_csv_step = Step(clt_path=cwl_file)
+
+    input_csv_path = "fake_sample_records.csv"
+    input_csv_path = str(Path(__file__).resolve().parent / Path(input_csv_path))
+
+    extract_data_csv_step.input_csv_path = input_csv_path
+    extract_data_csv_step.query = ""
+    extract_data_csv_step.column_name = "Smiles"
+    extract_data_csv_step.output_txt_path = "smiles.txt"
+
+    # Define the workflow with the step
+    steps = [extract_data_csv_step]
+    filename = "extract_data_csv"
+    workflow = Workflow(steps, filename)
+
+    # Run the workflow
+    workflow.run()
+
+    # Check for the existence of the output file
+    outdir = Path("outdir")
+    assert any(
+        file.name == "smiles.txt" for file in outdir.rglob("*")
+    ), "The file output_scored.txt was not found."