From fc46f88484d3b36f5e8c4b4b25cc8bfbfd09b1f2 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Fri, 21 Jun 2024 09:59:25 +0200 Subject: [PATCH] updated build and analysis --- .github/workflows/publish-to-pypi.yml | 1 + README.md | 6 +++--- colbert_chunk_size_and_k.py | 2 +- poetry.lock | 19 +++++++++++++++++-- pyproject.toml | 1 + ragulate/analysis.py | 2 ++ scripts/test_integration_runner.py | 4 +++- scripts/test_unit_runner.py | 4 +++- 8 files changed, 31 insertions(+), 8 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index e5ee77f..3b936be 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -36,6 +36,7 @@ jobs: run: | echo "pyroject.toml:" cat pyproject.toml + poetry self add poetry-plugin-export poetry install poetry build poetry export -f requirements.txt --output requirements.txt diff --git a/README.md b/README.md index 056979d..1633c1e 100644 --- a/README.md +++ b/README.md @@ -2,14 +2,14 @@ A tool for evaluating RAG pipelines -![ragulate_logo](images/logo_smaller.png) +![ragulate_logo](https://raw.githubusercontent.com/epinzur/ragulate/main/images/logo_smaller.png) ## The Metrics The RAGulate currently reports 4 relevancy metrics: Answer Correctness, Answer Relevance, Context Relevance, and Groundedness. -![metrics_diagram](images/metrics.png) +![metrics_diagram](https://raw.githubusercontent.com/epinzur/ragulate/main/images/metrics.png) * Answer Correctness * How well does the generated answer match the ground-truth answer? @@ -28,7 +28,7 @@ The RAGulate currently reports 4 relevancy metrics: Answer Correctness, Answer R The tool outputs results as images like this: -![example_output](images/example.png) +![example_output](https://raw.githubusercontent.com/epinzur/ragulate/main/images/example.png) These images show distribution box plots of the metrics for different test runs. diff --git a/colbert_chunk_size_and_k.py b/colbert_chunk_size_and_k.py index 272f07a..adeb799 100644 --- a/colbert_chunk_size_and_k.py +++ b/colbert_chunk_size_and_k.py @@ -27,6 +27,7 @@ keyspace = "colbert" import logging + logging.basicConfig(level=logging.INFO) logging.getLogger("unstructured").setLevel(logging.ERROR) logging.getLogger("cassandra").setLevel(logging.ERROR) @@ -34,7 +35,6 @@ logging.getLogger("httpx").setLevel(logging.ERROR) - def get_embedding_model(chunk_size: int) -> ColbertEmbeddingModel: return ColbertEmbeddingModel(doc_maxlen=chunk_size, batch_size=batch_size) diff --git a/poetry.lock b/poetry.lock index 996dbc3..eb0384f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiohttp" @@ -3322,6 +3322,21 @@ files = [ {file = "rpds_py-0.18.1.tar.gz", hash = "sha256:dc48b479d540770c811fbd1eb9ba2bb66951863e448efec2e2c102625328e92f"}, ] +[[package]] +name = "setuptools" +version = "70.1.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-70.1.0-py3-none-any.whl", hash = "sha256:d9b8b771455a97c8a9f3ab3448ebe0b29b5e105f1228bba41028be116985a267"}, + {file = "setuptools-70.1.0.tar.gz", hash = "sha256:01a1e793faa5bd89abc851fa15d0a0db26f160890c7102cd8dce643e886b47f5"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" @@ -4179,4 +4194,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "6ac06e5fbf3acc677dc267c1002e263a1cc6a156560877645365e241c23658a3" +content-hash = "678a8e8edc4926462efbf5699598d6e14a4c281dbb5e37285257600a1871145a" diff --git a/pyproject.toml b/pyproject.toml index 630942c..4c4102f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ tqdm = ">=4.66.1" pyyaml = "^6.0.1" cerberus = "^1.3.5" pydantic = "^2.7.3" +setuptools = "^70.0.0" [tool.poetry.group.dev.dependencies] black = "^24.4.2" diff --git a/ragulate/analysis.py b/ragulate/analysis.py index f7c1b0c..64ad9a7 100644 --- a/ragulate/analysis.py +++ b/ragulate/analysis.py @@ -53,6 +53,8 @@ def get_all_data(self, recipes: List[str]) -> DataFrame: def output_plots_by_dataset(self, df: DataFrame, metrics: List[str]): recipes = sorted(df["recipe"].unique(), key=lambda x: x.lower()) datasets = sorted(df["dataset"].unique(), key=lambda x: x.lower()) + metrics = sorted(metrics) + metrics.reverse() # generate an array of rainbow colors by fixing the saturation and lightness of the HSL # representation of color and marching around the hue. diff --git a/scripts/test_integration_runner.py b/scripts/test_integration_runner.py index bdbe78a..6f6f27f 100644 --- a/scripts/test_integration_runner.py +++ b/scripts/test_integration_runner.py @@ -1,5 +1,7 @@ -import pytest import sys +import pytest + + def main(): sys.exit(pytest.main(["tests/integration_tests"])) diff --git a/scripts/test_unit_runner.py b/scripts/test_unit_runner.py index 7ff8de9..68ad640 100644 --- a/scripts/test_unit_runner.py +++ b/scripts/test_unit_runner.py @@ -1,5 +1,7 @@ -import pytest import sys +import pytest + + def main(): sys.exit(pytest.main(["tests/unit_tests"]))