diff --git a/Dockerfile b/Dockerfile
index dc6ceb2..8219fa5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,6 +7,8 @@ WORKDIR CASALIOY
 RUN pip3 install poetry
 RUN python3 -m poetry config virtualenvs.create false
 RUN python3 -m poetry install
-RUN python3 -m pip install --force streamlit # Temp fix, see pyproject.toml
+RUN python3 -m pip install --force streamlit sentence_transformers # Temp fix, see pyproject.toml
+RUN python3 -m pip uninstall -y llama-cpp-python
+RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 python3 -m pip install llama-cpp-python # GPU support
 RUN pre-commit install
 COPY example.env .env
diff --git a/README.md b/README.md
index 07b6bec..034f86b 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@ for older docker without GUI use `casalioy:latest` might deprecate soon
 ```
 cd models
 wget https://huggingface.co/Pi3141/alpaca-native-7B-ggml/resolve/397e872bf4c83f4c642317a5bf65ce84a105786e/ggml-model-q4_0.bin &&
-wget https://huggingface.co/datasets/dnato/ggjt-v1-vic7b-uncensored-q4_0.bin/resolve/main/ggjt-v1-vic7b-uncensored-q4_0.bin
+wget https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q5_1.bin
 cd ../
 ```
@@ -59,15 +59,21 @@ cd ../
 python -m pip install poetry
 python -m poetry config virtualenvs.in-project true
 python -m poetry install
-python -m pip install --force streamlit # Temporary bandaid fix, waiting for streamlit >=1.23
 . .venv/bin/activate
+python -m pip install --force streamlit sentence_transformers # Temporary bandaid fix, waiting for streamlit >=1.23
 pre-commit install
 ```

+If you want GPU support for llama-cpp:
+```shell
+pip uninstall -y llama-cpp-python
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --force llama-cpp-python
+```
+
 > Download the 2 models and place them in a folder called `./models`:

 - LLM: default
-  is [ggjt-v1-vic7b-uncensored-q4_0](https://huggingface.co/datasets/dnato/ggjt-v1-vic7b-uncensored-q4_0.bin/resolve/main/ggjt-v1-vic7b-uncensored-q4_0.bin)
+  is [ggml-vic7b-q5_1](https://huggingface.co/eachadea/ggml-vicuna-7b-1.1/resolve/main/ggml-vic7b-q5_1.bin)
 - Embedding: default
   to [ggml-model-q4_0](https://huggingface.co/Pi3141/alpaca-native-7B-ggml/resolve/397e872bf4c83f4c642317a5bf65ce84a105786e/ggml-model-q4_0.bin).
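Worth noting alongside the model list: with the new `example.env` defaults (`TEXT_EMBEDDINGS_MODEL=all-MiniLM-L6-v2`, `TEXT_EMBEDDINGS_MODEL_TYPE=HF`), text embeddings come from a sentence-transformers model pulled from the Hugging Face Hub rather than from a local GGML file. A minimal sketch of how that setting is resolved, mirroring the `HuggingFaceEmbeddings` wrapper this PR switches to (assuming the HF default):

```python
# Sketch: resolve the HF embedding default the way load_env.get_embedding_model does.
# Assumes TEXT_EMBEDDINGS_MODEL_TYPE=HF; the weights are downloaded from the Hub on first use.
from langchain.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")  # TEXT_EMBEDDINGS_MODEL
encode_fun = embedding_model.client.encode  # the callable ingest.py now receives
vector = encode_fun("What did the president say about Ketanji Brown Jackson?")
print(len(vector))  # 384 dimensions for all-MiniLM-L6-v2
```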
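For the optional cuBLAS build above, a quick way to confirm the rebuilt `llama-cpp-python` wheel actually uses the GPU is to load the model directly and request layer offloading. This is only a sketch: it assumes a CUDA-capable build and that your `llama-cpp-python` version exposes `n_gpu_layers` (the same knob the new `N_GPU_LAYERS` setting feeds); with `verbose=True` the startup log should report `BLAS = 1` and the number of offloaded layers.

```python
# Sketch: sanity-check the cuBLAS-enabled llama-cpp-python build (not part of this PR).
from llama_cpp import Llama

llm = Llama(
    model_path="models/ggml-vic7b-q5_1.bin",  # default MODEL_PATH from example.env
    n_ctx=1024,                               # MODEL_N_CTX
    n_gpu_layers=32,                          # assumed parameter; 0 keeps inference on the CPU
    verbose=True,                             # prints system info, including the BLAS flag
)
out = llm("HUMAN: Say hello.\n\nASSISTANT:", max_tokens=16, stop=["[STOP]"])
print(out["choices"][0]["text"])
```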
@@ -102,7 +108,7 @@ This should look like this
 │   └── shor.pdfstate_of_the_union.txt
 │   └── state_of_the_union.txt
 ├── models
-│   ├── ggjt-v1-vic7b-uncensored-q4_0.bin
+│   ├── ggml-vic7b-q5_1.bin
 │   └── ggml-model-q4_0.bin
 └── .env, convert.py, Dockerfile
 ```
diff --git a/example.env b/example.env
index ff91e9a..a954796 100644
--- a/example.env
+++ b/example.env
@@ -1,6 +1,7 @@
 # Generic
 MODEL_N_CTX=1024
-LLAMA_EMBEDDINGS_MODEL=models/ggml-model-q4_0.bin
+TEXT_EMBEDDINGS_MODEL=all-MiniLM-L6-v2
+TEXT_EMBEDDINGS_MODEL_TYPE=HF # LlamaCpp or HF
 USE_MLOCK=true

 # Ingestion
@@ -11,6 +12,7 @@ INGEST_CHUNK_OVERLAP=50

 # Generation
 MODEL_TYPE=LlamaCpp # GPT4All or LlamaCpp
-MODEL_PATH=models/ggjt-v1-vic7b-uncensored-q4_0.bin
+MODEL_PATH=models/ggml-vic7b-q5_1.bin
 MODEL_TEMP=0.8
-MODEL_STOP=###,\n
+MODEL_STOP=[STOP]
+CHAIN_TYPE=stuff
diff --git a/ingest.py b/ingest.py
index 743bfa7..b9ef68f 100644
--- a/ingest.py
+++ b/ingest.py
@@ -4,6 +4,7 @@
 import sys
 from hashlib import md5
 from pathlib import Path
+from typing import Callable

 from langchain.docstore.document import Document
 from langchain.document_loaders import (
@@ -15,11 +16,10 @@
     UnstructuredHTMLLoader,
     UnstructuredPowerPointLoader,
 )
-from langchain.embeddings import LlamaCppEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from qdrant_client import QdrantClient, models

-from load_env import chunk_overlap, chunk_size, documents_directory, llama_embeddings_model, model_n_ctx, persist_directory, use_mlock
+from load_env import chunk_overlap, chunk_size, documents_directory, get_embedding_model, persist_directory

 file_loaders = {  # extension -> loader
     "txt": lambda path: TextLoader(path, encoding="utf8"),
@@ -41,13 +41,13 @@ def load_one_doc(filepath: Path) -> list[Document]:
     return file_loaders[filepath.suffix[1:]](str(filepath)).load()


-def embed_documents_with_progress(embedding_model: LlamaCppEmbeddings, texts: list[str]) -> list[list[float]]:
+def embed_documents_with_progress(embedding_function: Callable, texts: list[str]) -> list[list[float]]:
     """wrapper around embed_documents that prints progress"""
     embeddings = []
     N_chunks = len(texts)
     for i, text in enumerate(texts):
-        print(f"embedding chunk {i+1}/{N_chunks}")
-        embeddings.append(embedding_model.client.embed(text))
+        print(f"embedding chunk {i + 1}/{N_chunks}")
+        embeddings.append(embedding_function(text))
     return [list(map(float, e)) for e in embeddings]


@@ -76,12 +76,12 @@ def main(sources_directory: str, cleandb: str) -> None:

     # Generate embeddings
     print("Generating embeddings...")
-    embedding_model = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx, use_mlock=use_mlock)
-    embeddings = embed_documents_with_progress(embedding_model, texts)
+    embedding_model, encode_fun = get_embedding_model()
+    embeddings = embed_documents_with_progress(encode_fun, texts)

     # Store embeddings
     print("Storing embeddings...")
-    client = QdrantClient(path=db_dir)  # using Qdrant.from_documents recreates the db each time
+    client = QdrantClient(path=db_dir, prefer_grpc=True)  # using Qdrant.from_documents recreates the db each time
     try:
         collection = client.get_collection("test")
     except ValueError:  # doesn't exist
diff --git a/load_env.py b/load_env.py
index 7b57f9d..0d29c73 100644
--- a/load_env.py
+++ b/load_env.py
@@ -1,12 +1,16 @@
 """load env variables"""
 import os
+from typing import Callable

 from dotenv import load_dotenv
+from langchain.embeddings import HuggingFaceEmbeddings, LlamaCppEmbeddings
+from langchain.prompts import PromptTemplate

 load_dotenv()

 # generic
-llama_embeddings_model = os.environ.get("LLAMA_EMBEDDINGS_MODEL")
+text_embeddings_model = os.environ.get("TEXT_EMBEDDINGS_MODEL")
+text_embeddings_model_type = os.environ.get("TEXT_EMBEDDINGS_MODEL_TYPE")
 model_n_ctx = int(os.environ.get("MODEL_N_CTX"))
 use_mlock = os.environ.get("USE_MLOCK").lower() == "true"

@@ -19,5 +23,63 @@
 # generate
 model_type = os.environ.get("MODEL_TYPE")
 model_path = os.environ.get("MODEL_PATH")
-model_temp = float(os.environ.get("MODEL_TEMP"))
-model_stop = os.environ.get("MODEL_STOP").split(",")
+model_temp = float(os.environ.get("MODEL_TEMP", "0.8"))
+model_stop = os.environ.get("MODEL_STOP", "")
+model_stop = model_stop.split(",") if model_stop else []
+chain_type = os.environ.get("CHAIN_TYPE", "refine")
+n_gpu_layers = int(os.environ.get("N_GPU_LAYERS", 0))
+
+
+def get_embedding_model() -> tuple[HuggingFaceEmbeddings, Callable] | tuple[LlamaCppEmbeddings, Callable]:
+    """get the text embedding model
+    :returns: tuple[the model, its encoding function]"""
+    match text_embeddings_model_type:
+        case "HF":
+            model = HuggingFaceEmbeddings(model_name=text_embeddings_model)
+            return model, model.client.encode
+        case "LlamaCpp":
+            model = LlamaCppEmbeddings(model_path=text_embeddings_model, n_ctx=model_n_ctx)
+            return model, model.client.embed
+        case _:
+            raise ValueError(f"Unknown embedding type {text_embeddings_model_type}")
+
+
+def get_prompt_template_kwargs() -> dict[str, PromptTemplate]:
+    """get an improved prompt template"""
+    match chain_type:
+        case "stuff":
+            question_prompt = """HUMAN: Answer the question using ONLY the given context. If you are unsure of the answer, respond with "Unknown[STOP]". Conclude your response with "[STOP]" to indicate the completion of the answer.
+
+Context: {context}
+
+Question: {question}
+
+ASSISTANT:"""
+            return {"prompt": PromptTemplate(template=question_prompt, input_variables=["context", "question"])}
+        case "refine":
+            question_prompt = """HUMAN: Answer the question using ONLY the given context.
+Indicate the end of your answer with "[STOP]" and refrain from adding any additional information beyond that which is provided in the context.
+
+Question: {question}
+
+Context: {context_str}
+
+ASSISTANT:"""
+            refine_prompt = """HUMAN: Refine the original answer to the question using the new context.
+Use ONLY the information from the context and your previous answer.
+If the context is not helpful, use the original answer.
+Indicate the end of your answer with "[STOP]" and avoid adding any extraneous information.
+
+Original question: {question}
+
+Existing answer: {existing_answer}
+
+New context: {context_str}
+
+ASSISTANT:"""
+            return {
+                "question_prompt": PromptTemplate(template=question_prompt, input_variables=["context_str", "question"]),
+                "refine_prompt": PromptTemplate(template=refine_prompt, input_variables=["context_str", "existing_answer", "question"]),
+            }
+        case _:
+            return {}
diff --git a/poetry.lock b/poetry.lock
index 44ef295..ec101a8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4,7 +4,7 @@
 name = "aiohttp"
 version = "3.8.4"
 description = "Async http client/server framework (asyncio)"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -113,7 +113,7 @@ speedups = ["Brotli", "aiodns", "cchardet"]
 name = "aiosignal"
 version = "1.3.1"
 description = "aiosignal: a list of registered asynchronous callbacks"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -128,7 +128,7 @@ frozenlist = ">=1.1.0"
 name = "altair"
 version = "4.2.2"
 description = "Altair: A declarative statistical visualization library for Python."
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -151,7 +151,7 @@ dev = ["black", "docutils", "flake8", "ipython", "m2r", "mistune (<2.0.0)", "pyt
 name = "anyio"
 version = "3.6.2"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.6.2"
 files = [
@@ -172,7 +172,7 @@ trio = ["trio (>=0.16,<0.22)"]
 name = "argilla"
 version = "1.7.0"
 description = "Open-source tool for exploring, labeling, and monitoring data for NLP projects."
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -203,7 +203,7 @@ server = ["PyYAML (>=5.4.1,<6.1.0)", "SQLAlchemy (>=2.0.0,<2.1.0)", "aiofiles (>
 name = "async-timeout"
 version = "4.0.2"
 description = "Timeout context manager for asyncio programs"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -215,7 +215,7 @@ files = [
 name = "attrs"
 version = "23.1.0"
 description = "Classes Without Boilerplate"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -234,7 +234,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte
 name = "backoff"
 version = "2.2.1"
 description = "Function decoration for backoff and retry"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7,<4.0"
 files = [
@@ -246,7 +246,7 @@ files = [
 name = "beautifulsoup4"
 version = "4.12.2"
 description = "Screen-scraping library"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.6.0"
 files = [
@@ -315,7 +315,7 @@ uvloop = ["uvloop (>=0.15.2)"]
 name = "blinker"
 version = "1.6.2"
 description = "Fast, simple object-to-object and broadcast signaling"
-category = "main"
+category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -327,7 +327,7 @@ files = [
 name = "cachetools"
 version = "5.3.0"
 description = "Extensible memoizing collections and decorators"
-category = "main"
+category = "dev"
 optional = false
 python-versions = "~=3.7"
 files = [
@@ -339,7 +339,7 @@ files = [
 name = "certifi"
 version = "2023.5.7"
 description = "Python package for providing Mozilla's CA Bundle."
-category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -525,7 +525,7 @@ files = [ name = "click" version = "8.1.3" description = "Composable command line interface toolkit" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -540,7 +540,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." -category = "main" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -552,7 +552,7 @@ files = [ name = "commonmark" version = "0.9.1" description = "Python parser for the CommonMark Markdown spec" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -567,7 +567,7 @@ test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] name = "contourpy" version = "1.0.7" description = "Python library for calculating contours of 2D quadrilateral grids" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -684,7 +684,7 @@ tox = ["tox"] name = "cycler" version = "0.11.0" description = "Composable style cycles" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -696,7 +696,7 @@ files = [ name = "dataclasses-json" version = "0.5.7" description = "Easily serialize dataclasses to and from JSON" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -716,7 +716,7 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -728,7 +728,7 @@ files = [ name = "deprecated" version = "1.2.13" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -758,7 +758,7 @@ files = [ name = "docx2txt" version = "0.8" description = "A pure python-based utility to extract text and images from docx files." -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -769,7 +769,7 @@ files = [ name = "entrypoints" version = "0.4" description = "Discover and load entry points from installed packages." -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -781,7 +781,7 @@ files = [ name = "et-xmlfile" version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -793,7 +793,7 @@ files = [ name = "faker" version = "18.7.0" description = "Faker is a Python package that generates fake data for you." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -808,7 +808,7 @@ python-dateutil = ">=2.4" name = "favicon" version = "0.7.0" description = "Get a website's favicon." 
-category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -840,7 +840,7 @@ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "p name = "fonttools" version = "4.39.4" description = "Tools to manipulate font files" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -866,7 +866,7 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] name = "frozenlist" version = "1.3.3" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -946,11 +946,77 @@ files = [ {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, ] +[[package]] +name = "fsspec" +version = "2023.5.0" +description = "File-system specification" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"}, + {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + +[[package]] +name = "gitdb" +version = "4.0.10" +description = "Git Object Database" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"}, + {file = "gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"}, +] + +[package.dependencies] +smmap = ">=3.0.1,<6" + +[[package]] +name = "gitpython" +version = "3.1.31" +description = "GitPython is a Python library used to interact with Git repositories" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "GitPython-3.1.31-py3-none-any.whl", hash = "sha256:f04893614f6aa713a60cbbe1e6a97403ef633103cdd0ef5eb6efe0deb98dbe8d"}, + {file = "GitPython-3.1.31.tar.gz", hash = "sha256:8ce3bcf69adfdf7c7d503e78fd3b1c492af782d58893b650adb2ac8912ddd573"}, +] + +[package.dependencies] +gitdb = ">=4.0.1,<5" + [[package]] name = "greenlet" version = "2.0.2" description = "Lightweight in-process concurrent programming" -category = "main" +category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" files = [ @@ -1024,7 +1090,7 @@ test = ["objgraph", "psutil"] name = "grpcio" version = "1.54.2" description = "HTTP/2-based RPC framework" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1082,7 +1148,7 @@ protobuf = ["grpcio-tools (>=1.54.2)"] name = "grpcio-tools" version = "1.54.2" 
description = "Protobuf code generator for gRPC" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1142,7 +1208,7 @@ setuptools = "*" name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1154,7 +1220,7 @@ files = [ name = "h2" version = "4.1.0" description = "HTTP/2 State-Machine based protocol implementation" -category = "main" +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -1170,7 +1236,7 @@ hyperframe = ">=6.0,<7" name = "hpack" version = "4.0.0" description = "Pure-Python HPACK header compression" -category = "main" +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -1182,7 +1248,7 @@ files = [ name = "htbuilder" version = "0.6.1" description = "A purely-functional HTML builder for Python. Think JSX rather than templates." -category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1196,7 +1262,7 @@ more-itertools = "*" name = "httpcore" version = "0.16.3" description = "A minimal low-level HTTP client." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1218,7 +1284,7 @@ socks = ["socksio (>=1.0.0,<2.0.0)"] name = "httpx" version = "0.23.3" description = "The next generation HTTP client." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1239,11 +1305,43 @@ cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<13)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (>=1.0.0,<2.0.0)"] +[[package]] +name = "huggingface-hub" +version = "0.14.1" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +category = "dev" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.14.1-py3-none-any.whl", hash = "sha256:9fc619170d800ff3793ad37c9757c255c8783051e1b5b00501205eb43ccc4f27"}, + {file = "huggingface_hub-0.14.1.tar.gz", hash = "sha256:9ab899af8e10922eac65e290d60ab956882ab0bf643e3d990b1394b6b47b7fbc"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "gradio", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"] +torch = ["torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + [[package]] name = "hyperframe" version = "6.0.1" description = "HTTP/2 framing 
layer for Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -1270,7 +1368,7 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1282,7 +1380,7 @@ files = [ name = "importlib-metadata" version = "6.6.0" description = "Read metadata from Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1302,7 +1400,7 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag name = "importlib-resources" version = "5.12.0" description = "Read resources from Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1339,7 +1437,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "jinja2" version = "3.1.2" description = "A very fast and expressive template engine." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1357,7 +1455,7 @@ i18n = ["Babel (>=2.7)"] name = "joblib" version = "1.2.0" description = "Lightweight pipelining with Python functions" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1369,7 +1467,7 @@ files = [ name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1389,7 +1487,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "kiwisolver" version = "1.4.4" description = "A fast implementation of the Cassowary constraint solver" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1467,7 +1565,7 @@ files = [ name = "langchain" version = "0.0.168" description = "Building applications with LLMs through composability" -category = "main" +category = "dev" optional = false python-versions = ">=3.8.1,<4.0" files = [ @@ -1550,13 +1648,13 @@ dev = ["Sphinx (>=5.1.1)", "black (==22.10.0)", "coverage (>=4.5.4)", "fixit (== [[package]] name = "llama-cpp-python" -version = "0.1.49" +version = "0.1.50" description = "A Python wrapper for llama.cpp" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "llama_cpp_python-0.1.49.tar.gz", hash = "sha256:f22d3f39a8d19e187a86114c81296c3d2de4f31cbe1c7bd887d543f025825820"}, + {file = "llama_cpp_python-0.1.50.tar.gz", hash = "sha256:e305ae1b9f135f94afd8dd227701e6a1cd36db9c28f736b830ec364127c00bb9"}, ] [package.dependencies] @@ -1566,7 +1664,7 @@ typing-extensions = ">=4.5.0,<5.0.0" name = "lxml" version = "4.9.2" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -1659,7 +1757,7 @@ source = ["Cython (>=0.29.7)"] name = "markdown" version = "3.4.3" description = "Python implementation of John Gruber's Markdown." 
-category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1677,7 +1775,7 @@ testing = ["coverage", "pyyaml"] name = "markdownlit" version = "0.0.7" description = "markdownlit adds a couple of lit Markdown capabilities to your Streamlit apps" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1698,7 +1796,7 @@ streamlit-extras = "*" name = "markupsafe" version = "2.1.2" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1758,7 +1856,7 @@ files = [ name = "marshmallow" version = "3.19.0" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1779,7 +1877,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-enum" version = "1.5.1" description = "Enum field for Marshmallow" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -1794,7 +1892,7 @@ marshmallow = ">=2.0.0" name = "matplotlib" version = "3.7.1" description = "Python plotting package" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1857,7 +1955,7 @@ python-dateutil = ">=2.7" name = "monotonic" version = "1.6" description = "An implementation of time.monotonic() for Python 2 & < 3.3" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -1869,7 +1967,7 @@ files = [ name = "more-itertools" version = "9.1.0" description = "More routines for operating on iterables, beyond itertools" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1877,11 +1975,29 @@ files = [ {file = "more_itertools-9.1.0-py3-none-any.whl", hash = "sha256:d2bc7f02446e86a68911e58ded76d6561eea00cddfb2a91e7019bbb586c799f3"}, ] +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + [[package]] name = "msg-parser" version = "1.2.0" description = "This module enables reading, parsing and converting Microsoft Outlook MSG E-Mail files." -category = "main" +category = "dev" optional = false python-versions = ">=3.4" files = [ @@ -1899,7 +2015,7 @@ rtf = ["compressed-rtf (>=1.0.5)"] name = "multidict" version = "6.0.4" description = "multidict implementation" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1983,7 +2099,7 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
-category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1991,11 +2107,30 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "networkx" +version = "3.1" +description = "Python package for creating and manipulating graphs and networks" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"}, + {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"}, +] + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + [[package]] name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2036,7 +2171,7 @@ setuptools = "*" name = "numexpr" version = "2.8.4" description = "Fast numerical expression evaluator for NumPy" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2079,7 +2214,7 @@ numpy = ">=1.13.3" name = "numpy" version = "1.23.5" description = "NumPy is the fundamental package for array computing with Python." -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2117,7 +2252,7 @@ files = [ name = "olefile" version = "0.46" description = "Python package to parse, read and write Microsoft OLE2 files (Structured Storage or Compound Document, Microsoft Office)" -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2128,7 +2263,7 @@ files = [ name = "openapi-schema-pydantic" version = "1.2.4" description = "OpenAPI (v3) specification schema as pydantic class" -category = "main" +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -2143,7 +2278,7 @@ pydantic = ">=1.8.2" name = "openpyxl" version = "3.1.2" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2158,7 +2293,7 @@ et-xmlfile = "*" name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2170,7 +2305,7 @@ files = [ name = "pandas" version = "1.5.3" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2264,7 +2399,7 @@ image = ["Pillow"] name = "pillow" version = "9.5.0" description = "Python Imaging Library (Fork)" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2360,7 +2495,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest- name = "portalocker" version = "2.7.0" description = "Wraps the portalocker recipe for easy usage" -category = "main" +category = "dev" 
optional = false python-versions = ">=3.5" files = [ @@ -2399,7 +2534,7 @@ virtualenv = ">=20.10.0" name = "protobuf" version = "4.23.0" description = "" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2422,7 +2557,7 @@ files = [ name = "pyarrow" version = "12.0.0" description = "Python library for Apache Arrow" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2491,7 +2626,7 @@ files = [ name = "pydantic" version = "1.10.7" description = "Data validation and settings management using python type hints" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2540,11 +2675,31 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pydeck" +version = "0.8.0" +description = "Widget for deck.gl maps" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydeck-0.8.0-py2.py3-none-any.whl", hash = "sha256:a8fa7757c6f24bba033af39db3147cb020eef44012ba7e60d954de187f9ed4d5"}, + {file = "pydeck-0.8.0.tar.gz", hash = "sha256:07edde833f7cfcef6749124351195aa7dcd24663d4909fd7898dbd0b6fbc01ec"}, +] + +[package.dependencies] +jinja2 = ">=2.10.1" +numpy = ">=1.16.4" + +[package.extras] +carto = ["pydeck-carto"] +jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "traitlets (>=4.3.2)"] + [[package]] name = "pygments" version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2559,7 +2714,7 @@ plugins = ["importlib-metadata"] name = "pygpt4all" version = "1.1.0" description = "Official Python CPU inference for GPT4All language models based on llama.cpp and ggml" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2574,7 +2729,7 @@ pyllamacpp = "*" name = "pygptj" version = "2.0.3" description = "Python bindings for the GGML GPT-J Laguage model" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2628,7 +2783,7 @@ numpy = "*" name = "pyllamacpp" version = "2.1.3" description = "Python bindings for llama.cpp" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2679,7 +2834,7 @@ files = [ name = "pymdown-extensions" version = "9.11" description = "Extension pack for Python Markdown." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2695,7 +2850,7 @@ pyyaml = "*" name = "pympler" version = "1.0.1" description = "A development tool to measure, monitor and analyze the memory behavior of Python objects." -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2707,7 +2862,7 @@ files = [ name = "pypandoc" version = "1.11" description = "Thin wrapper for pandoc." 
-category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2719,7 +2874,7 @@ files = [ name = "pyparsing" version = "3.0.9" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" +category = "dev" optional = false python-versions = ">=3.6.8" files = [ @@ -2734,7 +2889,7 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pyrsistent" version = "0.19.3" description = "Persistent/Functional/Immutable data structures" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2771,7 +2926,7 @@ files = [ name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2786,7 +2941,7 @@ six = ">=1.5" name = "python-docx" version = "0.8.11" description = "Create and update Microsoft Word .docx files." -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -2815,7 +2970,7 @@ cli = ["click (>=5.0)"] name = "python-magic" version = "0.4.27" description = "File type identification using libmagic" -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2827,7 +2982,7 @@ files = [ name = "python-pptx" version = "0.6.21" description = "Generate and manipulate Open XML PowerPoint (.pptx) files" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -2843,7 +2998,7 @@ XlsxWriter = ">=0.5.7" name = "pytz" version = "2023.3" description = "World timezone definitions, modern and historical" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -2855,7 +3010,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -2879,7 +3034,7 @@ files = [ name = "pyyaml" version = "6.0" description = "YAML parser and emitter for Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -2929,7 +3084,7 @@ files = [ name = "qdrant-client" version = "1.1.7" description = "Client library for the Qdrant vector search engine" -category = "main" +category = "dev" optional = false python-versions = ">=3.7,<3.12" files = [ @@ -2951,7 +3106,7 @@ urllib3 = ">=1.26.14,<2.0.0" name = "regex" version = "2023.5.5" description = "Alternative regular expression module, to replace re." -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3049,7 +3204,7 @@ files = [ name = "requests" version = "2.30.0" description = "Python HTTP for Humans." 
-category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3071,7 +3226,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "rfc3986" version = "1.5.0" description = "Validating URI References per RFC 3986" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -3089,7 +3244,7 @@ idna2008 = ["idna"] name = "rich" version = "13.0.1" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "main" +category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -3104,6 +3259,111 @@ pygments = ">=2.6.0,<3.0.0" [package.extras] jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] +[[package]] +name = "scikit-learn" +version = "1.2.2" +description = "A set of python modules for machine learning and data mining" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scikit-learn-1.2.2.tar.gz", hash = "sha256:8429aea30ec24e7a8c7ed8a3fa6213adf3814a6efbea09e16e0a0c71e1a1a3d7"}, + {file = "scikit_learn-1.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99cc01184e347de485bf253d19fcb3b1a3fb0ee4cea5ee3c43ec0cc429b6d29f"}, + {file = "scikit_learn-1.2.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e6e574db9914afcb4e11ade84fab084536a895ca60aadea3041e85b8ac963edb"}, + {file = "scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fe83b676f407f00afa388dd1fdd49e5c6612e551ed84f3b1b182858f09e987d"}, + {file = "scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2642baa0ad1e8f8188917423dd73994bf25429f8893ddbe115be3ca3183584"}, + {file = "scikit_learn-1.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ad66c3848c0a1ec13464b2a95d0a484fd5b02ce74268eaa7e0c697b904f31d6c"}, + {file = "scikit_learn-1.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dfeaf8be72117eb61a164ea6fc8afb6dfe08c6f90365bde2dc16456e4bc8e45f"}, + {file = "scikit_learn-1.2.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:fe0aa1a7029ed3e1dcbf4a5bc675aa3b1bc468d9012ecf6c6f081251ca47f590"}, + {file = "scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:065e9673e24e0dc5113e2dd2b4ca30c9d8aa2fa90f4c0597241c93b63130d233"}, + {file = "scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf036ea7ef66115e0d49655f16febfa547886deba20149555a41d28f56fd6d3c"}, + {file = "scikit_learn-1.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:8b0670d4224a3c2d596fd572fb4fa673b2a0ccfb07152688ebd2ea0b8c61025c"}, + {file = "scikit_learn-1.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9c710ff9f9936ba8a3b74a455ccf0dcf59b230caa1e9ba0223773c490cab1e51"}, + {file = "scikit_learn-1.2.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:2dd3ffd3950e3d6c0c0ef9033a9b9b32d910c61bd06cb8206303fb4514b88a49"}, + {file = "scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44b47a305190c28dd8dd73fc9445f802b6ea716669cfc22ab1eb97b335d238b1"}, + {file = "scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:953236889928d104c2ef14027539f5f2609a47ebf716b8cbe4437e85dce42744"}, + {file = "scikit_learn-1.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:7f69313884e8eb311460cc2f28676d5e400bd929841a2c8eb8742ae78ebf7c20"}, + {file = "scikit_learn-1.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:8156db41e1c39c69aa2d8599ab7577af53e9e5e7a57b0504e116cc73c39138dd"}, + {file = "scikit_learn-1.2.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fe175ee1dab589d2e1033657c5b6bec92a8a3b69103e3dd361b58014729975c3"}, + {file = "scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d5312d9674bed14f73773d2acf15a3272639b981e60b72c9b190a0cffed5bad"}, + {file = "scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea061bf0283bf9a9f36ea3c5d3231ba2176221bbd430abd2603b1c3b2ed85c89"}, + {file = "scikit_learn-1.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:6477eed40dbce190f9f9e9d0d37e020815825b300121307942ec2110302b66a3"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3" +scipy = ">=1.3.2" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.10.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.10.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=22.3.0)", "flake8 (>=3.8.2)", "matplotlib (>=3.1.3)", "mypy (>=0.961)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=5.3.1)", "pytest-cov (>=2.9.0)", "scikit-image (>=0.16.2)"] + +[[package]] +name = "scipy" +version = "1.10.1" +description = "Fundamental algorithms for scientific computing in Python" +category = "dev" +optional = false +python-versions = "<3.12,>=3.8" +files = [ + {file = "scipy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019"}, + {file = "scipy-1.10.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e"}, + {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f"}, + {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2"}, + {file = "scipy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1"}, + {file = "scipy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd"}, + {file = "scipy-1.10.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5"}, + {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35"}, + {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d"}, + {file = "scipy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f"}, + {file = "scipy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35"}, + {file = 
"scipy-1.10.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88"}, + {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1"}, + {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f"}, + {file = "scipy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415"}, + {file = "scipy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9"}, + {file = "scipy-1.10.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6"}, + {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353"}, + {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601"}, + {file = "scipy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea"}, + {file = "scipy-1.10.1.tar.gz", hash = "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5"}, +] + +[package.dependencies] +numpy = ">=1.19.5,<1.27.0" + +[package.extras] +dev = ["click", "doit (>=0.36.0)", "flake8", "mypy", "pycodestyle", "pydevtool", "rich-click", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "sentence-transformers" +version = "2.2.2" +description = "Multilingual text embeddings" +category = "dev" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "sentence-transformers-2.2.2.tar.gz", hash = "sha256:dbc60163b27de21076c9a30d24b5b7b6fa05141d68cf2553fa9a77bf79a29136"}, +] + +[package.dependencies] +huggingface-hub = ">=0.4.0" +nltk = "*" +numpy = "*" +scikit-learn = "*" +scipy = "*" +sentencepiece = "*" +torch = ">=1.6.0" +torchvision = "*" +tqdm = "*" +transformers = ">=4.6.0,<5.0.0" + [[package]] name = "sentencepiece" version = "0.1.99" @@ -3163,7 +3423,7 @@ files = [ name = "setuptools" version = "67.7.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3180,7 +3440,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3188,11 +3448,23 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "smmap" +version = "5.0.0" +description = "A pure Python implementation of a sliding window memory map manager" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"}, + {file = 
"smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"}, +] + [[package]] name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3204,7 +3476,7 @@ files = [ name = "soupsieve" version = "2.4.1" description = "A modern CSS selector implementation for Beautiful Soup." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3216,7 +3488,7 @@ files = [ name = "sqlalchemy" version = "2.0.13" description = "Database Abstraction Library" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3294,7 +3566,7 @@ sqlcipher = ["sqlcipher3-binary"] name = "st-annotated-text" version = "4.0.0" description = "A simple component to display annotated text in Streamlit apps." -category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3307,8 +3579,8 @@ htbuilder = "*" [[package]] name = "streamlit" version = "1.22.0" -description = "" -category = "main" +description = "A faster way to build and share data apps" +category = "dev" optional = false python-versions = ">=3.7, !=3.9.7" files = [] @@ -3319,6 +3591,7 @@ altair = ">=3.2.0,<5" blinker = ">=1.0.0" cachetools = ">=4.0" click = ">=7.0" +gitpython = "!=3.1.19" importlib-metadata = ">=1.4" numpy = "*" packaging = ">=14.1" @@ -3326,17 +3599,22 @@ pandas = ">=0.25,<3" pillow = ">=6.2.0" protobuf = ">=3.20,<5" pyarrow = ">=4.0" +pydeck = ">=0.1.dev5" pympler = ">=0.9" python-dateutil = "*" requests = ">=2.4" rich = ">=10.11.0" tenacity = ">=8.0.0,<9" toml = "*" +tornado = ">=6.0.3" typing-extensions = ">=3.10.0.0" tzlocal = ">=1.1" validators = ">=0.2" watchdog = {version = "*", markers = "platform_system != \"Darwin\""} +[package.extras] +snowflake = ["snowflake-snowpark-python"] + [package.source] type = "git" url = "https://github.com/hippalectryon-0/streamlit.git" @@ -3347,7 +3625,7 @@ subdirectory = "lib" name = "streamlit-camera-input-live" version = "0.2.0" description = "Alternative version of st.camera_input which returns the webcam images live, without any button press needed" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3363,7 +3641,7 @@ streamlit = ">=1.2" name = "streamlit-card" version = "0.0.4" description = "A streamlit component, to make UI cards" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3378,7 +3656,7 @@ streamlit = ">=0.63" name = "streamlit-chat" version = "0.0.2.2" description = "A streamlit component, to make chatbots" -category = "main" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3393,7 +3671,7 @@ streamlit = ">=0.63" name = "streamlit-embedcode" version = "0.1.2" description = "Streamlit component for embedded code snippets" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3408,7 +3686,7 @@ streamlit = ">=0.63" name = "streamlit-extras" version = "0.2.7" description = "A library to discover, try, install and share Streamlit extras" -category = "main" +category = "dev" optional = false python-versions = ">=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*" files = [ @@ -3435,7 +3713,7 @@ streamlit-vertical-slider = ">=1.0.2" name = "streamlit-faker" version = "0.0.2" description = "streamlit-faker is a library to very easily 
fake Streamlit commands" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3453,7 +3731,7 @@ streamlit-extras = "*" name = "streamlit-image-coordinates" version = "0.1.3" description = "Streamlit component that displays an image and returns the coordinates when you click on it" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3469,7 +3747,7 @@ streamlit = ">=1.2" name = "streamlit-keyup" version = "0.2.0" description = "Text input that renders on keyup" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3485,7 +3763,7 @@ streamlit = ">=1.2" name = "streamlit-toggle-switch" version = "1.0.2" description = "Creates a customizable toggle" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3500,7 +3778,7 @@ streamlit = ">=0.63" name = "streamlit-vertical-slider" version = "1.0.2" description = "Creates a customizable vertical slider" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3511,11 +3789,26 @@ files = [ [package.dependencies] streamlit = ">=0.63" +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, + {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, +] + +[package.dependencies] +mpmath = ">=0.19" + [[package]] name = "tenacity" version = "8.2.2" description = "Retry code until it succeeds" -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3526,11 +3819,78 @@ files = [ [package.extras] doc = ["reno", "sphinx", "tornado (>=4.5)"] +[[package]] +name = "threadpoolctl" +version = "3.1.0" +description = "threadpoolctl" +category = "dev" +optional = false +python-versions = ">=3.6" +files = [ + {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, + {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, +] + +[[package]] +name = "tokenizers" +version = "0.13.3" +description = "Fast and Customizable Tokenizers" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "tokenizers-0.13.3-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:f3835c5be51de8c0a092058a4d4380cb9244fb34681fd0a295fbf0a52a5fdf33"}, + {file = "tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4ef4c3e821730f2692489e926b184321e887f34fb8a6b80b8096b966ba663d07"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5fd1a6a25353e9aa762e2aae5a1e63883cad9f4e997c447ec39d071020459bc"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee0b1b311d65beab83d7a41c56a1e46ab732a9eed4460648e8eb0bd69fc2d059"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ef4215284df1277dadbcc5e17d4882bda19f770d02348e73523f7e7d8b8d396"}, + {file = "tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4d53976079cff8a033f778fb9adca2d9d69d009c02fa2d71a878b5f3963ed30"}, + {file = 
"tokenizers-0.13.3-cp310-cp310-win32.whl", hash = "sha256:1f0e3b4c2ea2cd13238ce43548959c118069db7579e5d40ec270ad77da5833ce"}, + {file = "tokenizers-0.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:89649c00d0d7211e8186f7a75dfa1db6996f65edce4b84821817eadcc2d3c79e"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_10_11_universal2.whl", hash = "sha256:56b726e0d2bbc9243872b0144515ba684af5b8d8cd112fb83ee1365e26ec74c8"}, + {file = "tokenizers-0.13.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc5c022ce692e1f499d745af293ab9ee6f5d92538ed2faf73f9708c89ee59ce6"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f55c981ac44ba87c93e847c333e58c12abcbb377a0c2f2ef96e1a266e4184ff2"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f247eae99800ef821a91f47c5280e9e9afaeed9980fc444208d5aa6ba69ff148"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b3e3215d048e94f40f1c95802e45dcc37c5b05eb46280fc2ccc8cd351bff839"}, + {file = "tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ba2b0bf01777c9b9bc94b53764d6684554ce98551fec496f71bc5be3a03e98b"}, + {file = "tokenizers-0.13.3-cp311-cp311-win32.whl", hash = "sha256:cc78d77f597d1c458bf0ea7c2a64b6aa06941c7a99cb135b5969b0278824d808"}, + {file = "tokenizers-0.13.3-cp311-cp311-win_amd64.whl", hash = "sha256:ecf182bf59bd541a8876deccf0360f5ae60496fd50b58510048020751cf1724c"}, + {file = "tokenizers-0.13.3-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:0527dc5436a1f6bf2c0327da3145687d3bcfbeab91fed8458920093de3901b44"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07cbb2c307627dc99b44b22ef05ff4473aa7c7cc1fec8f0a8b37d8a64b1a16d2"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4560dbdeaae5b7ee0d4e493027e3de6d53c991b5002d7ff95083c99e11dd5ac0"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64064bd0322405c9374305ab9b4c07152a1474370327499911937fd4a76d004b"}, + {file = "tokenizers-0.13.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8c6e2ab0f2e3d939ca66aa1d596602105fe33b505cd2854a4c1717f704c51de"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win32.whl", hash = "sha256:6cc29d410768f960db8677221e497226e545eaaea01aa3613fa0fdf2cc96cff4"}, + {file = "tokenizers-0.13.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fc2a7fdf864554a0dacf09d32e17c0caa9afe72baf9dd7ddedc61973bae352d8"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:8791dedba834c1fc55e5f1521be325ea3dafb381964be20684b92fdac95d79b7"}, + {file = "tokenizers-0.13.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:d607a6a13718aeb20507bdf2b96162ead5145bbbfa26788d6b833f98b31b26e1"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3791338f809cd1bf8e4fee6b540b36822434d0c6c6bc47162448deee3f77d425"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2f35f30e39e6aab8716f07790f646bdc6e4a853816cc49a95ef2a9016bf9ce6"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310204dfed5aa797128b65d63538a9837cbdd15da2a29a77d67eefa489edda26"}, + {file = "tokenizers-0.13.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a0f9b92ea052305166559f38498b3b0cae159caea712646648aaa272f7160963"}, + {file = "tokenizers-0.13.3-cp38-cp38-win32.whl", hash = "sha256:9a3fa134896c3c1f0da6e762d15141fbff30d094067c8f1157b9fdca593b5806"}, + {file = "tokenizers-0.13.3-cp38-cp38-win_amd64.whl", hash = "sha256:8e7b0cdeace87fa9e760e6a605e0ae8fc14b7d72e9fc19c578116f7287bb873d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:00cee1e0859d55507e693a48fa4aef07060c4bb6bd93d80120e18fea9371c66d"}, + {file = "tokenizers-0.13.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a23ff602d0797cea1d0506ce69b27523b07e70f6dda982ab8cf82402de839088"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70ce07445050b537d2696022dafb115307abdffd2a5c106f029490f84501ef97"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:280ffe95f50eaaf655b3a1dc7ff1d9cf4777029dbbc3e63a74e65a056594abc3"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97acfcec592f7e9de8cadcdcda50a7134423ac8455c0166b28c9ff04d227b371"}, + {file = "tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd7730c98a3010cd4f523465867ff95cd9d6430db46676ce79358f65ae39797b"}, + {file = "tokenizers-0.13.3-cp39-cp39-win32.whl", hash = "sha256:48625a108029cb1ddf42e17a81b5a3230ba6888a70c9dc14e81bc319e812652d"}, + {file = "tokenizers-0.13.3-cp39-cp39-win_amd64.whl", hash = "sha256:bc0a6f1ba036e482db6453571c9e3e60ecd5489980ffd95d11dc9f960483d783"}, + {file = "tokenizers-0.13.3.tar.gz", hash = "sha256:2e546dbb68b623008a5442353137fbb0123d311a6d7ba52f2667c8862a75af2e"}, +] + +[package.extras] +dev = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + [[package]] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" +category = "dev" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3566,7 +3926,7 @@ files = [ name = "toolz" version = "0.12.0" description = "List processing tools and functional utilities" -category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -3574,11 +3934,111 @@ files = [ {file = "toolz-0.12.0.tar.gz", hash = "sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194"}, ] +[[package]] +name = "torch" +version = "2.0.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +category = "dev" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"}, + {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"}, + {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"}, + {file = "torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:567f84d657edc5582d716900543e6e62353dbe275e61cdc36eda4929e46df9e7"}, + {file = "torch-2.0.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:787b5a78aa7917465e9b96399b883920c88a08f4eb63b5a5d2d1a16e27d2f89b"}, + {file = "torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl", hash = 
"sha256:e617b1d0abaf6ced02dbb9486803abfef0d581609b09641b34fa315c9c40766d"}, + {file = "torch-2.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b6019b1de4978e96daa21d6a3ebb41e88a0b474898fe251fd96189587408873e"}, + {file = "torch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:dbd68cbd1cd9da32fe5d294dd3411509b3d841baecb780b38b3b7b06c7754434"}, + {file = "torch-2.0.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:ef654427d91600129864644e35deea761fb1fe131710180b952a6f2e2207075e"}, + {file = "torch-2.0.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:25aa43ca80dcdf32f13da04c503ec7afdf8e77e3a0183dd85cd3e53b2842e527"}, + {file = "torch-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ef3ea3d25441d3957348f7e99c7824d33798258a2bf5f0f0277cbcadad2e20d"}, + {file = "torch-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0882243755ff28895e8e6dc6bc26ebcf5aa0911ed81b2a12f241fc4b09075b13"}, + {file = "torch-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:f66aa6b9580a22b04d0af54fcd042f52406a8479e2b6a550e3d9f95963e168c8"}, + {file = "torch-2.0.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:1adb60d369f2650cac8e9a95b1d5758e25d526a34808f7448d0bd599e4ae9072"}, + {file = "torch-2.0.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:1bcffc16b89e296826b33b98db5166f990e3b72654a2b90673e817b16c50e32b"}, + {file = "torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e10e1597f2175365285db1b24019eb6f04d53dcd626c735fc502f1e8b6be9875"}, + {file = "torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490"}, + {file = "torch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8742bdc62946c93f75ff92da00e3803216c6cce9b132fbca69664ca38cfb3e18"}, + {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"}, + {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"}, +] + +[package.dependencies] +filelock = "*" +jinja2 = "*" +networkx = "*" +sympy = "*" +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "torchvision" +version = "0.15.2" +description = "image and video datasets and models for torch deep learning" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "torchvision-0.15.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7754088774e810c5672b142a45dcf20b1bd986a5a7da90f8660c43dc43fb850c"}, + {file = "torchvision-0.15.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37eb138e13f6212537a3009ac218695483a635c404b6cc1d8e0d0d978026a86d"}, + {file = "torchvision-0.15.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:54143f7cc0797d199b98a53b7d21c3f97615762d4dd17ad45a41c7e80d880e73"}, + {file = "torchvision-0.15.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:1eefebf5fbd01a95fe8f003d623d941601c94b5cec547b420da89cb369d9cf96"}, + {file = "torchvision-0.15.2-cp310-cp310-win_amd64.whl", hash = "sha256:96fae30c5ca8423f4b9790df0f0d929748e32718d88709b7b567d2f630c042e3"}, + {file = "torchvision-0.15.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5f35f6bd5bcc4568e6522e4137fa60fcc72f4fa3e615321c26cd87e855acd398"}, + {file = "torchvision-0.15.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:757505a0ab2be7096cb9d2bf4723202c971cceddb72c7952a7e877f773de0f8a"}, + {file = "torchvision-0.15.2-cp311-cp311-manylinux1_x86_64.whl", hash = 
"sha256:012ad25cfd9019ff9b0714a168727e3845029be1af82296ff1e1482931fa4b80"}, + {file = "torchvision-0.15.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b02a7ffeaa61448737f39a4210b8ee60234bda0515a0c0d8562f884454105b0f"}, + {file = "torchvision-0.15.2-cp311-cp311-win_amd64.whl", hash = "sha256:10be76ceded48329d0a0355ac33da131ee3993ff6c125e4a02ab34b5baa2472c"}, + {file = "torchvision-0.15.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f12415b686dba884fb086f53ac803f692be5a5cdd8a758f50812b30fffea2e4"}, + {file = "torchvision-0.15.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:31211c01f8b8ec33b8a638327b5463212e79a03e43c895f88049f97af1bd12fd"}, + {file = "torchvision-0.15.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c55f9889e436f14b4f84a9c00ebad0d31f5b4626f10cf8018e6c676f92a6d199"}, + {file = "torchvision-0.15.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:9a192f2aa979438f23c20e883980b23d13268ab9f819498774a6d2eb021802c2"}, + {file = "torchvision-0.15.2-cp38-cp38-win_amd64.whl", hash = "sha256:c07071bc8d02aa8fcdfe139ab6a1ef57d3b64c9e30e84d12d45c9f4d89fb6536"}, + {file = "torchvision-0.15.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4790260fcf478a41c7ecc60a6d5200a88159fdd8d756e9f29f0f8c59c4a67a68"}, + {file = "torchvision-0.15.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:987ab62225b4151a11e53fd06150c5258ced24ac9d7c547e0e4ab6fbca92a5ce"}, + {file = "torchvision-0.15.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:63df26673e66cba3f17e07c327a8cafa3cce98265dbc3da329f1951d45966838"}, + {file = "torchvision-0.15.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b85f98d4cc2f72452f6792ab4463a3541bc5678a8cdd3da0e139ba2fe8b56d42"}, + {file = "torchvision-0.15.2-cp39-cp39-win_amd64.whl", hash = "sha256:07c462524cc1bba5190c16a9d47eac1fca024d60595a310f23c00b4ffff18b30"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=5.3.0,<8.3.0 || >=8.4.0" +requests = "*" +torch = "2.0.1" + +[package.extras] +scipy = ["scipy"] + +[[package]] +name = "tornado" +version = "6.3.2" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
+category = "dev" +optional = false +python-versions = ">= 3.8" +files = [ + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, + {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, + {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, + {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, +] + [[package]] name = "tqdm" version = "4.65.0" description = "Fast, Extensible Progress Meter" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3595,11 +4055,80 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "transformers" +version = "4.29.1" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +category = "dev" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "transformers-4.29.1-py3-none-any.whl", hash = "sha256:75f851f2420c26410edbdf4a2a1a5b434ab2b96aea36eb5931d06cc3b2e7b509"}, + {file = "transformers-4.29.1.tar.gz", hash = "sha256:3dc9cd198918e140468edbf37d7edf3b7a75633655ce0771ce323bbf8c118c4d"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.14.1,<1.0" +numpy = ">=1.17" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" +requests = "*" +tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.14" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.19.0)"] +agents = ["Pillow", "accelerate (>=0.19.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.9,!=1.12.0)"] +all = ["Pillow", "accelerate (>=0.19.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.6.9)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "numba (<0.57.0)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch 
(>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.19.0)", "deepspeed (>=0.8.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.19.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.8.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.19.0)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.6.9)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "numba (<0.57.0)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "numba (<0.57.0)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow", "accelerate (>=0.19.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "numba (<0.57.0)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf (<=3.20.2)", "psutil", "pyctcdecode (>=0.4.0)", "pytest", 
"pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +docs = ["Pillow", "accelerate (>=0.19.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.6.9)", "hf-doc-builder", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "numba (<0.57.0)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf (<=3.20.2)", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"] +docs-specific = ["hf-doc-builder"] +fairscale = ["fairscale (>0.3)"] +flax = ["flax (>=0.4.1,<=0.6.9)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "optax (>=0.0.8,<=0.1.4)"] +flax-speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune]", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "black (>=23.1,<24.0)", "datasets (!=2.5.0)", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "ruff (>=0.0.241,<=0.0.259)", "urllib3 (<2.0.0)"] +ray = ["ray[tune]"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf (<=3.20.2)", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf (<=3.20.2)", "psutil", "pytest", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "safetensors (>=0.2.1)", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx"] +tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.4,<2.13)", "tensorflow-text (<2.13)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm"] +tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"] +torch = ["accelerate (>=0.19.0)", "torch (>=1.9,!=1.12.0)"] +torch-speech = ["kenlm", "librosa", "numba (<0.57.0)", "phonemizer", "pyctcdecode (>=0.4.0)", 
"torchaudio"] +torch-vision = ["Pillow", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.14.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf (<=3.20.2)", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow"] + [[package]] name = "typer" version = "0.7.0" description = "Typer, build great CLIs. Easy to code. Based on Python type hints." -category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3620,7 +4149,7 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6. name = "typing-extensions" version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3632,7 +4161,7 @@ files = [ name = "typing-inspect" version = "0.8.0" description = "Runtime inspection utilities for typing module." -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -3648,7 +4177,7 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" +category = "dev" optional = false python-versions = ">=2" files = [ @@ -3660,7 +4189,7 @@ files = [ name = "tzlocal" version = "5.0" description = "tzinfo object for the local timezone" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3678,7 +4207,7 @@ devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pyte name = "unstructured" version = "0.6.6" description = "A library that prepares raw documents for downstream ML tasks." -category = "main" +category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -3719,7 +4248,7 @@ wikipedia = ["wikipedia"] name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3736,7 +4265,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "validators" version = "0.20.0" description = "Python Data Validation for Humans™." -category = "main" +category = "dev" optional = false python-versions = ">=3.4" files = [ @@ -3774,7 +4303,7 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess name = "watchdog" version = "3.0.0" description = "Filesystem events monitoring" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3814,7 +4343,7 @@ watchmedo = ["PyYAML (>=3.10)"] name = "wrapt" version = "1.14.1" description = "Module for decorators, wrappers and monkey patching." -category = "main" +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ @@ -3888,7 +4417,7 @@ files = [ name = "xlsxwriter" version = "3.1.0" description = "A Python module for creating Excel XLSX files." 
-category = "main" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3900,7 +4429,7 @@ files = [ name = "yarl" version = "1.9.2" description = "Yet another URL library" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3988,7 +4517,7 @@ multidict = ">=4.0" name = "zipp" version = "3.15.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4003,4 +4532,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">3.9.7,<3.12" -content-hash = "98877954a74d36e6fd049aafeb561fad0826fb748ec962eef4a3192b8bbe86f3" +content-hash = "bbb551ef93af03b0dbb9b0a454e460ef1ce65182fe66b3bde35e6f89df1760de" diff --git a/pyproject.toml b/pyproject.toml index 678b9ee..ae5fa22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,21 +7,24 @@ readme = "README.md" [tool.poetry.dependencies] python = ">3.9.7,<3.12" -langchain = "^0.0.168" -pygpt4all = "^1.1.0" -qdrant-client = "^1.1.7" -llama-cpp-python = "<=0.1.49" # 0.1.50 raises an AssertionError / NameError for some reason pdfminer-six = "20221105" python-dotenv = "^1.0.0" -unstructured = "^0.6.6" # Handle ingestion file formats -docx2txt="^0.8" # Handle docx ingestion file formats pathlib = "^1.0.1" +sentencepiece = "^0.1.99" # For convert.py + +[tool.poetry.group.GUI.dependencies] # for the GUI streamlit = { git = "https://github.com/hippalectryon-0/streamlit.git", subdirectory = "lib", "rev" = "0b7fb1c" } # waiting for >1.22.0 so that we can use protobuf>=4 streamlit-chat = "^0.0.2.2" streamlit-extras = "^0.2.7" -sentencepiece = "^0.1.99" # For convert.py - +[tool.poetry.group.LLM.dependencies] +langchain = "^0.0.168" +pygpt4all = "^1.1.0" +qdrant-client = "^1.1.7" +unstructured = "^0.6.6" # Handle ingestion file formats +docx2txt="^0.8" # Handle docx ingestion file formats +llama-cpp-python = "^0.1.50" # 0.1.50 raises an AssertionError / NameError on <5 vic models +sentence_transformers = "^2.2.2" # doesn't install torch properly with poetry, but should be better in later versions [tool.poetry.group.dev.dependencies] pre-commit = "^3.3.1" diff --git a/startLLM.py b/startLLM.py index b5567a4..47a0d62 100644 --- a/startLLM.py +++ b/startLLM.py @@ -3,71 +3,82 @@ import qdrant_client from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.chains import RetrievalQA -from langchain.embeddings import LlamaCppEmbeddings from langchain.vectorstores import Qdrant -from load_env import llama_embeddings_model, model_n_ctx, model_path, model_stop, model_temp, model_type, persist_directory, use_mlock - -qa_system = None - - -def initialize_qa_system(): - global qa_system - if qa_system is None: - # Load stored vectorstore - llama = LlamaCppEmbeddings(model_path=llama_embeddings_model, n_ctx=model_n_ctx) - # Load ggml-formatted model - local_path = model_path - - client = qdrant_client.QdrantClient(path=persist_directory, prefer_grpc=True) - qdrant = Qdrant(client=client, collection_name="test", embeddings=llama) - - # Prepare the LLM chain - callbacks = [StreamingStdOutCallbackHandler()] - match model_type: - case "LlamaCpp": - from langchain.llms import LlamaCpp - - llm = LlamaCpp( - model_path=local_path, - n_ctx=model_n_ctx, - temperature=model_temp, - stop=model_stop, - callbacks=callbacks, - verbose=True, - n_threads=6, - n_batch=1000, - use_mlock=use_mlock, - ) - 
case "GPT4All": - from langchain.llms import GPT4All - - llm = GPT4All( - model=local_path, - n_ctx=model_n_ctx, - callbacks=callbacks, - verbose=True, - backend="gptj", - ) - case _default: - print("Only LlamaCpp or GPT4All supported right now. Make sure you set up your .env correctly.") - qa = RetrievalQA.from_chain_type( - llm=llm, - chain_type="stuff", - retriever=qdrant.as_retriever(search_type="mmr"), - return_source_documents=True, - ) - qa_system = qa - - -def main(): - initialize_qa_system() +from load_env import ( + chain_type, + get_embedding_model, + get_prompt_template_kwargs, + model_n_ctx, + model_path, + model_stop, + model_temp, + model_type, + n_gpu_layers, + persist_directory, + use_mlock, +) + + +def initialize_qa_system() -> RetrievalQA: + """init the LLM""" + # Get embeddings and local vector store + embeddings = get_embedding_model()[0] + client = qdrant_client.QdrantClient(path=persist_directory, prefer_grpc=True) + qdrant = Qdrant(client=client, collection_name="test", embeddings=embeddings) + + # Prepare the LLM chain + callbacks = [StreamingStdOutCallbackHandler()] + match model_type: + case "LlamaCpp": + from langchain.llms import LlamaCpp + + llm = LlamaCpp( + model_path=model_path, + n_ctx=model_n_ctx, + temperature=model_temp, + stop=model_stop, + callbacks=callbacks, + verbose=True, + n_threads=6, + n_batch=1000, + use_mlock=use_mlock, + ) + # Need this hack because this param isn't yet supported by the python lib + state = llm.client.__getstate__() + state["n_gpu_layers"] = n_gpu_layers + llm.client.__setstate__(state) + case "GPT4All": + from langchain.llms import GPT4All + + llm = GPT4All( + model=model_path, + n_ctx=model_n_ctx, + callbacks=callbacks, + verbose=True, + backend="gptj", + ) + case _: + raise ValueError("Only LlamaCpp or GPT4All supported right now. Make sure you set up your .env correctly.") + + return RetrievalQA.from_chain_type( + llm=llm, + chain_type=chain_type, + retriever=qdrant.as_retriever(search_type="mmr"), + return_source_documents=True, + chain_type_kwargs=get_prompt_template_kwargs(), + ) + + +# noinspection PyMissingOrEmptyDocstring +def main() -> None: + qa_system = initialize_qa_system() # Interactive questions and answers while True: - query = input("\nEnter a query: ") + query = input("\nEnter a query: ").strip() if query == "exit": break - elif not query.strip(): # check if query empty + elif not query: # check if query empty print("Empty query, skipping") continue @@ -76,15 +87,12 @@ def main(): answer, docs = res["result"], res["source_documents"] # Print the result - print("\n\n> Question:") - print(query) - print("\n> Answer:") - print(answer) - - # Print the relevant sources used for the answer - for document in docs: - print("\n> " + document.metadata["source"] + ":") - print(document.page_content) + sources_str = "\n\n".join(f">> {document.metadata['source']}:\n{document.page_content}" for document in docs) + print( + f"""\n\n> Question: {query} +> Answer: {answer} +> Sources:\n{sources_str}""" + ) if __name__ == "__main__":