diff --git a/requirements/base.txt b/requirements/base.txt index bd634f6..080e1ec 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -20,7 +20,7 @@ click==8.1.3 # uvicorn defusedxml==0.7.1 # via nbconvert -fastapi==0.92.0 +fastapi==0.93.0 # via unstructured-api-tools (setup.py) fastjsonschema==2.16.3 # via nbformat @@ -78,7 +78,7 @@ pkgutil-resolve-name==1.3.10 # via jsonschema platformdirs==3.1.0 # via jupyter-core -pydantic==1.10.5 +pydantic==1.10.6 # via fastapi pygments==2.14.0 # via nbconvert diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-2.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-2.ipynb new file mode 100644 index 0000000..eb702a0 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-2.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " file\n", + "):\n", + " return {\"silly_result\": ' : '.join([str(len(file.read()))])}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '17'}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(pipeline_api(fp))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-3.ipynb 
b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-3.ipynb new file mode 100644 index 0000000..3767287 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-3.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " file, response_type=\"text/csv\", response_schema=\"isd\"\n", + "):\n", + " return {\"silly_result\": ' : '.join([str(len(file.read())), str(response_type), str(response_schema)])}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '17 : text/csv : isd'}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(pipeline_api(fp, \"text/csv\", \"isd\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-4.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-4.ipynb new file mode 100644 index 0000000..86624d8 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-4.ipynb @@ -0,0 +1,74 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
pipeline-api\n", + "def pipeline_api(\n", + " file,\n", + " file_content_type=None,\n", + " response_type=\"application/json\",\n", + " response_schema=\"labelstudio\",\n", + " m_input1=[]\n", + "):\n", + " return {\"silly_result\": ' : '.join([\n", + " str(len(file.read())),\n", + " str(file_content_type),\n", + " str(response_type),\n", + " str(response_schema),\n", + " str(m_input1)\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': \"17 : None : application/json : isd : ['input1', 'input2']\"}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(\n", + " pipeline_api(\n", + " fp,\n", + " None,\n", + " \"application/json\",\n", + " \"isd\",\n", + " [\"input1\", \"input2\"]\n", + " )\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-5.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-5.ipynb new file mode 100644 index 0000000..94f09af --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-file-5.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " file,\n", + " file_content_type=None,\n", + " response_type=\"application/json\",\n", + " response_schema=\"labelstudio\",\n", + " m_input1=[],\n", + " m_input2=[],\n", + "):\n", 
+ " return {\"silly_result\": ' : '.join([\n", + " str(len(file.read())),\n", + " str(file_content_type),\n", + " str(response_type),\n", + " str(response_schema),\n", + " str(m_input1),\n", + " str(m_input2),\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': \"17 : None : application/json : isd : ['input1', 'input2'] : ['m_input2']\"}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(\n", + " pipeline_api(\n", + " fp,\n", + " None,\n", + " \"application/json\",\n", + " \"isd\",\n", + " [\"input1\", \"input2\"],\n", + " [\"m_input2\"]\n", + " )\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-1.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-1.ipynb new file mode 100644 index 0000000..c055167 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-1.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Text Processing Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + "):\n", + " return {\"silly_result\": ' : '.join([str(len(text)), text])}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '9 : some text'}\n" + ] + } + ], + "source": [ + "print(pipeline_api(\"some 
text\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-2.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-2.ipynb new file mode 100644 index 0000000..0c2915a --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-2.ipynb @@ -0,0 +1,59 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " m_input1=[],\n", + " m_input2=[]\n", + "):\n", + " return {\"silly_result\": ' : '.join([\n", + " str(len(text)),\n", + " text,\n", + " str(m_input1),\n", + " str(m_input2)\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': \"9 : some text : ['first_input'] : ['last', 'input']\"}\n" + ] + } + ], + "source": [ + "print(pipeline_api(\"some text\", m_input1=[\"first_input\"], m_input2=[\"last\", \"input\"]))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-3.ipynb 
b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-3.ipynb new file mode 100644 index 0000000..fe938d9 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-3.ipynb @@ -0,0 +1,55 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2524a9a4", + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " response_type=\"text/csv\"\n", + "):\n", + " return {\"silly_result\": ' : '.join([str(len(text)), text, str(response_type)])}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6a876bdf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '9 : some text : text/csv : []'}\n" + ] + } + ], + "source": [ + "print(pipeline_api(\"some text\", \"text/csv\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-4.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-4.ipynb new file mode 100644 index 0000000..eb5f0bd --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-4.ipynb @@ -0,0 +1,54 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " response_type=\"text/csv\",\n", + " response_schema=\"isd\",\n", + 
"):\n", + " return {\"silly_result\": ' : '.join([str(len(text)), text, str(response_type), str(response_schema)])}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '9 : some text : text/csv : isd'}\n" + ] + } + ], + "source": [ + "print(pipeline_api(\"some text\", \"text/csv\", \"isd\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-1.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-1.ipynb new file mode 100644 index 0000000..4485bf4 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-1.ipynb @@ -0,0 +1,70 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text & File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " file=None,\n", + " filename=None,\n", + " file_content_type=None,\n", + "):\n", + " return {\"silly_result\": ' : '.join([\n", + " str(len(text if text else \"\")),\n", + " text,\n", + " str(len(file.read()) if file else None),\n", + " filename,\n", + " str(file_content_type),\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '9 : some text : 17 : temp-file.txt : None'}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " 
print(pipeline_api(\n", + " text=\"some text\",\n", + " file=fp,\n", + " file_content_type=None,\n", + " filename=\"temp-file.txt\"\n", + " ))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-2.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-2.ipynb new file mode 100644 index 0000000..ffc5b12 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-2.ipynb @@ -0,0 +1,76 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text & File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " file=None,\n", + " filename=None,\n", + " file_content_type=None,\n", + " response_type=\"application/json\",\n", + " m_input2=[]\n", + "):\n", + " return {\"silly_result\": ' : '.join([\n", + " str(len(text if text else \"\")),\n", + " text,\n", + " str(len(file.read()) if file else None),\n", + " filename,\n", + " str(file_content_type),\n", + " str(response_type),\n", + " str(m_input2)\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': \"9 : some text : 17 : temp-file.txt : None : application/json : ['input1', 'input2']\"}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(pipeline_api(\n", + " text=\"some text\",\n", + " file=fp,\n", + " file_content_type=None,\n", + " 
filename=\"temp-file.txt\",\n", + " response_type=\"application/json\",\n", + " m_input2=[\"input1\", \"input2\"]\n", + " ))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-3.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-3.ipynb new file mode 100644 index 0000000..7dcd4b7 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-3.ipynb @@ -0,0 +1,76 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text & File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " file=None,\n", + " filename=None,\n", + " file_content_type=None,\n", + " response_type=\"application/json\",\n", + " response_schema=\"isd\"\n", + "):\n", + " return {\"silly_result\": ' : '.join([\n", + " str(len(text if text else \"\")),\n", + " text,\n", + " str(len(file.read()) if file else None),\n", + " filename,\n", + " str(file_content_type),\n", + " str(response_type),\n", + " str(response_schema)\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': '9 : some text : 17 : temp-file.txt : None : application/json : isd'}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(pipeline_api(\n", + " text=\"some text\",\n", + " file=fp,\n", + " file_content_type=None,\n", + " filename=\"temp-file.txt\",\n", + " 
response_type=\"application/json\",\n", + " response_schema=\"isd\"\n", + " ))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-4.ipynb b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-4.ipynb new file mode 100644 index 0000000..ca8a878 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/pipeline-notebooks/pipeline-process-text-file-4.ipynb @@ -0,0 +1,82 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Text & File Processing Pipeline" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline-api\n", + "def pipeline_api(\n", + " text,\n", + " file=None,\n", + " filename=None,\n", + " file_content_type=None,\n", + " response_type=\"application/json\",\n", + " response_schema=\"isd\",\n", + " m_input1=[],\n", + " m_input2=[]\n", + "):\n", + " return {\"silly_result\": ' : '.join([\n", + " str(len(text if text else \"\")),\n", + " text,\n", + " str(len(file.read()) if file else None),\n", + " filename,\n", + " str(file_content_type),\n", + " str(response_type),\n", + " str(response_schema),\n", + " str(m_input1),\n", + " str(m_input2),\n", + " ])}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'silly_result': \"9 : some text : 17 : temp-file.txt : None : application/json : isd : ['input1'] : ['input2', 'input3']\"}\n" + ] + } + ], + "source": [ + "import tempfile\n", + "with tempfile.TemporaryFile() as fp:\n", + " fp.write(b'This is some data')\n", + " fp.seek(0)\n", + " print(pipeline_api(\n", + " text=\"some text\",\n", + " file=fp,\n", 
+ " file_content_type=None,\n", + " filename=\"temp-file.txt\",\n", + " response_type=\"application/json\",\n", + " response_schema=\"isd\",\n", + " m_input1=[\"input1\"],\n", + " m_input2=[\"input2\", \"input3\"]\n", + " ))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/app.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/app.py index 4409b76..afae193 100644 --- a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/app.py +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/app.py @@ -7,6 +7,18 @@ from fastapi import FastAPI, Request, status from .process_file_1 import router as process_file_1_router +from .process_file_2 import router as process_file_2_router +from .process_file_3 import router as process_file_3_router +from .process_file_4 import router as process_file_4_router +from .process_file_5 import router as process_file_5_router +from .process_text_1 import router as process_text_1_router +from .process_text_2 import router as process_text_2_router +from .process_text_3 import router as process_text_3_router +from .process_text_4 import router as process_text_4_router +from .process_text_file_1 import router as process_text_file_1_router +from .process_text_file_2 import router as process_text_file_2_router +from .process_text_file_3 import router as process_text_file_3_router +from .process_text_file_4 import router as process_text_file_4_router app = FastAPI( @@ -16,6 +28,18 @@ ) app.include_router(process_file_1_router) +app.include_router(process_file_2_router) +app.include_router(process_file_3_router) +app.include_router(process_file_4_router) +app.include_router(process_file_5_router) +app.include_router(process_text_1_router) 
+app.include_router(process_text_2_router) +app.include_router(process_text_3_router) +app.include_router(process_text_4_router) +app.include_router(process_text_file_1_router) +app.include_router(process_text_file_2_router) +app.include_router(process_text_file_3_router) +app.include_router(process_text_file_4_router) @app.get("/healthcheck", status_code=status.HTTP_200_OK) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_2.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_2.py new file mode 100644 index 0000000..83999bb --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_2.py @@ -0,0 +1,147 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. +# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +# pipeline-api +def pipeline_api(file): + return {"silly_result": " : ".join([str(len(file.read()))])} + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", 
content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + "Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-file-2") +async def pipeline_1( + request: Request, + files: Union[List[UploadFile], None] = File(default=None), +): + content_type = request.headers.get("Accept") + + if isinstance(files, list) and len(files): + if len(files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in files: + _file = file.file + + response = pipeline_api( + _file, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + 
+ if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), + ) + else: + return response_generator(is_multipart=False) + else: + file = files[0] + _file = file.file + + response = pipeline_api( + _file, + ) + + return response + + else: + return PlainTextResponse( + content='Request parameter "files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_3.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_3.py new file mode 100644 index 0000000..f26c729 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_3.py @@ -0,0 +1,189 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. 
+# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +def is_expected_response_type(media_type, response_type): + if media_type == "application/json" and response_type not in [dict, list]: + return True + elif media_type == "text/csv" and response_type != str: + return True + else: + return False + + +# pipeline-api +def pipeline_api(file, response_type="text/csv", response_schema="isd"): + return { + "silly_result": " : ".join( + [str(len(file.read())), str(response_type), str(response_schema)] + ) + } + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + "Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + 
part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-file-3") +async def pipeline_1( + request: Request, + files: Union[List[UploadFile], None] = File(default=None), + output_format: Union[str, None] = Form(default=None), + output_schema: str = Form(default=None), +): + content_type = request.headers.get("Accept") + + default_response_type = output_format or "text/csv" + if not content_type or content_type == "*/*" or content_type == "multipart/mixed": + media_type = default_response_type + else: + media_type = content_type + + default_response_schema = output_schema or "isd" + + if isinstance(files, list) and len(files): + if len(files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in files: + _file = file.file + + response = pipeline_api( + _file, + response_type=media_type, + response_schema=default_response_schema, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + 
response_generator(is_multipart=True), content_type=media_type + ) + else: + return response_generator(is_multipart=False) + else: + file = files[0] + _file = file.file + + response = pipeline_api( + _file, + response_type=media_type, + response_schema=default_response_schema, + ) + + if is_expected_response_type(media_type, type(response)): + return PlainTextResponse( + content=( + f"Conflict in media type {media_type}" + f" with response type {type(response)}.\n" + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + valid_response_types = ["application/json", "text/csv", "*/*"] + if media_type in valid_response_types: + return response + else: + return PlainTextResponse( + content=f"Unsupported media type {media_type}.\n", + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + else: + return PlainTextResponse( + content='Request parameter "files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_4.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_4.py new file mode 100644 index 0000000..b683e18 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_4.py @@ -0,0 +1,206 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. 
+# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +def is_expected_response_type(media_type, response_type): + if media_type == "application/json" and response_type not in [dict, list]: + return True + elif media_type == "text/csv" and response_type != str: + return True + else: + return False + + +# pipeline-api +def pipeline_api( + file, + file_content_type=None, + response_type="application/json", + response_schema="labelstudio", + m_input1=[], +): + return { + "silly_result": " : ".join( + [ + str(len(file.read())), + str(file_content_type), + str(response_type), + str(response_schema), + str(m_input1), + ] + ) + } + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + 
"Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-file-4") +async def pipeline_1( + request: Request, + files: Union[List[UploadFile], None] = File(default=None), + output_format: Union[str, None] = Form(default=None), + output_schema: str = Form(default=None), + input1: List[str] = Form(default=[]), +): + content_type = request.headers.get("Accept") + + default_response_type = output_format or "application/json" + if not content_type or content_type == "*/*" or content_type == "multipart/mixed": + media_type = default_response_type + else: + media_type = content_type + + default_response_schema = output_schema or "labelstudio" + + if isinstance(files, list) and len(files): + if len(files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in files: + _file = file.file + + response = pipeline_api( + _file, + m_input1=input1, + response_type=media_type, + response_schema=default_response_schema, + 
file_content_type=file.content_type, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), content_type=media_type + ) + else: + return response_generator(is_multipart=False) + else: + file = files[0] + _file = file.file + + response = pipeline_api( + _file, + m_input1=input1, + response_type=media_type, + response_schema=default_response_schema, + file_content_type=file.content_type, + ) + + if is_expected_response_type(media_type, type(response)): + return PlainTextResponse( + content=( + f"Conflict in media type {media_type}" + f" with response type {type(response)}.\n" + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + valid_response_types = ["application/json", "text/csv", "*/*"] + if media_type in valid_response_types: + return response + else: + return PlainTextResponse( + content=f"Unsupported media type {media_type}.\n", + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + else: + return PlainTextResponse( + content='Request parameter "files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_5.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_5.py new file mode 100644 index 0000000..d987082 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_file_5.py @@ -0,0 +1,211 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. 
+# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +def is_expected_response_type(media_type, response_type): + if media_type == "application/json" and response_type not in [dict, list]: + return True + elif media_type == "text/csv" and response_type != str: + return True + else: + return False + + +# pipeline-api +def pipeline_api( + file, + file_content_type=None, + response_type="application/json", + response_schema="labelstudio", + m_input1=[], + m_input2=[], +): + return { + "silly_result": " : ".join( + [ + str(len(file.read())), + str(file_content_type), + str(response_type), + str(response_schema), + str(m_input1), + str(m_input2), + ] + ) + } + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + 
self.CRLF + part_headers = { + "Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-file-5") +async def pipeline_1( + request: Request, + files: Union[List[UploadFile], None] = File(default=None), + output_format: Union[str, None] = Form(default=None), + output_schema: str = Form(default=None), + input1: List[str] = Form(default=[]), + input2: List[str] = Form(default=[]), +): + content_type = request.headers.get("Accept") + + default_response_type = output_format or "application/json" + if not content_type or content_type == "*/*" or content_type == "multipart/mixed": + media_type = default_response_type + else: + media_type = content_type + + default_response_schema = output_schema or "labelstudio" + + if isinstance(files, list) and len(files): + if len(files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in files: + _file = file.file + + response = pipeline_api( + _file, + m_input1=input1, + m_input2=input2, + 
response_type=media_type, + response_schema=default_response_schema, + file_content_type=file.content_type, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), content_type=media_type + ) + else: + return response_generator(is_multipart=False) + else: + file = files[0] + _file = file.file + + response = pipeline_api( + _file, + m_input1=input1, + m_input2=input2, + response_type=media_type, + response_schema=default_response_schema, + file_content_type=file.content_type, + ) + + if is_expected_response_type(media_type, type(response)): + return PlainTextResponse( + content=( + f"Conflict in media type {media_type}" + f" with response type {type(response)}.\n" + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + valid_response_types = ["application/json", "text/csv", "*/*"] + if media_type in valid_response_types: + return response + else: + return PlainTextResponse( + content=f"Unsupported media type {media_type}.\n", + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + else: + return PlainTextResponse( + content='Request parameter "files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_1.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_1.py new file mode 100644 index 0000000..d3af1b2 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_1.py @@ -0,0 +1,148 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY 
UNSTRUCTURED API TOOLS. +# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +def pipeline_api( + text, +): + return {"silly_result": " : ".join([str(len(text)), text])} + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + "Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in 
self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-text-1") +async def pipeline_1( + request: Request, + text_files: Union[List[UploadFile], None] = File(default=None), +): + content_type = request.headers.get("Accept") + + if isinstance(text_files, list) and len(text_files): + if len(text_files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in text_files: + text = file.file.read().decode("utf-8") + + response = pipeline_api( + text, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), + ) + else: + return response_generator(is_multipart=False) + else: + text_file = text_files[0] + text = text_file.file.read().decode("utf-8") + + response = pipeline_api( + text, + ) + + return response + + else: + return PlainTextResponse( + content='Request parameter "text_files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_2.py 
b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_2.py new file mode 100644 index 0000000..1c38acc --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_2.py @@ -0,0 +1,155 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. +# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +# pipeline-api +def pipeline_api(text, m_input1=[], m_input2=[]): + return { + "silly_result": " : ".join([str(len(text)), text, str(m_input1), str(m_input2)]) + } + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + 
"Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-text-2") +async def pipeline_1( + request: Request, + text_files: Union[List[UploadFile], None] = File(default=None), + input1: List[str] = Form(default=[]), + input2: List[str] = Form(default=[]), +): + content_type = request.headers.get("Accept") + + if isinstance(text_files, list) and len(text_files): + if len(text_files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in text_files: + text = file.file.read().decode("utf-8") + + response = pipeline_api( + text, + m_input1=input1, + m_input2=input2, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), + ) + else: + return response_generator(is_multipart=False) + else: + text_file = text_files[0] + text = text_file.file.read().decode("utf-8") + 
+ response = pipeline_api( + text, + m_input1=input1, + m_input2=input2, + ) + + return response + + else: + return PlainTextResponse( + content='Request parameter "text_files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_3.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_3.py new file mode 100644 index 0000000..cba4239 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_3.py @@ -0,0 +1,180 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. +# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +def is_expected_response_type(media_type, response_type): + if media_type == "application/json" and response_type not in [dict, list]: + return True + elif media_type == "text/csv" and response_type != str: + return True + else: + return False + + +# pipeline-api +def pipeline_api(text, response_type="text/csv"): + return {"silly_result": " : ".join([str(len(text)), text, str(response_type)])} + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, 
**kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + "Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-text-3") +async def pipeline_1( + request: Request, + text_files: Union[List[UploadFile], None] = File(default=None), + output_format: Union[str, None] = Form(default=None), +): + content_type = request.headers.get("Accept") + + default_response_type = output_format or "text/csv" + if not content_type or content_type == "*/*" or content_type == "multipart/mixed": + media_type = 
default_response_type + else: + media_type = content_type + + if isinstance(text_files, list) and len(text_files): + if len(text_files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in text_files: + text = file.file.read().decode("utf-8") + + response = pipeline_api( + text, + response_type=media_type, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), content_type=media_type + ) + else: + return response_generator(is_multipart=False) + else: + text_file = text_files[0] + text = text_file.file.read().decode("utf-8") + + response = pipeline_api( + text, + response_type=media_type, + ) + + if is_expected_response_type(media_type, type(response)): + return PlainTextResponse( + content=( + f"Conflict in media type {media_type}" + f" with response type {type(response)}.\n" + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + valid_response_types = ["application/json", "text/csv", "*/*"] + if media_type in valid_response_types: + return response + else: + return PlainTextResponse( + content=f"Unsupported media type {media_type}.\n", + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + else: + return PlainTextResponse( + content='Request parameter "text_files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git 
a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_4.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_4.py new file mode 100644 index 0000000..2f6db46 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_4.py @@ -0,0 +1,193 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. +# DO NOT MODIFY DIRECTLY +##################################################################### + +import os +from typing import List, Union +from fastapi import status, FastAPI, File, Form, Request, UploadFile, APIRouter +from fastapi.responses import PlainTextResponse +import json +from fastapi.responses import StreamingResponse +from starlette.types import Send +from base64 import b64encode +from typing import Optional, Mapping, Iterator, Tuple +import secrets + + +app = FastAPI() +router = APIRouter() + + +def is_expected_response_type(media_type, response_type): + if media_type == "application/json" and response_type not in [dict, list]: + return True + elif media_type == "text/csv" and response_type != str: + return True + else: + return False + + +# pipeline-api +def pipeline_api( + text, + response_type="text/csv", + response_schema="isd", +): + return { + "silly_result": " : ".join( + [str(len(text)), text, str(response_type), str(response_schema)] + ) + } + + +class MultipartMixedResponse(StreamingResponse): + CRLF = b"\r\n" + + def __init__(self, *args, content_type: str = None, **kwargs): + super().__init__(*args, **kwargs) + self.content_type = content_type + + def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None: + super().init_headers(headers) + self.boundary_value = secrets.token_hex(16) + content_type = f'multipart/mixed; boundary="{self.boundary_value}"' + self.raw_headers.append((b"content-type", 
content_type.encode("latin-1"))) + + @property + def boundary(self): + return b"--" + self.boundary_value.encode() + + def _build_part_headers(self, headers: dict) -> bytes: + header_bytes = b"" + for header, value in headers.items(): + header_bytes += f"{header}: {value}".encode() + self.CRLF + return header_bytes + + def build_part(self, chunk: bytes) -> bytes: + part = self.boundary + self.CRLF + part_headers = { + "Content-Length": len(chunk), + "Content-Transfer-Encoding": "base64", + } + if self.content_type is not None: + part_headers["Content-Type"] = self.content_type + part += self._build_part_headers(part_headers) + part += self.CRLF + chunk + self.CRLF + return part + + async def stream_response(self, send: Send) -> None: + await send( + { + "type": "http.response.start", + "status": self.status_code, + "headers": self.raw_headers, + } + ) + async for chunk in self.body_iterator: + if not isinstance(chunk, bytes): + chunk = chunk.encode(self.charset) + chunk = b64encode(chunk) + await send( + { + "type": "http.response.body", + "body": self.build_part(chunk), + "more_body": True, + } + ) + + await send({"type": "http.response.body", "body": b"", "more_body": False}) + + +@router.post("/test-project/v1.2.3/process-text-4") +async def pipeline_1( + request: Request, + text_files: Union[List[UploadFile], None] = File(default=None), + output_format: Union[str, None] = Form(default=None), + output_schema: str = Form(default=None), +): + content_type = request.headers.get("Accept") + + default_response_type = output_format or "text/csv" + if not content_type or content_type == "*/*" or content_type == "multipart/mixed": + media_type = default_response_type + else: + media_type = content_type + + default_response_schema = output_schema or "isd" + + if isinstance(text_files, list) and len(text_files): + if len(text_files) > 1: + if content_type and content_type not in [ + "*/*", + "multipart/mixed", + "application/json", + ]: + return PlainTextResponse( + 
content=( + f"Conflict in media type {content_type}" + ' with response type "multipart/mixed".\n' + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + def response_generator(is_multipart): + for file in text_files: + text = file.file.read().decode("utf-8") + + response = pipeline_api( + text, + response_type=media_type, + response_schema=default_response_schema, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), content_type=media_type + ) + else: + return response_generator(is_multipart=False) + else: + text_file = text_files[0] + text = text_file.file.read().decode("utf-8") + + response = pipeline_api( + text, + response_type=media_type, + response_schema=default_response_schema, + ) + + if is_expected_response_type(media_type, type(response)): + return PlainTextResponse( + content=( + f"Conflict in media type {media_type}" + f" with response type {type(response)}.\n" + ), + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + valid_response_types = ["application/json", "text/csv", "*/*"] + if media_type in valid_response_types: + return response + else: + return PlainTextResponse( + content=f"Unsupported media type {media_type}.\n", + status_code=status.HTTP_406_NOT_ACCEPTABLE, + ) + + else: + return PlainTextResponse( + content='Request parameter "text_files" is required.\n', + status_code=status.HTTP_400_BAD_REQUEST, + ) + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_file_1.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_file_1.py new file mode 100644 index 0000000..4908731 --- 
class MultipartMixedResponse(StreamingResponse):
    """Streaming response that frames every body chunk as a ``multipart/mixed`` part.

    Each chunk taken from ``body_iterator`` is base64-encoded, prefixed with
    the boundary line and a small set of per-part headers, and sent as its
    own ``http.response.body`` message.
    """

    # Line terminator used between the boundary, the headers and the payload.
    CRLF = b"\r\n"

    def __init__(self, *args, content_type: str = None, **kwargs):
        """Forward everything to ``StreamingResponse`` and remember the part type.

        Args:
            content_type: Value for the ``Content-Type`` header of each part;
                when ``None`` the parts carry no ``Content-Type`` header.
        """
        super().__init__(*args, **kwargs)
        self.content_type = content_type

    def init_headers(self, headers: Optional[Mapping[str, str]] = None) -> None:
        """Run the parent header setup, then append the multipart content-type.

        A fresh random boundary token is generated on every call and stored
        in ``self.boundary_value``.
        """
        super().init_headers(headers)
        self.boundary_value = secrets.token_hex(16)
        header_value = f'multipart/mixed; boundary="{self.boundary_value}"'
        self.raw_headers.append((b"content-type", header_value.encode("latin-1")))

    @property
    def boundary(self):
        """Boundary delimiter line as bytes (``--`` followed by the token)."""
        return b"--" + self.boundary_value.encode()

    def _build_part_headers(self, headers: dict) -> bytes:
        """Serialize ``headers`` as CRLF-terminated ``Name: value`` lines."""
        return b"".join(
            f"{name}: {value}".encode() + self.CRLF
            for name, value in headers.items()
        )

    def build_part(self, chunk: bytes) -> bytes:
        """Wrap ``chunk`` in boundary, part headers, blank line and trailing CRLF.

        ``Content-Length`` is the length of ``chunk`` exactly as passed in.
        """
        headers = {
            "Content-Length": len(chunk),
            "Content-Transfer-Encoding": "base64",
        }
        if self.content_type is not None:
            headers["Content-Type"] = self.content_type
        pieces = (
            self.boundary,
            self.CRLF,
            self._build_part_headers(headers),
            self.CRLF,
            chunk,
            self.CRLF,
        )
        return b"".join(pieces)

    async def stream_response(self, send: Send) -> None:
        """Send the response start, one framed part per chunk, then an empty end.

        Non-bytes chunks are encoded with ``self.charset`` before base64.
        """
        start_message = {
            "type": "http.response.start",
            "status": self.status_code,
            "headers": self.raw_headers,
        }
        await send(start_message)

        async for piece in self.body_iterator:
            raw = piece if isinstance(piece, bytes) else piece.encode(self.charset)
            part = self.build_part(b64encode(raw))
            await send(
                {"type": "http.response.body", "body": part, "more_body": True}
            )

        # An empty body with more_body=False terminates the stream.
        await send({"type": "http.response.body", "body": b"", "more_body": False})
bytes]: + response = json.dumps(response) + yield response + + for file in files_list: + _file = file.file + + response = pipeline_api( + text=None, + file=_file, + filename=file.filename, + file_content_type=file.content_type, + ) + if is_multipart: + if type(response) not in [str, bytes]: + response = json.dumps(response) + yield response + + if content_type == "multipart/mixed": + return MultipartMixedResponse( + response_generator(is_multipart=True), + ) + else: + return response_generator(is_multipart=False) + else: + if has_text: + text_file = text_files_list[0] + text = text_file.file.read().decode("utf-8") + response = pipeline_api( + text=text, + file=None, + ) + elif has_files: + file = files_list[0] + _file = file.file + response = pipeline_api( + text=None, + file=_file, + filename=file.filename, + file_content_type=file.content_type, + ) + + return response + + +@app.get("/healthcheck", status_code=status.HTTP_200_OK) +async def healthcheck(request: Request): + return {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"} + + +app.include_router(router) diff --git a/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_file_2.py b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_file_2.py new file mode 100644 index 0000000..d057684 --- /dev/null +++ b/test_unstructured_api_tools/pipeline-test-project/prepline_test_project/api/process_text_file_2.py @@ -0,0 +1,235 @@ +##################################################################### +# THIS FILE IS AUTOMATICALLY GENERATED BY UNSTRUCTURED API TOOLS. 
def is_expected_response_type(media_type, response_type):
    """Return True when ``response_type`` CONFLICTS with ``media_type``.

    NOTE: the name reads the other way round. The route handler in this
    module answers 406 when this returns True, so the polarity is kept.

    Args:
        media_type: Negotiated media type string.
        response_type: Class of the pipeline result (callers pass
            ``type(response)``).

    Returns:
        True if ``media_type`` is "application/json" and the class is not a
        dict/list (or subclass), or ``media_type`` is "text/csv" and the
        class is not a str (or subclass). False for every other media type.
    """
    if media_type == "application/json":
        accepted = (dict, list)
    elif media_type == "text/csv":
        accepted = (str,)
    else:
        return False
    # Anything that is not a class can never match; issubclass would raise.
    if not isinstance(response_type, type):
        return True
    # issubclass (rather than identity) so e.g. OrderedDict counts as a dict.
    return not issubclass(response_type, accepted)


# pipeline-api
def pipeline_api(
    text,
    file=None,
    filename=None,
    file_content_type=None,
    response_type="application/json",
    m_input2=[],
):
    """Describe the received inputs as one " : "-separated string.

    Every field is passed through ``str()`` before joining. Previously
    ``text`` and ``filename`` were joined raw, so a ``None`` in either
    (including the ``filename`` default) raised ``TypeError``.

    NOTE(review): the file header says this module is generated, so the same
    fix presumably has to be applied to whatever generates it.

    Args:
        text: Text payload, or ``None``.
        file: Readable file-like object, or ``None``; it is read to the end.
        filename: Name reported for ``file``.
        file_content_type: Content type reported for ``file``.
        response_type: Media type string echoed into the result.
        m_input2: Extra values echoed into the result. The shared default
            list is only read, never mutated.

    Returns:
        ``{"silly_result": "<fields joined by ' : '>"}``.
    """
    fields = [
        len(text if text else ""),
        text,
        len(file.read()) if file else None,
        filename,
        file_content_type,
        response_type,
        m_input2,
    ]
    return {"silly_result": " : ".join(str(field) for field in fields)}
# Route handler for process-text-file-2.
# Deliberately documented with comments only: a docstring here would be
# published by FastAPI as the endpoint description.
@router.post("/test-project/v1.2.3/process-text-file-2")
async def pipeline_1(
    request: Request,
    files: Union[List[UploadFile], None] = File(default=None),
    text_files: Union[List[UploadFile], None] = File(default=None),
    output_format: Union[str, None] = Form(default=None),
    input2: List[str] = Form(default=[]),
):
    accept = request.headers.get("Accept")

    # A missing, wildcard or multipart Accept header defers to the form field.
    if accept and accept not in ("*/*", "multipart/mixed"):
        media_type = accept
    else:
        media_type = output_format or "application/json"

    text_given = isinstance(text_files, list) and len(text_files)
    files_given = isinstance(files, list) and len(files)
    if not (text_given or files_given):
        return PlainTextResponse(
            content='One of the request parameters "text_files" or "files" is required.\n',
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    file_uploads: List = files or []
    text_uploads: List = text_files or []

    # Keyword arguments common to every pipeline_api call in this request.
    shared = {"m_input2": input2, "response_type": media_type}

    def run_on_text(upload):
        # Text uploads are decoded and passed as `text`.
        return pipeline_api(
            text=upload.file.read().decode("utf-8"),
            file=None,
            **shared,
        )

    def run_on_file(upload):
        # Binary uploads are passed through as the underlying file object.
        return pipeline_api(
            text=None,
            file=upload.file,
            filename=upload.filename,
            file_content_type=upload.content_type,
            **shared,
        )

    if len(file_uploads) + len(text_uploads) > 1:
        # Several uploads: only these Accept values can carry multiple results.
        if accept and accept not in ("*/*", "multipart/mixed", "application/json"):
            return PlainTextResponse(
                content=(
                    f"Conflict in media type {accept}"
                    ' with response type "multipart/mixed".\n'
                ),
                status_code=status.HTTP_406_NOT_ACCEPTABLE,
            )

        def response_generator(is_multipart):
            # Text uploads are processed first, then file uploads.
            for runner, uploads in (
                (run_on_text, text_uploads),
                (run_on_file, file_uploads),
            ):
                for upload in uploads:
                    result = runner(upload)
                    # Multipart parts must be str/bytes; serialize anything else.
                    if is_multipart and type(result) not in (str, bytes):
                        result = json.dumps(result)
                    yield result

        if accept == "multipart/mixed":
            return MultipartMixedResponse(
                response_generator(is_multipart=True), content_type=media_type
            )
        return response_generator(is_multipart=False)

    # Exactly one upload.
    if text_given:
        response = run_on_text(text_uploads[0])
    elif files_given:
        response = run_on_file(file_uploads[0])

    # True from this helper signals a mismatch between media type and result.
    if is_expected_response_type(media_type, type(response)):
        return PlainTextResponse(
            content=(
                f"Conflict in media type {media_type}"
                f" with response type {type(response)}.\n"
            ),
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )
    if media_type not in ("application/json", "text/csv", "*/*"):
        return PlainTextResponse(
            content=f"Unsupported media type {media_type}.\n",
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )
    return response
def is_expected_response_type(media_type, response_type):
    """Return True when ``response_type`` CONFLICTS with ``media_type``.

    NOTE: the name reads the other way round. The route handler in this
    module answers 406 when this returns True, so the polarity is kept.

    Args:
        media_type: Negotiated media type string.
        response_type: Class of the pipeline result (callers pass
            ``type(response)``).

    Returns:
        True if ``media_type`` is "application/json" and the class is not a
        dict/list (or subclass), or ``media_type`` is "text/csv" and the
        class is not a str (or subclass). False for every other media type.
    """
    if media_type == "application/json":
        accepted = (dict, list)
    elif media_type == "text/csv":
        accepted = (str,)
    else:
        return False
    # Anything that is not a class can never match; issubclass would raise.
    if not isinstance(response_type, type):
        return True
    # issubclass (rather than identity) so e.g. OrderedDict counts as a dict.
    return not issubclass(response_type, accepted)


# pipeline-api
def pipeline_api(
    text,
    file=None,
    filename=None,
    file_content_type=None,
    response_type="application/json",
    response_schema="isd",
):
    """Describe the received inputs as one " : "-separated string.

    Every field is passed through ``str()`` before joining. Previously
    ``text`` and ``filename`` were joined raw, so a ``None`` in either
    (including the ``filename`` default) raised ``TypeError``.

    NOTE(review): the file header says this module is generated, so the same
    fix presumably has to be applied to whatever generates it.

    Args:
        text: Text payload, or ``None``.
        file: Readable file-like object, or ``None``; it is read to the end.
        filename: Name reported for ``file``.
        file_content_type: Content type reported for ``file``.
        response_type: Media type string echoed into the result.
        response_schema: Schema name echoed into the result.

    Returns:
        ``{"silly_result": "<fields joined by ' : '>"}``.
    """
    fields = [
        len(text if text else ""),
        text,
        len(file.read()) if file else None,
        filename,
        file_content_type,
        response_type,
        response_schema,
    ]
    return {"silly_result": " : ".join(str(field) for field in fields)}
# Route handler for process-text-file-3.
# Deliberately documented with comments only: a docstring here would be
# published by FastAPI as the endpoint description.
@router.post("/test-project/v1.2.3/process-text-file-3")
async def pipeline_1(
    request: Request,
    files: Union[List[UploadFile], None] = File(default=None),
    text_files: Union[List[UploadFile], None] = File(default=None),
    output_format: Union[str, None] = Form(default=None),
    output_schema: str = Form(default=None),
):
    accept = request.headers.get("Accept")

    # A missing, wildcard or multipart Accept header defers to the form field.
    if accept and accept not in ("*/*", "multipart/mixed"):
        media_type = accept
    else:
        media_type = output_format or "application/json"

    schema = output_schema or "isd"

    text_given = isinstance(text_files, list) and len(text_files)
    files_given = isinstance(files, list) and len(files)
    if not (text_given or files_given):
        return PlainTextResponse(
            content='One of the request parameters "text_files" or "files" is required.\n',
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    file_uploads: List = files or []
    text_uploads: List = text_files or []

    # Keyword arguments common to every pipeline_api call in this request.
    shared = {"response_type": media_type, "response_schema": schema}

    def run_on_text(upload):
        # Text uploads are decoded and passed as `text`.
        return pipeline_api(
            text=upload.file.read().decode("utf-8"),
            file=None,
            **shared,
        )

    def run_on_file(upload):
        # Binary uploads are passed through as the underlying file object.
        return pipeline_api(
            text=None,
            file=upload.file,
            filename=upload.filename,
            file_content_type=upload.content_type,
            **shared,
        )

    if len(file_uploads) + len(text_uploads) > 1:
        # Several uploads: only these Accept values can carry multiple results.
        if accept and accept not in ("*/*", "multipart/mixed", "application/json"):
            return PlainTextResponse(
                content=(
                    f"Conflict in media type {accept}"
                    ' with response type "multipart/mixed".\n'
                ),
                status_code=status.HTTP_406_NOT_ACCEPTABLE,
            )

        def response_generator(is_multipart):
            # Text uploads are processed first, then file uploads.
            for runner, uploads in (
                (run_on_text, text_uploads),
                (run_on_file, file_uploads),
            ):
                for upload in uploads:
                    result = runner(upload)
                    # Multipart parts must be str/bytes; serialize anything else.
                    if is_multipart and type(result) not in (str, bytes):
                        result = json.dumps(result)
                    yield result

        if accept == "multipart/mixed":
            return MultipartMixedResponse(
                response_generator(is_multipart=True), content_type=media_type
            )
        return response_generator(is_multipart=False)

    # Exactly one upload.
    if text_given:
        response = run_on_text(text_uploads[0])
    elif files_given:
        response = run_on_file(file_uploads[0])

    # True from this helper signals a mismatch between media type and result.
    if is_expected_response_type(media_type, type(response)):
        return PlainTextResponse(
            content=(
                f"Conflict in media type {media_type}"
                f" with response type {type(response)}.\n"
            ),
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )
    if media_type not in ("application/json", "text/csv", "*/*"):
        return PlainTextResponse(
            content=f"Unsupported media type {media_type}.\n",
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )
    return response
def is_expected_response_type(media_type, response_type):
    """Return True when ``response_type`` CONFLICTS with ``media_type``.

    NOTE: the name reads the other way round. The route handler in this
    module answers 406 when this returns True, so the polarity is kept.

    Args:
        media_type: Negotiated media type string.
        response_type: Class of the pipeline result (callers pass
            ``type(response)``).

    Returns:
        True if ``media_type`` is "application/json" and the class is not a
        dict/list (or subclass), or ``media_type`` is "text/csv" and the
        class is not a str (or subclass). False for every other media type.
    """
    if media_type == "application/json":
        accepted = (dict, list)
    elif media_type == "text/csv":
        accepted = (str,)
    else:
        return False
    # Anything that is not a class can never match; issubclass would raise.
    if not isinstance(response_type, type):
        return True
    # issubclass (rather than identity) so e.g. OrderedDict counts as a dict.
    return not issubclass(response_type, accepted)


# pipeline-api
def pipeline_api(
    text,
    file=None,
    filename=None,
    file_content_type=None,
    response_type="application/json",
    response_schema="isd",
    m_input1=[],
    m_input2=[],
):
    """Describe the received inputs as one " : "-separated string.

    Every field is passed through ``str()`` before joining. Previously
    ``text`` and ``filename`` were joined raw, so a ``None`` in either
    (including the ``filename`` default) raised ``TypeError``.

    NOTE(review): the file header says this module is generated, so the same
    fix presumably has to be applied to whatever generates it.

    Args:
        text: Text payload, or ``None``.
        file: Readable file-like object, or ``None``; it is read to the end.
        filename: Name reported for ``file``.
        file_content_type: Content type reported for ``file``.
        response_type: Media type string echoed into the result.
        response_schema: Schema name echoed into the result.
        m_input1: Extra values echoed into the result. The shared default
            list is only read, never mutated.
        m_input2: Same as ``m_input1``.

    Returns:
        ``{"silly_result": "<fields joined by ' : '>"}``.
    """
    fields = [
        len(text if text else ""),
        text,
        len(file.read()) if file else None,
        filename,
        file_content_type,
        response_type,
        response_schema,
        m_input1,
        m_input2,
    ]
    return {"silly_result": " : ".join(str(field) for field in fields)}
# Route handler for process-text-file-4.
# Deliberately documented with comments only: a docstring here would be
# published by FastAPI as the endpoint description.
@router.post("/test-project/v1.2.3/process-text-file-4")
async def pipeline_1(
    request: Request,
    files: Union[List[UploadFile], None] = File(default=None),
    text_files: Union[List[UploadFile], None] = File(default=None),
    output_format: Union[str, None] = Form(default=None),
    output_schema: str = Form(default=None),
    input1: List[str] = Form(default=[]),
    input2: List[str] = Form(default=[]),
):
    accept = request.headers.get("Accept")

    # A missing, wildcard or multipart Accept header defers to the form field.
    if accept and accept not in ("*/*", "multipart/mixed"):
        media_type = accept
    else:
        media_type = output_format or "application/json"

    schema = output_schema or "isd"

    text_given = isinstance(text_files, list) and len(text_files)
    files_given = isinstance(files, list) and len(files)
    if not (text_given or files_given):
        return PlainTextResponse(
            content='One of the request parameters "text_files" or "files" is required.\n',
            status_code=status.HTTP_400_BAD_REQUEST,
        )
    file_uploads: List = files or []
    text_uploads: List = text_files or []

    # Keyword arguments common to every pipeline_api call in this request.
    shared = {
        "m_input1": input1,
        "m_input2": input2,
        "response_type": media_type,
        "response_schema": schema,
    }

    def run_on_text(upload):
        # Text uploads are decoded and passed as `text`.
        return pipeline_api(
            text=upload.file.read().decode("utf-8"),
            file=None,
            **shared,
        )

    def run_on_file(upload):
        # Binary uploads are passed through as the underlying file object.
        return pipeline_api(
            text=None,
            file=upload.file,
            filename=upload.filename,
            file_content_type=upload.content_type,
            **shared,
        )

    if len(file_uploads) + len(text_uploads) > 1:
        # Several uploads: only these Accept values can carry multiple results.
        if accept and accept not in ("*/*", "multipart/mixed", "application/json"):
            return PlainTextResponse(
                content=(
                    f"Conflict in media type {accept}"
                    ' with response type "multipart/mixed".\n'
                ),
                status_code=status.HTTP_406_NOT_ACCEPTABLE,
            )

        def response_generator(is_multipart):
            # Text uploads are processed first, then file uploads.
            for runner, uploads in (
                (run_on_text, text_uploads),
                (run_on_file, file_uploads),
            ):
                for upload in uploads:
                    result = runner(upload)
                    # Multipart parts must be str/bytes; serialize anything else.
                    if is_multipart and type(result) not in (str, bytes):
                        result = json.dumps(result)
                    yield result

        if accept == "multipart/mixed":
            return MultipartMixedResponse(
                response_generator(is_multipart=True), content_type=media_type
            )
        return response_generator(is_multipart=False)

    # Exactly one upload.
    if text_given:
        response = run_on_text(text_uploads[0])
    elif files_given:
        response = run_on_file(file_uploads[0])

    # True from this helper signals a mismatch between media type and result.
    if is_expected_response_type(media_type, type(response)):
        return PlainTextResponse(
            content=(
                f"Conflict in media type {media_type}"
                f" with response type {type(response)}.\n"
            ),
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )
    if media_type not in ("application/json", "text/csv", "*/*"):
        return PlainTextResponse(
            content=f"Unsupported media type {media_type}.\n",
            status_code=status.HTTP_406_NOT_ACCEPTABLE,
        )
    return response