diff --git a/README.md b/README.md index b2b1b18..4e80f24 100644 --- a/README.md +++ b/README.md @@ -48,8 +48,7 @@ PandaETL is an open-source, no-code ETL (Extract, Transform, Load) tool designed 3. Create a `.env` file in the frontend directory with the following: ```bash - NEXT_PUBLIC_API_URL=http://localhost:3000/api/v1 - NEXT_PUBLIC_STORAGE_URL=http://localhost:3000/api/assets + NEXT_PUBLIC_API_URL=http://localhost:5328 ``` or copy the `.env.example` file to `.env` diff --git a/backend/.env.example b/backend/.env.example index aaefd3c..bb27dd7 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -3,5 +3,6 @@ PANDAETL_SERVER_URL="https://api.panda-etl.ai/" # optional API_SERVER_URL="https://api.domer.ai" # optional USE_OPENAI_EMBEDDINGS=false # optional OPENAI_API_KEY=sk-xxxxxxxxxxxx # optional -CHROMA_BATCH_SIZE=5 # optional +CHROMA_BATCH_SIZE=500 # optional MAX_FILE_SIZE=20971520 # optional +PANDAETL_API_KEY=xxx-xxx-xxx-xxx # optional if you already have a PandaETL api key diff --git a/backend/Dockerfile b/backend/Dockerfile index 9cabb40..0d78656 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -2,11 +2,6 @@ FROM python:3.11-slim WORKDIR /app -# Install system dependencies -RUN apt-get update && apt-get install -y \ - build-essential \ - && rm -rf /var/lib/apt/lists/* - # Install Poetry RUN pip install poetry @@ -19,9 +14,6 @@ RUN poetry config virtualenvs.create false # Install dependencies RUN poetry install -# run migrations -RUN make migrate - # Expose the port the app runs on EXPOSE 8000 diff --git a/backend/app/config.py b/backend/app/config.py index df11117..e721a15 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,6 +1,7 @@ import os from dotenv import load_dotenv from pydantic_settings import BaseSettings +from typing import Optional # Load environment variables from .env file load_dotenv() @@ -30,6 +31,9 @@ class Settings(BaseSettings): chat_extraction_doc_threshold: float = 0.5 chat_extraction_max_docs: int = 50 + # PandaETL api key + pandaetl_api_key: Optional[str] = None + class Config: env_file = ".env" diff --git a/backend/app/main.py b/backend/app/main.py index 100a033..3417716 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,6 +1,6 @@ from app import models from app.processing.process_queue import submit_process -from app.repositories import process_repository, project_repository +from app.repositories import process_repository, project_repository, user_repository from fastapi import FastAPI from fastapi.staticfiles import StaticFiles from .database import SessionLocal @@ -8,6 +8,7 @@ from app.processing.file_preprocessing import process_file from .config import settings from .api import v1_router +from app.schemas.user import APIKeyRequest # Initialize the FastAPI app app = FastAPI() @@ -57,6 +58,26 @@ def startup_pending_processes(): print(f"Error in startup_pending_processes: {e}") +def setup_user(): + try: + with SessionLocal() as db: + + if settings.pandaetl_api_key: + user = user_repository.get_users(db, n=1) + api_key = user_repository.get_user_api_key(db) + + if not user: + user = user_repository.create_user(db, APIKeyRequest(email="test@pandai-etl.ai")) + + if not api_key: + user_repository.add_user_api_key(db, user.id, settings.pandaetl_api_key) + + print("Successfully set up user from api key") + + except Exception as e: + print(f"Error in setup user from api key: {e}") + + app.add_middleware( CORSMiddleware, allow_origins=["*"], # Allow all origins (for development) @@ -69,6 +90,6 @@ def startup_pending_processes(): app.include_router(v1_router, prefix="/v1") - +setup_user() startup_pending_processes() startup_file_preprocessing() diff --git a/backend/app/processing/file_preprocessing.py b/backend/app/processing/file_preprocessing.py index 8207912..e2ca247 100644 --- a/backend/app/processing/file_preprocessing.py +++ b/backend/app/processing/file_preprocessing.py @@ -44,7 +44,7 @@ def process_segmentation(project_id: int, asset_id: int, asset_file_name: str): vectorstore.add_docs( docs=docs, metadatas=metadatas, - batch_size=100 + batch_size=settings.chroma_batch_size ) project_repository.update_asset_content_status( diff --git a/backend/app/processing/process_queue.py b/backend/app/processing/process_queue.py index fa1c3d1..d5fe9ee 100644 --- a/backend/app/processing/process_queue.py +++ b/backend/app/processing/process_queue.py @@ -406,4 +406,4 @@ def vectorize_extraction_process_step(project_id: int, process_step_id: int, fil ] # Add documents to vectorstore - vectorstore.add_docs(docs=docs, metadatas=metadatas, batch_size=100) + vectorstore.add_docs(docs=docs, metadatas=metadatas, batch_size=settings.chroma_batch_size) diff --git a/backend/tests/processing/test_process_queue.py b/backend/tests/processing/test_process_queue.py index 4796ef5..b7d2d5c 100644 --- a/backend/tests/processing/test_process_queue.py +++ b/backend/tests/processing/test_process_queue.py @@ -216,7 +216,7 @@ def test_vectorize_extraction_process_step_single_reference(mock_chroma_db): mock_vectorstore.add_docs.assert_called_once_with( docs=expected_docs, metadatas=expected_metadatas, - batch_size=100 + batch_size=5 ) @patch('app.processing.process_queue.ChromaDB') @@ -263,7 +263,7 @@ def test_vectorize_extraction_process_step_multiple_references_concatenation(moc mock_vectorstore.add_docs.assert_called_once_with( docs=expected_docs, metadatas=expected_metadatas, - batch_size=100 + batch_size=5 ) @patch('app.processing.process_queue.ChromaDB') # Replace with the correct module path diff --git a/frontend/.env.example b/frontend/.env.example index 3b74913..ace77db 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -1,4 +1,4 @@ -NEXT_PUBLIC_API_URL=http://localhost:3000/api/v1 -NEXT_PUBLIC_STORAGE_URL=http://localhost:3000/api/assets +NEXT_PUBLIC_API_URL=http://localhost:5328 NEXT_PUBLIC_MIXPANEL_TOKEN=f2e8a71ab2bde33ebf346c5abf6ba9fa NEXT_PUBLIC_ROLLBAR_ACCESS_TOKEN=0df0bee895044430880278e2b2a5b2d2 +# NEXT_PUBLIC_BACKEND_URL=http://backend:5328 # Uncomment this if you're working with a docker setup diff --git a/frontend/next.config.mjs b/frontend/next.config.mjs index d3dba14..b045806 100644 --- a/frontend/next.config.mjs +++ b/frontend/next.config.mjs @@ -1,17 +1,5 @@ const nextConfig = { swcMinify: false, // TODO - track and remove this later: https://github.com/wojtekmaj/react-pdf/issues/1822 - async rewrites() { - return [ - // { - // source: "/api/:path*", - // destination: "http://localhost:5328/:path*", - // }, - { - source: "/assets/:path*", - destination: "http://localhost:5328/assets/:path*", - }, - ]; - }, }; export default nextConfig; diff --git a/frontend/src/constants.ts b/frontend/src/constants.ts index 81c1a93..5d393e0 100644 --- a/frontend/src/constants.ts +++ b/frontend/src/constants.ts @@ -1,3 +1,3 @@ -export const BASE_API_URL = process.env.NEXT_PUBLIC_API_URL; -export const BASE_STORAGE_URL = process.env.NEXT_PUBLIC_STORAGE_URL; +export const BASE_API_URL = `${process.env.NEXT_PUBLIC_API_URL}/v1`; +export const BASE_STORAGE_URL = `${process.env.NEXT_PUBLIC_API_URL}/assets`; export const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB in bytes diff --git a/frontend/src/middleware.ts b/frontend/src/middleware.ts index 741836d..71b0902 100644 --- a/frontend/src/middleware.ts +++ b/frontend/src/middleware.ts @@ -1,12 +1,24 @@ import { NextResponse } from "next/server"; import type { NextRequest } from "next/server"; -import { GetAPIKey } from "@/services/user"; import localStorage from "@/lib/localStorage"; +import { APIKeyData } from "./interfaces/user"; +import axios from "axios"; +import { GetAPIKey } from "./services/user"; export async function middleware(request: NextRequest) { let apiKey = null; try { - apiKey = await GetAPIKey(); + const dockerBackendUrl = process.env.NEXT_PUBLIC_BACKEND_URL; + + if (dockerBackendUrl) { + console.log(dockerBackendUrl); + const response = await axios.get<{ data: APIKeyData }>( + `${dockerBackendUrl}/v1/user/get-api-key` + ); + apiKey = { data: { api_key: response.data.data.key } }; + } else { + apiKey = await GetAPIKey(); + } } catch (error) { console.error("Error fetching API key:", error); return NextResponse.redirect(new URL("/api-key-setup", request.url));