Updated to allow the selection of GPU for embedding where there is mo… #1734

Open · wants to merge 2 commits into base: main
24 changes: 23 additions & 1 deletion private_gpt/components/embedding/embedding_component.py
@@ -7,7 +7,7 @@
from private_gpt.settings.settings import Settings

logger = logging.getLogger(__name__)

import torch
Collaborator:
I'd move this into the try block within the "huggingface" case. There is no general "torch" dependency declared in pyproject.toml, so this import could break execution entirely for people not using huggingface. We may actually need to add torch to the embeddings-huggingface extra, i.e. change embeddings-huggingface = ["llama-index-embeddings-huggingface"] to:

# Optional Huggingface related dependency
torch = {version = "^2.2.1", optional = true}

embeddings-huggingface = ["torch", "llama-index-embeddings-huggingface"]

in pyproject.toml.

I think the huggingface package from LlamaIndex already depends on torch, but given that we are now importing it explicitly we should also depend on it explicitly.
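A sketch of the suggested change (assembled from this comment and the surrounding diff, not code taken from the PR): the import moves inside the existing try/except so a missing torch is reported the same way as a missing huggingface extra:

case "huggingface":
    try:
        import torch  # imported lazily so setups without the extra never load it
        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    except ImportError as e:
        raise ImportError(
            "Local dependencies not found, install with `poetry install --extras embeddings-huggingface`"
        ) from e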


@singleton
class EmbeddingComponent:
@@ -28,9 +28,31 @@ def __init__(self, settings: Settings) -> None:
"Local dependencies not found, install with `poetry install --extras embeddings-huggingface`"
) from e

# Get the number of available GPUs
num_gpus = torch.cuda.device_count()
Collaborator:

Adding code to the codebase just to print information is not good practice. I'd remove this whole block of prints.
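If the device information is still worth surfacing, one alternative (a sketch, not code from this PR) is to emit it through the module-level logger already defined in this file instead of printing:

if num_gpus > 0:
    for i in range(num_gpus):
        logger.debug("CUDA device %d: %s", i, torch.cuda.get_device_name(i))
else:
    logger.debug("No CUDA devices available, falling back to CPU")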


if num_gpus > 0:
    print("Available CUDA devices:")
    for i in range(num_gpus):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
else:
    print("No CUDA devices available. Switching to CPU.")

# Check if CUDA is available
if torch.cuda.is_available():
    # If the huggingface settings specify a gpu_type, use that device type and index
    if hasattr(settings, 'huggingface') and hasattr(settings.huggingface, 'gpu_type'):
        device = torch.device(f"{settings.huggingface.gpu_type}:{settings.huggingface.gpu_number}")
    else:
        device = torch.device('cuda:0')
else:
    # If CUDA is not available, use CPU
    device = torch.device("cpu")
Collaborator:

What happens with laptops using a GPU that is not Nvidia-based, for example a MacBook running a Metal GPU? Will this force them onto the CPU and make embedding slower?

Contributor:

This logic looks similar to llama_index.core.utils.infer_torch_device, which also handles Metal (mps).
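For reference, a minimal sketch of that style of device inference (an approximation, not the exact LlamaIndex implementation; assumes a torch build with MPS support):

import torch

def infer_device() -> str:
    # Prefer CUDA, then Apple's Metal backend (mps), then fall back to CPU
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"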

print("Embedding Device: ",device)
self.embedding_model = HuggingFaceEmbedding(
model_name=settings.huggingface.embedding_hf_model_name,
cache_folder=str(models_cache_path),
device=device
)
case "sagemaker":
try:
9 changes: 8 additions & 1 deletion private_gpt/settings/settings.py
@@ -1,4 +1,4 @@
from typing import Literal
from typing import Literal, Optional

from pydantic import BaseModel, Field

@@ -145,6 +145,13 @@ class HuggingFaceSettings(BaseModel):
embedding_hf_model_name: str = Field(
    description="Name of the HuggingFace model to use for embeddings"
)
gpu_type: Optional[Literal["cuda", "cpu"]] = Field(
    None,
    description="GPU device type for embedding, can be 'cuda' or 'cpu'",
)
gpu_number: int = Field(
    0,
    description="GPU device number for embedding, will be passed to torch as 'cuda:x'",
)


class EmbeddingSettings(BaseModel):
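As a quick illustration of how the two new fields combine (a minimal sketch assuming pydantic v2 and the field definitions above):

settings = HuggingFaceSettings(
    embedding_hf_model_name="BAAI/bge-small-en-v1.5",
    gpu_type="cuda",
    gpu_number=1,
)
device = f"{settings.gpu_type}:{settings.gpu_number}"  # "cuda:1", as passed to torch.device(...)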
4 changes: 3 additions & 1 deletion settings.yaml
@@ -54,9 +54,11 @@ embedding:
  # Should be matching the value above in most cases
  mode: huggingface
  ingest_mode: simple

huggingface:
  embedding_hf_model_name: BAAI/bge-small-en-v1.5
  gpu_type: cuda  # GPU device type for embedding, 'cuda' or 'cpu'; defaults to cuda:0, or CPU if CUDA is not available
  gpu_number: 1  # Directly select a device; normally 0 if there is only a single GPU

vectorstore:
database: qdrant