Skip to content

Commit

Permalink
Add sampling parameters to env
Browse files Browse the repository at this point in the history
  • Loading branch information
aravind10x committed Sep 21, 2024
1 parent c2b9979 commit 5ab296b
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
5 changes: 4 additions & 1 deletion .env-Sample
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,7 @@ RUN_CONFIG_IS_ASYNC="true"
NEO4J_URI=bolt://localhost:7687## use bolt://neo4j:7687 if using docker for ragbuilder
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=ragbuilder
NEO4J_LOAD=true # set to false if graph is already loaded and you don't want to reload
NEO4J_LOAD=true # set to false if graph is already loaded and you don't want to reload
SAMPLING_RATIO=0.10 # Sampling ratio: If set to 0.10, ~10% of original data will be sampled, and used for RAG building.
SAMPLING_SIZE_THRESHOLD=750_000 # If your source data is larger than this threshold, RAGBuilder will default to sampling.
SAMPLING_FILE_SIZE_THRESHOLD=500_000 # When sampling directories, individual files that are larger than this threshold will be sampled at file level.
10 changes: 7 additions & 3 deletions src/ragbuilder/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,19 @@

# Configure logging first, then fetch the shared "ragbuilder" logger.
setup_logging()
logger = logging.getLogger("ragbuilder")
# Sampling settings are read from the environment once, when this module is
# imported. Each falls back to the string default shown when the variable is
# unset. A malformed value makes float()/int() raise ValueError at import time.
# Fraction of the source data to sample (default 0.1).
SAMPLING_RATIO = float(os.getenv('SAMPLING_RATIO', '0.1'))
# Source-size threshold for sampling; int() accepts the underscore digit
# separators used in the default.
# NOTE(review): unit is presumably bytes -- confirm against the code that compares it.
SAMPLING_SIZE_THRESHOLD = int(os.getenv('SAMPLING_SIZE_THRESHOLD', '750_000'))
# Per-file size threshold for sampling (same unit caveat as above).
SAMPLING_FILE_SIZE_THRESHOLD = int(os.getenv('SAMPLING_FILE_SIZE_THRESHOLD', '500_000'))


class DataSampler:
def __init__(
self,
data_source: str,
enable_sampling: bool = True,
sample_size_threshold: int = 750_000, #1_000_000_000,
sample_ratio: float = 0.1,
file_size_threshold: int = 500_000 #1_000_000
sample_size_threshold: int = SAMPLING_SIZE_THRESHOLD,
sample_ratio: float = SAMPLING_RATIO,
file_size_threshold: int = SAMPLING_FILE_SIZE_THRESHOLD
):
self.data_source = data_source
self.enable_sampling = enable_sampling
Expand Down

0 comments on commit 5ab296b

Please sign in to comment.