Skip to content

Commit

Permalink
feat(feature_store.py): simplify distribution
Browse files (browse the repository at this point in the history)
  • Loading branch information
tpoisonooo committed Sep 12, 2024
1 parent c1381bd commit 33cc022
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion huixiangdou/service/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import shutil
from multiprocessing import Pool
from typing import Any, Dict, List, Optional

import random
import pytoml
from loguru import logger
from torch.cuda import empty_cache
Expand Down Expand Up @@ -175,6 +175,9 @@ def build_dense(self, files: List[FileName], work_dir: str, markdown_as_txt: boo

def analyze(self, chunks: List[Chunk]):
"""Output documents length mean, median and histogram."""
MAX_COUNT = 10000
if len(chunks) > MAX_COUNT:
chunks = random.sample(chunks, MAX_COUNT)

text_lens = []
token_lens = []
Expand Down

0 comments on commit 33cc022

Please sign in to comment.