Skip to content

Commit

Permalink
Migrate dataset reference back to DATA_DIR
Browse files (browse the repository at this point in the history)
  • Loading branch information
skyfenton committed Feb 12, 2025
1 parent 9667326 commit aee9881
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
6 changes: 3 additions & 3 deletions mediabridge/data_processing/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
Rating,
get_engine,
)
- from mediabridge.definitions import FULL_TITLES_TXT, OUTPUT_DIR, PROJECT_DIR
+ from mediabridge.definitions import DATA_DIR, FULL_TITLES_TXT, OUTPUT_DIR

GLOB = "mv_00*.txt"

Expand Down Expand Up @@ -50,8 +50,8 @@ def _etl_movie_title() -> None:

def _etl_user_rating(max_rows: int) -> None:
"""Writes out/rating.csv.gz if needed, then populates rating table from it."""
- training_folder = PROJECT_DIR.parent / "Netflix-Dataset/training_set/training_set"
- diagnostic = "Please clone https://github.com/deesethu/Netflix-Dataset.git"
+ training_folder = DATA_DIR / "training_set"
+ diagnostic = "Please run `pipenv run dev init` to download the necessary dataset"
assert training_folder.exists(), diagnostic
path_re = re.compile(r"/mv_(\d{7}).txt$")
is_initial = True
Expand Down
4 changes: 2 additions & 2 deletions mediabridge/data_processing/wiki_to_netflix_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
EXPECTED_SPARQL_QUERY,
WIKIDATA_RESPONSE_THE_ROOM,
)
- from mediabridge.definitions import FULL_TITLES_TXT, PROJECT_DIR
+ from mediabridge.definitions import DATA_DIR, FULL_TITLES_TXT
from mediabridge.schemas.movies import EnrichedMovieData, MovieData
from tests.util.logging_util import silence_logging

- TITLES_TXT = PROJECT_DIR / "tests/test-data/movie_titles_test.txt"
+ TITLES_TXT = DATA_DIR / "movie_titles.txt"
TITLES_CSV = TITLES_TXT.with_suffix(".csv")


Expand Down

0 comments on commit aee9881

Please sign in to comment.