# dvc.yaml
# DVC pipeline definition.
#
# DVC runs every entry of a `cmd` list in its own shell process, started in
# the stage's working directory (the directory of this file unless `wdir` is
# set). A standalone `cd` entry therefore has no effect on later entries; use
# `cd <dir> && <command>` when a command must run in another directory.
stages:
  # Runs the Hadoop start-up scripts for HDFS and YARN.
  start-hadoop:
    cmd:
      - start-dfs.sh
      - start-yarn.sh

  # Runs the Spark master start-up script.
  start-spark:
    cmd:
      - start-master.sh

  # Installs the Python packages listed in the project's requirement file.
  install_library:
    cmd:
      - pip3 install -r ~/project-icta/project-test/requirement.txt --break-system-packages

  # Runs the ingestion script, which is declared to produce the RFM CSV.
  ingestion_data:
    cmd:
      # NOTE(review): this `cd` runs in its own shell and does not change the
      # directory of the next entry; the script path below is resolved
      # relative to this file's directory. Confirm that is intended.
      - cd ~/project-icta/project-test/
      - python3 src/icta/pipeline/stage_01_ingestion_data.py
    deps:
      - src/icta/pipeline/stage_01_ingestion_data.py
      - src/icta/components/ingestion_data.py
    outs:
      - datasets/rfm-customer.csv

  # Creates the target HDFS directory and uploads the ingested CSV into it.
  create_and_put_data:
    cmd:
      # `-p` creates missing parents and does not fail when the directory
      # already exists, so the stage can be re-run safely.
      - cd ~hadoop && bin/hdfs dfs -mkdir -p /nthaihoc/data
      # Each entry needs its own `cd`: the relative `bin/hdfs` path would
      # otherwise be resolved against the stage's working directory.
      - cd ~hadoop && bin/hdfs dfs -put -f ~/project-icta/project-test/datasets/rfm-customer.csv /nthaihoc/data
    deps:
      # Orders this stage after `ingestion_data`, which produces the file.
      # Assumes this file lives in ~/project-icta/project-test - TODO confirm.
      - datasets/rfm-customer.csv
    # Keep the previous behaviour of uploading on every `dvc repro`.
    always_changed: true

  # Submits main.py to Spark on YARN in client deploy mode.
  train_and_evaluate_model:
    cmd:
      # NOTE(review): this `cd` runs in its own shell, so `main.py` below is
      # resolved relative to this file's directory, not the home directory.
      # Confirm where main.py lives before chaining these with `&&`.
      - cd ~
      - spark-submit --master yarn --deploy-mode client main.py
    deps:
      - src/icta/components/spark_session.py
      - src/icta/components/train_model.py
      - src/icta/components/evaluate_model.py