-
Notifications
You must be signed in to change notification settings - Fork 392
/
Copy pathtemplate.json.jinja
64 lines (60 loc) · 1.76 KB
/
template.json.jinja
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{#-
  Tunable settings for the rendered Marathon group. Everything below this
  block reads these names; edit values here rather than in the JSON body.
-#}

{#- Group identity: `name` becomes the group id and is embedded in every
    task host name. -#}
{%- set name = "mnist" -%}

{#- Docker image used by every app in the group.
    NOTE(review): empty here; presumably it has to be filled in before the
    rendered JSON is deployed; confirm. -#}
{%- set image = "" -%}

{#- Cluster shape: how many worker and parameter-server apps are generated. -#}
{%- set worker_replicas = 3 -%}
{%- set ps_replicas = 2 -%}

{#- Addressing: host names end in `dns_domain`. `port` is the base port:
    worker k is addressed at port + k, ps k at port + worker_replicas + k. -#}
{%- set dns_domain = "marathon.mesos" -%}
{%- set port = 2333 -%}

{#- Training output location; passed to each task as --train_dir and to
    TensorBoard as --logdir. -#}
{%- set train_dir = "hdfs://namenode/train_dir" -%}

{#- Flag consulted when the apps list is rendered. -#}
{%- set tensorboard = true -%}

{#- Resources requested by each app ("cpus" and "mem" fields). -#}
{%- set cpu = 2 -%}
{%- set mem = 4096 -%}

{#- Lookup table from job name to its replica count. -#}
{%- set replicas = {"worker": worker_replicas, "ps": ps_replicas} -%}
{#-
  worker_hosts(): renders the comma-separated "host:port" list covering all
  worker tasks, with no surrounding whitespace. Worker k is addressed as
  worker-k-<name>.<dns_domain> on port + k. Renders an empty string when
  worker_replicas is 0.
-#}
{%- macro worker_hosts() -%}
  {%- for idx in range(worker_replicas) -%}
    worker-{{ idx }}-{{ name }}.{{ dns_domain }}:{{ port + idx }}
    {%- if not loop.last -%},{%- endif -%}
  {%- endfor -%}
{%- endmacro -%}
{#-
  ps_hosts(): renders the comma-separated "host:port" list covering all
  parameter-server tasks, with no surrounding whitespace. Ps k is addressed
  as ps-k-<name>.<dns_domain>; its port is offset by worker_replicas so the
  ps ports follow directly after the worker ports. Renders an empty string
  when ps_replicas is 0.
-#}
{%- macro ps_hosts() -%}
  {%- for idx in range(ps_replicas) -%}
    ps-{{ idx }}-{{ name }}.{{ dns_domain }}:{{ port + worker_replicas + idx }}
    {%- if not loop.last -%},{%- endif -%}
  {%- endfor -%}
{%- endmacro -%}
{#-
  Marathon group document: one app per worker/ps task, plus an optional
  TensorBoard app.

  The (job, index) pairs are flattened into a single list first so that
  `loop.last` below refers to the last task overall. Testing `loop.last`
  inside a nested per-job loop only sees the end of the current job, which
  drops the comma between the last worker and the first ps (and mishandles
  a job with zero replicas) whenever `tensorboard` is false.
-#}
{%- set tasks = [] -%}
{%- for job in ["worker", "ps"] -%}
  {%- for i in range(replicas[job]) -%}
    {#- `set` inside a loop is loop-local, but append() mutates the outer
        list, so the collected pairs survive the loop. -#}
    {%- set _unused = tasks.append([job, i]) -%}
  {%- endfor -%}
{%- endfor -%}
{#- The host lists are identical for every task; render them once. -#}
{%- set worker_host_list = worker_hosts() -%}
{%- set ps_host_list = ps_hosts() -%}
{
  "id": "{{ name }}",
  "apps": [
    {%- for job, i in tasks %}
    {
      "id": "{{ job }}-{{ i }}",
      "container": {
        "docker": {
          "image": "{{ image }}"
        },
        "network": "HOST",
        "type": "MESOS"
      },
      "args": ["--worker_hosts", "{{ worker_host_list }}", "--ps_hosts", "{{ ps_host_list }}", "--job_name", "{{ job }}", "--task_index", "{{ i }}", "--train_dir", "{{ train_dir }}", "--sync_replica", "True", "--train_steps", "2000"],
      "cpus": {{ cpu }},
      "mem": {{ mem }},
      "instances": 1
    }
    {#- A separator is needed after every task except the very last one,
        unless the TensorBoard app follows. -#}
    {%- if not loop.last or tensorboard %},{% endif %}
    {%- endfor %}
    {#- Only emit the TensorBoard app when it is enabled; the comma logic
        above already assumes it is absent when the flag is false. -#}
    {%- if tensorboard %}
    {
      "id": "tensorboard",
      "container": {
        "docker": {
          "image": "{{ image }}"
        },
        "network": "HOST",
        "type": "MESOS"
      },
      "cmd": "tensorboard --logdir {{ train_dir }}",
      "cpus": {{ cpu }},
      "mem": {{ mem }},
      "instances": 1
    }
    {%- endif %}
  ]
}