-
Notifications
You must be signed in to change notification settings - Fork 9
/
setup.sh
executable file
·299 lines (265 loc) · 8.53 KB
/
setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
#!/bin/bash
# Abort on the first unhandled command failure.
set -e

# When the parent process ID is 0 or 1, run this script again as a child of
# the current process and finish with the child's exit status.
# NOTE(review): the message only says "for protection"; presumably this keeps
# the process-group kill in the trap below out of a process parented directly
# by init -- confirm.
if [ "$PPID" -le 1 ]; then
    printf '\033[36m[INFO] Started from process %d. Re-entry for protection.\033[0m\n' "$PPID" >&2
    # "$0" / "$@" are quoted so the script path and every argument reach the
    # child unchanged. The status is captured through `||` because `set -e`
    # would otherwise abort before it could be forwarded; `$!` is not usable
    # here since it holds the PID of the last *background* job, not a status.
    rc=0
    "$0" "$@" || rc=$?
    exit "$rc"
fi

# On SIGINT, SIGTERM or exit: reset the SIGTERM handler, then send SIGTERM to
# the process group whose ID is this shell's PID (negative PID argument), so
# children started by the script are terminated as well. Resetting first keeps
# the signal delivered to this shell from re-entering the handler.
trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT
# ================================================================
# Environment Configuration
# ================================================================
# ROOT_DIR: canonical directory containing this script. Assigned first and
# exported afterwards so that a realpath failure is not hidden behind the
# (always successful) `export` builtin.
ROOT_DIR="$(realpath -e "$(dirname "$0")")"
export ROOT_DIR
export STAGE='/etc/roaster/stage'

# Import each KEY=value assignment from os-release as an exported DISTRO_KEY
# variable (e.g. ID becomes DISTRO_ID). Only lines that are assignments are
# rewritten and emitted; comment lines and anything else are skipped, since
# prefixing them with `export DISTRO_` would produce an invalid statement.
. <(sed -n 's/^\([A-Za-z_][A-Za-z0-9_]*=.*\)$/export DISTRO_\1/p' '/etc/os-release')

# RPM-family distributions additionally export the path of the mirror repo file.
case "$DISTRO_ID" in
'centos' | 'fedora' | 'rhel' | 'scientific')
    export RPM_CACHE_REPO="/etc/yum.repos.d/codingcafe-mirror.repo"
    ;;
esac

# ----------------------------------------------------------------
# IS_CONTAINER holds the literal word 'true' or 'false', chosen by the exit
# status of inside_container.sh, so it can be executed directly as a command
# in conditions below.
export IS_CONTAINER="$("$ROOT_DIR/inside_container.sh" && echo 'true' || echo 'false')"
[ "$IS_CONTAINER" ]

# Outside a container the script refuses to run as root.
if ! "$IS_CONTAINER" && [ "$(whoami)" = 'root' ]; then
    printf '\033[31m[ERROR] Please use a non-root user with sudo permission.\033[0m\n' >&2
    exit 1
fi

# ----------------------------------------------------------------
# SCRATCH defaults to /tmp/scratch. Outside a container, the first existing
# candidate directory is preferred and a unique path is reserved inside it
# with mktemp. SCRATCH is already exported, so the plain assignment keeps the
# export attribute while letting a mktemp failure stop the script.
export SCRATCH='/tmp/scratch'
for candidate in '/media/Scratch'; do
    if ! $IS_CONTAINER && [ -d "$candidate" ]; then
        SCRATCH="$(mktemp -p "$candidate")"
        break
    fi
done
# ================================================================
# Information
# ================================================================
# Start-of-run report: timestamp, title, host/kernel/architecture, GPU and
# sensor listings when the respective tools are found, the invoking user,
# and a disk usage table. Ends with two blank lines.
printf '%s\n' '================================================================'
date
printf '%s\n' '----------------------------------------------------------------'
printf '%s\n' ' CodingCafe CentOS Deployment '
if $IS_CONTAINER; then
    printf '%s\n' ' -- In Container -- '
fi
printf '%s\n' '----------------------------------------------------------------'

# Each label is printed without a trailing newline so the command output
# that follows completes the row.
printf '%s' '| Node | '
uname -no
printf '%s' '| Kernel | '
uname -sr
printf '%s' '| Platform | '
uname -m

# GPU and sensor details are optional: the rows are only filled in when the
# tool is found on PATH.
printf '%s\n' '| GPU | '
if which nvidia-smi >/dev/null 2>&1; then
    nvidia-smi -L | sed 's/^/| ******| /'
fi
printf '%s\n' '| Sensor | '
if which sensors >/dev/null 2>&1; then
    sensors | sed 's/^/| ******| /'
fi

printf '%s' '| User | '
whoami
printf '%s' '| | '
id
printf '%s\n' '----------------------------------------------------------------'
df -h --sync --output=target,fstype,size,used,avail,pcent,source | sed 's/^/| /'
printf '%s\n' '================================================================'
printf '\n'
printf '\n'
# ================================================================
# Cache sudo Credentials
# ================================================================
# Step 1: make sure sudo exists. The probe uses the `command -v` builtin
# instead of the external `which`, so an installed sudo is still detected on
# minimal images that do not ship `which`. Installing sudo needs root because
# sudo itself is not available yet.
if ! command -v sudo; then
    if [ "_$(whoami)" != '_root' ]; then
        printf '\033[31m[ERROR] Insufficient permission to bootstrap. Please install sudo manually or provide root access.\033[0m\n' >&2
        exit 1
    fi
    case "$DISTRO_ID" in
    # 'scientific' belongs to the RPM family here, matching every other
    # distribution switch in this script.
    'centos' | 'fedora' | 'rhel' | 'scientific')
        # yum is the fallback both when dnf is absent and when the dnf call fails.
        command -v dnf >/dev/null 2>&1 && dnf makecache -y || yum makecache -y
        command -v dnf >/dev/null 2>&1 && dnf install -y sudo || yum install -y sudo
        ;;
    'debian' | 'linuxmint' | 'ubuntu')
        apt-get update -o 'DPkg::Lock::Timeout=3600' -y
        DEBIAN_FRONTEND=noninteractive apt-get install -o 'DPkg::Lock::Timeout=3600' -y sudo
        ;;
    esac
fi

# Step 2: make sure ps and xargs exist. Every package-manager call goes
# through sudo, including the metadata refresh, because outside a container
# the script runs as a non-root user.
if ! command -v ps || ! command -v xargs; then
    case "$DISTRO_ID" in
    'centos' | 'fedora' | 'rhel' | 'scientific')
        command -v dnf >/dev/null 2>&1 && sudo dnf makecache -y || sudo yum makecache -y
        command -v dnf >/dev/null 2>&1 && sudo dnf install -y findutils procps-ng || sudo yum install -y findutils procps-ng
        ;;
    'debian' | 'linuxmint' | 'ubuntu')
        sudo apt-get update -o 'DPkg::Lock::Timeout=3600' -y
        sudo DEBIAN_FRONTEND=noninteractive apt-get install -o 'DPkg::Lock::Timeout=3600' -y findutils procps
        ;;
    esac
fi

# NOTE(review): sudo_ping_daemon.sh is not visible here; judging by its name
# it presumably keeps the sudo session alive during the run -- confirm.
. "$ROOT_DIR/pkgs/utils/sudo_ping_daemon.sh"

# Step 3: list the user's sudo privileges in long format with a custom
# password prompt. The prompt text below is part of the string, so its lines
# must stay unindented.
sudo -llp "
----------------------------------------------------------------
We would like to pre-activate a sudo session.
Please provide your password.
Session may still timeout, depending on system configuration.
You will be asked for password again at that time.
----------------------------------------------------------------
[sudo] password for %p: "
# ================================================================
# Configure Scratch Directory
# ================================================================
# Recreate the scratch path as an empty directory and make it the working
# directory; the previous directory stays on the directory stack for the
# `popd` in the cleanup section.
# `${SCRATCH:?}` stops the script with a clear message if the variable is
# empty or unset, instead of passing an empty path to rm/mkdir. `--` keeps a
# path starting with '-' from being parsed as an option.
rm -rvf -- "${SCRATCH:?}"
mkdir -p -- "$SCRATCH"
# $IS_CONTAINER || mount -t tmpfs -o size=100% tmpfs $SCRATCH
pushd "$SCRATCH"
# ================================================================
# Initialize Setup Stage
# ================================================================
# Rebuild the stage directory unless it already exists AND no arguments were
# given. The stage directory is filled with empty marker files: the names
# passed on the command line, or the full default list when there are none.
# NOTE(review): the markers are presumably consumed by the pkgs/*.sh scripts
# sourced later -- confirm against those scripts.
#
# Markers are created in a hidden sibling directory (".<name>") and moved
# into place in a single step at the end. Everything runs in a subshell with
# tracing and errexit enabled, so the directory changes and the helper
# variable do not leak into the main script.
if ! [ -d "$STAGE" ] || [ $# -gt 0 ]; then
    (
        set -xe
        stage_tmp="$(dirname "$STAGE")/.$(basename "$STAGE")"
        sudo rm -rvf "$STAGE"
        sudo mkdir -p "$stage_tmp"
        cd "$stage_tmp"
        if [ $# -gt 0 ]; then
            # Quoted so every requested name becomes exactly one marker. A
            # failure here aborts instead of silently falling back to the
            # complete default list.
            sudo touch "$@"
        else
            # Brace expansions must stay unquoted to expand.
            sudo touch repo font pkg-stable pkg-skip pkg-all fpm auth vim tmux tex ss trojan \
                intel nasm lm-sensors lz4 zstd cmake hiredis ccache c-ares axel ipt \
                python-3.{7,8,9,10,11} cuda gdrcopy ucx ompi llvm-{gcc,clang} boost jemalloc \
                eigen openblas gtest benchmark gflags glog snappy jsoncpp rapidjson simdjson \
                utf8proc pugixml protobuf nsync grpc catch2 pybind libpng x264 x265 mkl-dnn \
                ispc halide xgboost sentencepiece opencv leveldb rocksdb lmdb nvcodec ffmpeg \
                onnx pytorch torchvision apex ort
        fi
        # Best effort only: a failing sync must not abort the setup.
        sync || true
        cd "$SCRATCH"
        sudo mv -vf "$stage_tmp" "$STAGE"
    )
fi
# If ccache is installed: inside a container set its maximum cache size to
# 128Gi (see "man ccache" for the supported size units), then zero the
# statistics counters in every case.
if which ccache >/dev/null 2>&1; then
    if $IS_CONTAINER; then
        ccache -M 128Gi
    fi
    ccache -z
fi
# Source the package scripts pkgs/<name>.sh one after another, in exactly
# this order. They are sourced rather than executed, so they run inside this
# shell; the loop variable keeps the name `i` because it is visible to them.
# NOTE(review): nvcodec is listed twice (after cuda and again before ffmpeg),
# as in the original list -- confirm this is intentional.
for i in \
    env/mirror env/cred repo env/pkg font pkg fpm firewall auth vim tmux \
    slurm nagios shadowsocks trojan texlive intel nasm lm-sensors lz4 zstd \
    cmake hiredis ccache c-ares axel ipt python cuda nvcodec gdrcopy ucx \
    openmpi nccl argyll llvm boost jemalloc eigen openblas gtest benchmark \
    gflags glog snappy jsoncpp rapidjson simdjson utf8proc pugixml protobuf \
    nsync catch2 pybind grpc libpng libgdiplus x264 x265 mkl-dnn ispc halide \
    xgboost sentencepiece opencv leveldb rocksdb lmdb nvcodec ffmpeg onnx \
    caffe pytorch torchvision apex ort
do
    . "$ROOT_DIR/pkgs/$i.sh"
done
# ================================================================
# Cleanup
# ================================================================
# Leave and delete the scratch directory, refresh the dynamic linker cache,
# and print ccache statistics when ccache is installed.
popd
rm -rvf "$SCRATCH"
sudo ldconfig
if which ccache >/dev/null 2>&1; then
    ccache -s
fi

# Inside a container, also remove unneeded packages and package-manager
# caches.
if $IS_CONTAINER; then
    case "$DISTRO_ID" in
    'centos' | 'fedora' | 'rhel' | 'scientific')
        # Prefer dnf; yum is used when dnf is not found or its autoremove fails.
        if sudo which dnf >/dev/null 2>&1; then
            sudo dnf autoremove -y || sudo yum autoremove -y
        else
            sudo yum autoremove -y
        fi
        # The dnf cache is only cleaned when dnf exists; the yum cache is
        # cleaned unconditionally.
        if sudo which dnf >/dev/null 2>&1; then
            sudo dnf clean all --enablerepo='*'
        fi
        sudo yum clean all
        sudo rm -rf /var/cache/yum
        # DNF may log GB of data here.
        sudo rm -rf /var/log/dnf.librepo.log
        ;;
    'debian' | 'linuxmint' | 'ubuntu')
        sudo apt-get autoremove -o 'DPkg::Lock::Timeout=3600' -y
        sudo apt-get clean -o 'DPkg::Lock::Timeout=3600'
        # The glob is intentionally unquoted so it expands to the list files.
        sudo rm -rf /var/lib/apt/lists/*
        ;;
    esac
fi
# ----------------------------------------------------------------
# End-of-run report: two blank lines, then the same host/user summary as at
# start-up under a "Completed!" title, plus ccache statistics when ccache is
# installed, and the disk usage table.
printf '\n'
printf '\n'
printf '%s\n' '================================================================'
date
printf '%s\n' '----------------------------------------------------------------'
printf '%s\n' ' Completed! '
if $IS_CONTAINER; then
    printf '%s\n' ' -- In Container -- '
fi
printf '%s\n' '----------------------------------------------------------------'

# Labels are printed without a trailing newline; the command output that
# follows completes each row.
printf '%s' '| Node | '
uname -no
printf '%s' '| Kernel | '
uname -sr
printf '%s' '| Platform | '
uname -m

# Optional rows: only filled in when the tool is found on PATH.
printf '%s\n' '| GPU | '
if which nvidia-smi >/dev/null 2>&1; then
    nvidia-smi -L | sed 's/^/| ******| /'
fi
printf '%s\n' '| Sensor | '
if which sensors >/dev/null 2>&1; then
    sensors | sed 's/^/| ******| /'
fi

printf '%s' '| User | '
whoami
printf '%s' '| | '
id
printf '%s\n' '----------------------------------------------------------------'
if which ccache >/dev/null 2>&1; then
    ccache -s | sed 's/^/| /'
    # $IS_CONTAINER && ccache -Cz >/dev/null 2>/dev/null
    printf '%s\n' '----------------------------------------------------------------'
fi
df -h --sync --output=target,fstype,size,used,avail,pcent,source | sed 's/^/| /'
printf '%s\n' '================================================================'
# ----------------------------------------------------------------
# Restore default handling for the signals and the EXIT event trapped at the
# top of the script, so a normal exit no longer runs that handler.
trap - SIGTERM SIGINT EXIT