@@ -8,22 +8,19 @@ repository=823773083436.dkr.ecr.us-east-1.amazonaws.com/buildkite
8
8
9
9
# list of all the tests
10
10
tests=( \
11
- test-cpu-openmpi-py2_7-tf1_1_0-keras2_0_0-torch0_4_0-mxnet1_4_1-pyspark2_3_2 \
12
- test-cpu-openmpi-py3_6-tf1_1_0-keras2_0_0-torch0_4_0-mxnet1_4_1-pyspark2_3_2 \
13
11
test-cpu-openmpi-py2_7-tf1_6_0-keras2_1_2-torch0_4_1-mxnet1_4_1-pyspark2_3_2 \
14
12
test-cpu-openmpi-py3_6-tf1_6_0-keras2_1_2-torch0_4_1-mxnet1_4_1-pyspark2_3_2 \
15
13
test-cpu-gloo-py2_7-tf1_15_0-keras2_3_1-torch1_4_0-mxnet1_5_0-pyspark2_4_0 \
16
14
test-cpu-gloo-py3_6-tf1_15_0-keras2_3_1-torch1_4_0-mxnet1_5_0-pyspark2_4_0 \
17
- test-cpu-gloo-py3_7-tf1_15_0 -keras2_3_1-torch1_4_0 -mxnet1_5_0-pyspark2_4_0 \
15
+ test-cpu-gloo-py3_7-tf2_2_0 -keras2_3_1-torch1_5_0 -mxnet1_5_0-pyspark2_4_0 \
18
16
test-cpu-gloo-py3_8-tf2_2_0-keras2_3_1-torch1_5_0-mxnet1_5_0-pyspark2_4_0 \
19
17
test-cpu-openmpi-py3_6-tf1_14_0-keras2_2_4-torch1_2_0-mxnet1_4_1-pyspark2_4_0 \
20
- test-cpu-openmpi-gloo-py3_6-tf1_14_0-keras2_3_1-torch1_3_0-mxnet1_4_1-pyspark2_4_0 \
21
18
test-cpu-openmpi-py2_7-tf2_0_0-keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
22
19
test-cpu-openmpi-py3_6-tf2_0_0-keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
23
20
test-cpu-openmpi-py3_6-tfhead-kerashead-torchhead-mxnethead-pyspark2_4_0 \
24
- test-cpu-mpich-py3_6-tf1_14_0 -keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
25
- test-cpu-oneccl-py3_6-tf1_14_0 -keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
26
- test-cpu-oneccl-ofi-py3_6-tf1_14_0 -keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
21
+ test-cpu-mpich-py3_6-tf1_15_0 -keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
22
+ test-cpu-oneccl-py3_6-tf1_15_0 -keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
23
+ test-cpu-oneccl-ofi-py3_6-tf1_15_0 -keras2_3_1-torch1_3_0-mxnet1_5_0-pyspark2_4_0 \
27
24
test-gpu-openmpi-py3_6-tf1_15_0-keras2_3_1-torch1_3_0-mxnet1_4_1-pyspark2_4_0 \
28
25
test-gpu-gloo-py3_6-tf1_15_0-keras2_3_1-torch1_3_0-mxnet1_4_1-pyspark2_4_0 \
29
26
test-gpu-openmpi-gloo-py3_6-tf1_15_0-keras2_3_1-torch1_3_0-mxnet1_4_1-pyspark2_4_0 \
@@ -98,15 +95,20 @@ run_mpi_pytest() {
98
95
local oneccl_env=${3:- }
99
96
oneccl_env=$( echo ${oneccl_env//:/ } )
100
97
101
- local exclude_keras_if_needed =" "
98
+ local exclude_keras =" "
102
99
if [[ ${test} == * " tf2_" * ]] || [[ ${test} == * " tfhead" * ]]; then
103
100
# TODO: support for Keras + TF 2.0 and TF-Keras 2.0
104
- exclude_keras_if_needed =" | sed 's/test_keras.py//g' | sed 's/test_tensorflow_keras.py//g'"
101
+ exclude_keras =" | sed 's/test_keras.py//g' | sed 's/test_tensorflow_keras.py//g'"
105
102
else
106
- exclude_keras_if_needed =" | sed 's/[a-z_]*tensorflow2[a-z_.]*//g'"
103
+ exclude_keras =" | sed 's/[a-z_]*tensorflow2[a-z_.]*//g'"
107
104
fi
108
105
109
- local exclude_interactiverun=" | sed 's/test_interactiverun.py//g' | sed 's/test_spark_keras.py//g' | sed 's/test_spark_torch.py//g'"
106
+ local exclude_elastic=" "
107
+ if [[ ${test} == * " py2_" * ]]; then
108
+ exclude_elastic=" | sed 's/test_elastic[a-z_.]*//g'"
109
+ fi
110
+
111
+ local excluded_tests=" | sed 's/test_interactiverun.py//g' | sed 's/test_spark_keras.py//g' | sed 's/test_spark_torch.py//g'"
110
112
111
113
# Spark and Run tests do not need to be executed with horovodrun, but we still run them below.
112
114
local exclude_standalone_test=" | sed 's/test_spark.py//g' | sed 's/test_run.py//g'"
@@ -121,7 +123,7 @@ run_mpi_pytest() {
121
123
# pytests have 4x GPU use cases and require a separate queue
122
124
run_test " ${test} " " ${queue} " \
123
125
" :pytest: Run PyTests (${test} )" \
124
- " bash -c \" ${oneccl_env} cd /horovod/test && (echo test_*.py ${exclude_keras_if_needed } ${exclude_interactiverun } ${exclude_standalone_test} | xargs -n 1 \\\$ (cat /mpirun_command) pytest -v --capture=no) && pytest --forked -v --capture=no ${standalone_tests} \" "
126
+ " bash -c \" ${oneccl_env} cd /horovod/test && (echo test_*.py ${exclude_keras } ${exclude_elastic} ${excluded_tests } ${exclude_standalone_test} | xargs -n 1 \\\$ (cat /mpirun_command) pytest -v --capture=no) && pytest --forked -v --capture=no ${standalone_tests} \" "
125
127
}
126
128
127
129
run_mpi_integration () {
@@ -156,7 +158,7 @@ run_mpi_integration() {
156
158
fi
157
159
158
160
run_test " ${test} " " ${queue} " \
159
- " :python : Test PyTorch MNIST (${test} )" \
161
+ " :fire : Test PyTorch MNIST (${test} )" \
160
162
" bash -c \" ${oneccl_env} \\\$ (cat /mpirun_command) python /horovod/examples/pytorch_mnist.py\" "
161
163
162
164
run_test " ${test} " " ${queue} " \
@@ -165,7 +167,7 @@ run_mpi_integration() {
165
167
166
168
# tests that should be executed only with the latest release since they don't test
167
169
# framework-specific functionality
168
- if [[ ${test} == * " tf1_14_0 " * ]]; then
170
+ if [[ ${test} == * " tf1_15_0 " * ]]; then
169
171
run_test " ${test} " " ${queue} " \
170
172
" :muscle: Test Stall (${test} )" \
171
173
" bash -c \" ${oneccl_env} \\\$ (cat /mpirun_command) python /horovod/test/test_stall.py\" "
@@ -206,12 +208,17 @@ run_gloo_pytest() {
206
208
local test=$1
207
209
local queue=$2
208
210
209
- local exclude_keras_if_needed =" "
211
+ local exclude_keras =" "
210
212
if [[ ${test} == * " tf2_" * ]] || [[ ${test} == * " tfhead" * ]]; then
211
213
# TODO: support for Keras + TF 2.0 and TF-Keras 2.0
212
- exclude_keras_if_needed =" | sed 's/test_keras.py//g' | sed 's/test_tensorflow_keras.py//g'"
214
+ exclude_keras =" | sed 's/test_keras.py//g' | sed 's/test_tensorflow_keras.py//g'"
213
215
else
214
- exclude_keras_if_needed=" | sed 's/[a-z_]*tensorflow2[a-z_.]*//g'"
216
+ exclude_keras=" | sed 's/[a-z_]*tensorflow2[a-z_.]*//g'"
217
+ fi
218
+
219
+ local exclude_elastic=" "
220
+ if [[ ${test} == * " py2_" * ]]; then
221
+ exclude_elastic=" | sed 's/test_elastic[a-z_.]*//g'"
215
222
fi
216
223
217
224
# These are tested as integration-style tests.
@@ -229,7 +236,7 @@ run_gloo_pytest() {
229
236
230
237
run_test " ${test} " " ${queue} " \
231
238
" :pytest: Run PyTests (${test} )" \
232
- " bash -c \" cd /horovod/test && (echo test_*.py ${exclude_keras_if_needed } ${excluded_tests} ${exclude_standalone_test} | xargs -n 1 horovodrun -np 2 -H localhost:2 --gloo pytest -v --capture=no) && pytest --forked -v --capture=no ${standalone_tests} \" "
239
+ " bash -c \" cd /horovod/test && (echo test_*.py ${exclude_keras} ${exclude_elastic } ${excluded_tests} ${exclude_standalone_test} | xargs -n 1 horovodrun -np 2 -H localhost:2 --gloo pytest -v --capture=no) && pytest --forked -v --capture=no ${standalone_tests} \" "
233
240
}
234
241
235
242
run_gloo_integration () {
@@ -256,12 +263,24 @@ run_gloo_integration() {
256
263
fi
257
264
258
265
run_test " ${test} " " ${queue} " \
259
- " :python : Test PyTorch MNIST (${test} )" \
266
+ " :fire : Test PyTorch MNIST (${test} )" \
260
267
" horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/pytorch_mnist.py"
261
268
262
269
run_test " ${test} " " ${queue} " \
263
270
" :muscle: Test MXNet MNIST (${test} )" \
264
271
" horovodrun -np 2 -H localhost:2 --gloo python /horovod/examples/mxnet_mnist.py"
272
+
273
+ # Elastic
274
+ if [[ ${test} == * " py3_" * ]]; then
275
+ local elastic_tensorflow=" test_elastic_tensorflow.py"
276
+ if [[ ${test} == * " tf2_" * ]] || [[ ${test} == * " tfhead" * ]]; then
277
+ elastic_tensorflow=" test_elastic_tensorflow2.py"
278
+ fi
279
+
280
+ run_test " ${test} " " ${queue} " \
281
+ " :factory: Elastic Tests (${test} )" \
282
+ " bash -c \" cd /horovod/test/integration && pytest -v --log-cli-level 10 --capture=no test_elastic_torch.py ${elastic_tensorflow} \" "
283
+ fi
265
284
}
266
285
267
286
run_gloo () {
@@ -322,7 +341,7 @@ run_single_integration() {
322
341
fi
323
342
324
343
run_test " ${test} " " ${queue} " \
325
- " :python : Single PyTorch MNIST (${test} )" \
344
+ " :fire : Single PyTorch MNIST (${test} )" \
326
345
" bash -c \" ${oneccl_env} python /horovod/examples/pytorch_mnist.py --epochs 3\" "
327
346
328
347
run_test " ${test} " " ${queue} " \
0 commit comments