Skip to content

Commit b72722b

Browse files
authored
Add Intel(R) MPI support for horovodrun (horovod#2374)
Signed-off-by: Yana Shchyokotova <[email protected]>
1 parent 6889773 commit b72722b

File tree

11 files changed

+226
-95
lines changed

11 files changed

+226
-95
lines changed

.buildkite/gen-pipeline.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,25 +169,25 @@ run_mpi_integration() {
169169
":muscle: MPI Stall (${test})" \
170170
"bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/test/integration/test_stall.py\""
171171

172-
if [[ ${test} == *"openmpi"* ]]; then
172+
if [[ ${test} == *"openmpi"* ]] || [[ ${test} == *"oneccl"* ]]; then
173173
run_test "${test}" "${queue}" \
174174
":terminal: MPI Horovodrun (${test})" \
175-
"horovodrun -np 2 -H localhost:2 python /horovod/examples/tensorflow/tensorflow_mnist.py"
175+
"bash -c \"${oneccl_env} horovodrun -np 2 -H localhost:2 python /horovod/examples/tensorflow/tensorflow_mnist.py\""
176176
run_test "${test}" "${queue}" \
177177
":terminal: MPI Horovodrun (${test})" \
178-
"bash -c \"echo 'localhost slots=2' > hostfile && horovodrun -np 2 -hostfile hostfile python /horovod/examples/mxnet/mxnet_mnist.py\""
178+
"bash -c \"${oneccl_env} echo 'localhost slots=2' > hostfile && horovodrun -np 2 -hostfile hostfile python /horovod/examples/mxnet/mxnet_mnist.py\""
179179
fi
180180
fi
181181

182182
# TensorFlow 2.0 tests
183183
if [[ ${test} == *"tf2_"* ]] || [[ ${test} == *"tfhead"* ]]; then
184184
run_test "${test}" "${queue}" \
185185
":tensorflow: MPI TensorFlow 2.0 MNIST (${test})" \
186-
"bash -c \"\\\$(cat /mpirun_command) python /horovod/examples/tensorflow2/tensorflow2_mnist.py\""
186+
"bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/examples/tensorflow2/tensorflow2_mnist.py\""
187187

188188
run_test "${test}" "${queue}" \
189189
":tensorflow: MPI TensorFlow 2.0 Keras MNIST (${test})" \
190-
"bash -c \"\\\$(cat /mpirun_command) python /horovod/examples/tensorflow2/tensorflow2_keras_mnist.py\""
190+
"bash -c \"${oneccl_env} \\\$(cat /mpirun_command) python /horovod/examples/tensorflow2/tensorflow2_keras_mnist.py\""
191191
fi
192192
}
193193

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
88

99
### Added
1010

11-
### Changed
11+
- Added support for Intel(R) MPI in horovodrun. ([#2374](https://github.com/horovod/horovod/pull/2374))
12+
13+
### Changed
1214

1315
### Deprecated
1416

Dockerfile.test.cpu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ RUN if [[ ${MPI_KIND} == "OpenMPI" ]]; then \
106106
cp /mpirun_command_ofi /mpirun_command_mpi && \
107107
sed -i 's/export CCL_ATL_TRANSPORT=ofi;/export CCL_ATL_TRANSPORT=mpi;/g' /mpirun_command_mpi && \
108108
sed -i 's/export HOROVOD_CCL_CACHE=1;/export HOROVOD_CCL_CACHE=0;/g' /mpirun_command_mpi && \
109+
echo "/mpirun_command_mpi" > /mpirun_command && \
109110
echo "-L/usr/local/oneccl/lib -lmpi -I/usr/local/oneccl/include" > /mpicc_oneccl && \
110-
chmod +x /mpicc_oneccl && \
111-
echo "/mpirun_command_mpi" > /mpirun_command; \
111+
chmod +x /mpicc_oneccl; \
112112
elif [[ ${MPI_KIND} == "MPICH" ]]; then \
113113
apt-get install -y mpich && \
114114
echo "mpirun -np 2" > /mpirun_command; \

docs/oneccl.rst

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,13 @@ Source ``setvars.sh`` to start using oneCCL.
1414
1515
source <install_dir>/env/setvars.sh
1616
17-
2. Install the `Intel(R) MPI Library <https://software.intel.com/en-us/mpi-library>`_.
18-
19-
To install the Intel MPI Library, follow `these instructions <https://software.intel.com/en-us/mpi-library/documentation/get-started>`_.
20-
21-
Source ``mpivars.sh`` script to establish the proper environment settings.
22-
23-
.. code-block:: bash
24-
25-
source <installdir_MPI>/intel64/bin/mpivars.sh release_mt
26-
27-
3. Set ``HOROVOD_CPU_OPERATIONS`` variable
17+
2. Set ``HOROVOD_CPU_OPERATIONS`` variable
2818

2919
.. code-block:: bash
3020
3121
export HOROVOD_CPU_OPERATIONS=CCL
3222
33-
4. Install Horovod from source code
23+
3. Install Horovod from source code
3424

3525
.. code-block:: bash
3626

docs/running.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Usage of ``horovodrun`` requires one of the following:
5757
* MPICH
5858
* OpenRTE
5959
* Gloo
60+
* Intel(R) MPI
6061

6162
If you do not have MPI installed, you can run ``horovodrun`` using Gloo. Gloo dependencies come with Horovod
6263
automatically, and only require CMake to be available on your system at the time you install Horovod.
@@ -97,4 +98,10 @@ In some advanced cases you might want fine-grained control over options passed t
9798
To learn how to run Horovod training directly using Open MPI,
9899
read `Run Horovod with Open MPI <mpirun.rst>`_.
99100

101+
Run Horovod with Intel(R) MPI
102+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
103+
``horovodrun`` automatically converts some parameters to the format supported by Intel(R) MPI ``mpirun``. The set of allowed options includes ``-np``, ``-H`` and
104+
ssh arguments (-p, -i). Intel(R) MPI ``mpirun`` does not support MCA parameters, but you can set some of the options via `environment variables <https://software.intel.com/content/www/us/en/develop/documentation/mpi-developer-reference-linux/environment-variable-reference.html>`__.
105+
For additional information refer to `Intel(R) MPI official documentation <https://software.intel.com/content/www/us/en/develop/documentation/mpi-developer-reference-linux/top/command-reference/mpiexec-hydra/global-options.html>`__.
106+
100107
.. inclusion-marker-end-do-not-remove

horovod/runner/mpi_run.py

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
_OMPI_IMPL = 'OpenMPI'
2626
_SMPI_IMPL = 'SpectrumMPI'
2727
_MPICH_IMPL = 'MPICH'
28+
_IMPI_IMPL = "IntelMPI"
2829
_UNKNOWN_IMPL = 'Unknown'
2930
_MISSING_IMPL = 'Missing'
3031

@@ -35,6 +36,8 @@
3536
_SMPI_FLAGS_TCP = ['-tcp']
3637
# MPICH Flags
3738
_MPICH_FLAGS = []
39+
# Intel MPI Flags
40+
_IMPI_FLAGS = []
3841

3942
# Threshold for large cluster MPI issues:
4043
_LARGE_CLUSTER_THRESHOLD = 64
@@ -70,6 +73,10 @@ def is_mpich(env=None):
7073
return _get_mpi_implementation(env) == _MPICH_IMPL
7174

7275

76+
def is_intel_mpi(env=None):
77+
return _get_mpi_implementation(env) == _IMPI_IMPL
78+
79+
7380
def _get_mpi_implementation(env=None):
7481
"""
7582
Detects the available MPI implementation by invoking `mpirun --version`.
@@ -78,7 +85,7 @@ def _get_mpi_implementation(env=None):
7885
represents the stdout and stderr as a string.
7986
8087
Returns one of:
81-
- _OMPI_IMPL, _SMPI_IMPL or _MPICH_IMPL for known implementations
88+
- _OMPI_IMPL, _SMPI_IMPL, _MPICH_IMPL or _IMPI_IMPL for known implementations
8289
- _UNKNOWN_IMPL for any unknown implementation
8390
- _MISSING_IMPL if `mpirun --version` could not be executed.
8491
@@ -98,6 +105,8 @@ def _get_mpi_implementation(env=None):
98105
return _SMPI_IMPL
99106
elif 'MPICH' in output:
100107
return _MPICH_IMPL
108+
elif 'Intel(R) MPI' in output:
109+
return _IMPI_IMPL
101110

102111
print('Unknown MPI implementation given in output of mpirun --version:', file=sys.stderr)
103112
print(output, file=sys.stderr)
@@ -110,13 +119,15 @@ def _get_mpi_implementation(env=None):
110119

111120
def _get_mpi_implementation_flags(tcp_flag, env=None):
112121
if is_open_mpi(env):
113-
return list(_OMPI_FLAGS), list(_NO_BINDING_ARGS)
122+
return list(_OMPI_FLAGS), list(_NO_BINDING_ARGS), _OMPI_IMPL
114123
elif is_spectrum_mpi(env):
115-
return list(_SMPI_FLAGS) if not tcp_flag else list(_SMPI_FLAGS_TCP), list(_SOCKET_BINDING_ARGS)
124+
return (list(_SMPI_FLAGS_TCP) if tcp_flag else list(_SMPI_FLAGS)), list(_SOCKET_BINDING_ARGS), _SMPI_IMPL
116125
elif is_mpich(env):
117-
return list(_MPICH_FLAGS), list(_NO_BINDING_ARGS)
126+
return list(_MPICH_FLAGS), list(_NO_BINDING_ARGS), _MPICH_IMPL
127+
elif is_intel_mpi(env):
128+
return list(_IMPI_FLAGS), [], _IMPI_IMPL
118129
else:
119-
return None, None
130+
return None, None, None
120131

121132

122133
def mpi_run(settings, nics, env, command, stdout=None, stderr=None):
@@ -138,10 +149,12 @@ def mpi_run(settings, nics, env, command, stdout=None, stderr=None):
138149
raise Exception('env argument must be a dict, not {type}: {env}'
139150
.format(type=type(env), env=env))
140151

141-
mpi_impl_flags, impl_binding_args = _get_mpi_implementation_flags(settings.tcp_flag, env=env)
152+
mpi_impl_flags, impl_binding_args, mpi = _get_mpi_implementation_flags(settings.tcp_flag, env=env)
142153
if mpi_impl_flags is None:
143154
raise Exception(_MPI_NOT_FOUND_ERROR_MSG)
144155

156+
impi = _IMPI_IMPL == mpi
157+
145158
ssh_args = []
146159
if settings.ssh_port:
147160
ssh_args += [f'-p {settings.ssh_port}']
@@ -151,51 +164,71 @@ def mpi_run(settings, nics, env, command, stdout=None, stderr=None):
151164
mpi_ssh_args = ''
152165
if ssh_args:
153166
joined_ssh_args = ' '.join(ssh_args)
154-
mpi_ssh_args = f'-mca plm_rsh_args \"{joined_ssh_args}\"'
155-
156-
# if user does not specify any hosts, mpirun by default uses local host.
157-
# There is no need to specify localhost.
158-
hosts_arg = '-H {hosts}'.format(hosts=settings.hosts)
167+
mpi_ssh_args = f'-bootstrap=ssh -bootstrap-exec-args \"{joined_ssh_args}\"' if impi else f'-mca plm_rsh_args \"{joined_ssh_args}\"'
159168

160169
tcp_intf_arg = '-mca btl_tcp_if_include {nics}'.format(
161-
nics=','.join(nics)) if nics else ''
162-
nccl_socket_intf_arg = '-x NCCL_SOCKET_IFNAME={nics}'.format(
170+
nics=','.join(nics)) if nics and not impi else ''
171+
nccl_socket_intf_arg = '-{opt} NCCL_SOCKET_IFNAME={nics}'.format(
172+
opt='genv' if impi else 'x',
163173
nics=','.join(nics)) if nics else ''
164174

165175
# On large cluster runs (e.g. Summit), we need extra settings to work around OpenMPI issues
166-
host_names, _ = hosts.parse_hosts_and_slots(settings.hosts)
167-
if host_names and len(host_names) >= _LARGE_CLUSTER_THRESHOLD:
176+
host_names, host_to_slots = hosts.parse_hosts_and_slots(settings.hosts)
177+
if not impi and host_names and len(host_names) >= _LARGE_CLUSTER_THRESHOLD:
168178
mpi_impl_flags.append('-mca plm_rsh_no_tree_spawn true')
169179
mpi_impl_flags.append('-mca plm_rsh_num_concurrent {}'.format(len(host_names)))
170180

171-
if settings.prefix_output_with_timestamp:
181+
# if user does not specify any hosts, mpirun by default uses local host.
182+
# There is no need to specify localhost.
183+
hosts_arg = '-{opt} {hosts}'.format(opt='hosts' if impi else 'H',
184+
hosts=','.join(host_names) if host_names and impi else settings.hosts)
185+
186+
ppn_arg = ' '
187+
if host_to_slots and impi:
188+
ppn = host_to_slots[host_names[0]]
189+
for h_name in host_names[1:]:
190+
if ppn != host_to_slots[h_name]:
191+
raise Exception('''Different slots in -hosts parameter are not supported in Intel(R) MPI.
192+
Use -machinefile <machine_file> for this purpose.''')
193+
ppn_arg = ' -ppn {} '.format(ppn)
194+
195+
if settings.prefix_output_with_timestamp and not impi:
172196
mpi_impl_flags.append('--timestamp-output')
173197

174-
binding_args = settings.binding_args if settings.binding_args else ' '.join(impl_binding_args)
198+
binding_args = settings.binding_args if settings.binding_args and not impi else ' '.join(impl_binding_args)
199+
200+
basic_args = '-l' if impi else '--allow-run-as-root --tag-output'
201+
202+
output = []
203+
if settings.output_filename:
204+
output.append('-outfile-pattern' if impi else '--output-filename')
205+
output.append(settings.output_filename)
206+
207+
env_list = '' if impi else ' '.join(
208+
'-x %s' % key for key in sorted(env.keys()) if env_util.is_exportable(key))
175209

176210
# Pass all the env variables to the mpirun command.
177211
mpirun_command = (
178-
'mpirun --allow-run-as-root --tag-output '
179-
'-np {num_proc} {hosts_arg} '
212+
'mpirun {basic_args} '
213+
'-np {num_proc}{ppn_arg}{hosts_arg} '
180214
'{binding_args} '
181215
'{mpi_args} '
182216
'{mpi_ssh_args} '
183217
'{tcp_intf_arg} '
184218
'{nccl_socket_intf_arg} '
185219
'{output_filename_arg} '
186220
'{env} {extra_mpi_args} {command}' # expect a lot of environment variables
187-
.format(num_proc=settings.num_proc,
221+
.format(basic_args=basic_args,
222+
num_proc=settings.num_proc,
223+
ppn_arg=ppn_arg,
188224
hosts_arg=hosts_arg,
189225
binding_args=binding_args,
190226
mpi_args=' '.join(mpi_impl_flags),
191227
tcp_intf_arg=tcp_intf_arg,
192228
nccl_socket_intf_arg=nccl_socket_intf_arg,
193229
mpi_ssh_args=mpi_ssh_args,
194-
output_filename_arg='--output-filename ' + settings.output_filename
195-
if settings.output_filename else '',
196-
env=' '.join('-x %s' % key for key in sorted(env.keys())
197-
if env_util.is_exportable(key)),
198-
230+
output_filename_arg=' '.join(output),
231+
env=env_list,
199232
extra_mpi_args=settings.extra_mpi_args if settings.extra_mpi_args else '',
200233
command=' '.join(quote(par) for par in command))
201234
)

test/integration/test_spark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ def fn():
581581
return 0
582582

583583
def mpi_impl_flags(tcp, env=None):
584-
return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]
584+
return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"], None
585585

586586
def gloo_exec_command_fn(driver_addresses, key, settings, env):
587587
def _exec_command(command, alloc_info, event):
@@ -625,7 +625,7 @@ def fn():
625625
return 1
626626

627627
def mpi_impl_flags(tcp, env=None):
628-
return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"]
628+
return ["--mock-mpi-impl-flags"], ["--mock-mpi-binding-args"], None
629629

630630
def exception(*args, **argv):
631631
raise Exception('Test Exception')
@@ -647,7 +647,7 @@ def exception(*args, **argv):
647647
self.assertEqual(str(e.value), 'Test Exception')
648648

649649
# call the mocked _get_mpi_implementation_flags method
650-
mpi_flags, binding_args = horovod.runner.mpi_run._get_mpi_implementation_flags(False)
650+
mpi_flags, binding_args, _ = horovod.runner.mpi_run._get_mpi_implementation_flags(False)
651651
self.assertIsNotNone(mpi_flags)
652652
expected_command = ('mpirun '
653653
'--allow-run-as-root --tag-output '

test/integration/test_static_run.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from horovod.runner.common.util import safe_shell_exec
3030
from horovod.runner import _HorovodArgs
3131
from horovod.runner.launch import _check_all_hosts_ssh_successful, _run
32-
from horovod.runner.mpi_run import mpi_available, is_mpich
32+
from horovod.runner.mpi_run import mpi_available, is_mpich, is_intel_mpi
3333

3434
sys.path.append(os.path.join(os.path.dirname(__file__), os.pardir, 'utils'))
3535

@@ -143,6 +143,8 @@ def test_run_success(self, controller, mode, run):
143143
self.skipTest("MPI is not available")
144144
if is_mpich():
145145
self.skipTest("MPICH is not testable")
146+
if is_intel_mpi():
147+
self.skipTest("Intel(R) MPI is not testable because it is based on MPICH")
146148

147149
self.do_test_run_with_controller_success(controller, mode, run)
148150

@@ -156,6 +158,8 @@ def test_run_failure(self, controller, mode, run):
156158
self.skipTest("MPI is not available")
157159
if is_mpich():
158160
self.skipTest("MPICH is not testable")
161+
if is_intel_mpi():
162+
self.skipTest("Intel(R) MPI is not testable because it is based on MPICH")
159163

160164
self.do_test_run_with_controller_failure(controller, mode, run)
161165

0 commit comments

Comments
 (0)