Commit c40fada: Init BladeDISC repo (#2)
1 parent 19128b8 commit c40fada

1,030 files changed: +110851 -0 lines changed


.bazelrc

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
# Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the
# target CPU to build transient dependencies correctly. See
# https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android_arm --config=android
build:android_arm --cpu=armeabi-v7a
build:android_arm --fat_apk_cpu=armeabi-v7a
build:android_arm64 --config=android
build:android_arm64 --cpu=arm64-v8a
build:android_arm64 --fat_apk_cpu=arm64-v8a

# Config to use a mostly-static build and disable modular op registration
# support (this will revert to loading TensorFlow with RTLD_GLOBAL in Python).
# By default, TensorFlow will build with a dependence on
# //tensorflow:libtensorflow_framework.so.
build:monolithic --define framework_shared_object=false

# For projects which use TensorFlow as part of a Bazel build process, putting
# nothing in a bazelrc will default to a monolithic build. The following line
# opts in to modular op registration support by default.
build --define framework_shared_object=true

# Please note that MKL on MacOS or windows is still not supported.
# If you would like to use a local MKL instead of downloading, please set the
# environment variable "TF_MKL_ROOT" every time before build.
build:mkl --define=build_with_mkl=true --define=enable_mkl=true
build:mkl -c opt

# This config option is used to enable MKL-DNN open source library only,
# without depending on MKL binary version.
build:mkl_open_source_only --define=build_with_mkl_dnn_only=true
build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true

build:download_clang --crosstool_top=@local_config_download_clang//:toolchain
build:download_clang --define=using_clang=true
# Instruct clang to use LLD for linking.
# This only works with GPU builds currently, since Bazel sets -B/usr/bin in
# auto-generated CPU crosstool, forcing /usr/bin/ld.lld to be preferred over
# the downloaded one.
build:download_clang_use_lld --linkopt='-fuse-ld=lld'

build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true

build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true

build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true

build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl --define=using_sycl=true --define=using_trisycl=false

build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE

build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address

build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true

# Options extracted from configure script
build:gdr --define=with_gdr_support=true
build:ngraph --define=with_ngraph_support=true
build:verbs --define=with_verbs_support=true

build --define=use_fast_cpp_protos=true
build --define=allow_oversize_protos=true
build --define=grpc_no_ares=true

build --spawn_strategy=standalone
build --genrule_strategy=standalone
build -c opt

# By default, build TF in C++ 14 mode.
build --cxxopt=-std=c++14
build --host_cxxopt=-std=c++14


# Other build flags.
build --define=grpc_no_ares=true

# Modular TF build options
build:dynamic_kernels --define=dynamic_loaded_kernels=true

# Default paths for TF_SYSTEM_LIBS
build --define=PREFIX=/usr
build --define=LIBDIR=$(PREFIX)/lib
build --define=INCLUDEDIR=$(PREFIX)/include

# Do not commit the tf_configure.bazelrc line
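
Each named config above is selected at build time with Bazel's --config flag. A minimal invocation sketch (the target label is hypothetical, not part of this commit):

    # picks up the CUDA toolchain and defines from .bazelrc; //pkg:target is illustrative
    bazel build --config=cuda //pkg:target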

.bazelversion

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
3.7.2
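
This pins the repository to Bazel 3.7.2; launchers such as bazelisk read .bazelversion and fetch the matching release automatically. A quick sanity check, assuming a compatible Bazel is on PATH:

    bazel --version   # expect: bazel 3.7.2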

.github/workflows/main.yml

Lines changed: 173 additions & 0 deletions
@@ -0,0 +1,173 @@
# This is a basic workflow that builds and tests DISC.

name: DISC

# Controls when the action will run: the workflow runs on every push.
on: [push]

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  CUDA10-TF115:
    # The type of runner that the job will run on
    runs-on: [self-hosted, gpu-t4]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        shell: bash
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cuda10 --build-arg BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build And Test DISC
        env:
          REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
        run: |
          set -e
          nvidia-docker run --rm -t --user $(id -u) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
            -w /disc \
            disc-dev-cuda10 bash ./scripts/ci/build_and_test.sh
      - name: Deploy Docker
        if: github.event.ref == 'refs/heads/pai_disc'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cuda10
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cuda10
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-tf115
          RUNTIME_BASEIMAGE: tensorflow/tensorflow:1.15.5-gpu
        run: |
          set -e
          bash ./scripts/ci/deploy_docker.sh
  CUDA11-TF24:
    # The type of runner that the job will run on
    runs-on: [self-hosted, gpu-t4]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cuda11.0 --build-arg BASEIMAGE=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build And Test DISC
        run: |
          set -e
          nvidia-docker run --rm -t --user $(id -u) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
            -w /disc \
            disc-dev-cuda11.0 bash ./scripts/ci/build_and_test.sh
      - name: Deploy Docker
        if: github.event.ref == 'refs/heads/pai_disc'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cuda11.0
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cuda11.0
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-tf24
          RUNTIME_BASEIMAGE: tensorflow/tensorflow:2.4.0-gpu
        run: |
          set -e
          bash ./scripts/ci/deploy_tf_wrapper.sh
  CUDA11-TORCH171:
    # The type of runner that the job will run on
    runs-on: [self-hosted, gpu-t4]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cuda11.0 --build-arg BASEIMAGE=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build and Test DISC
        run: |
          set -e
          nvidia-docker run --rm -t --user $(id -u) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
            -w /disc \
            disc-dev-cuda11.0 bash ./scripts/ci/test_pytorch_addons.sh
      - name: Deploy PyTorch Addons
        if: github.event.ref == 'refs/heads/pai_disc'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cuda11.0
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cuda11.0
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-torch1.7.1
          RUNTIME_BASEIMAGE: pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
        run: |
          set -e
          bash ./scripts/ci/deploy_pytorch_addons.sh
  CPU:
    # The type of runner that the job will run on
    runs-on: [self-hosted, cpu]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cpu --build-arg BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build And Test DISC
        run: |
          # TODO(yancey.yx): enable CPU unit test
          #set -e
          #docker run --rm -t --user $(id -u) \
          #  -v $HOME/.cache:$HOME/.cache \
          #  -v /etc/passwd:/etc/passwd:ro \
          #  -v /etc/group:/etc/group:ro \
          #  -v $PWD:/disc \
          #  -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
          #  -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
          #  -w /disc \
          #  disc-dev-cpu bash ./scripts/ci/build_and_test.sh --cpu-only
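
The CPU job builds the dev image but leaves its test run commented out. Any of the GPU jobs can be reproduced outside Actions with the same commands the workflow runs; a minimal sketch of the CUDA10 path (REMOTE_CACHE_TOKEN only feeds the remote build cache and can be omitted):

    # build the dev image, then run the CI script inside it, as the CUDA10-TF115 job does
    docker build -t disc-dev-cuda10 --build-arg BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -f docker/dev/Dockerfile .
    nvidia-docker run --rm -t --user $(id -u) -v $PWD:/disc -w /disc \
        disc-dev-cuda10 bash ./scripts/ci/build_and_test.sh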

.gitignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
.DS_Store
.ipynb_checkpoints
*.pyc
platform_alibaba/ci_build/tao_dev_with_venv.Dockerfile
.vscode

.gitmodules

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
[submodule "pytorch_addons/third_party/googletest"]
    path = pytorch_addons/third_party/googletest
    url = git@github.com:google/googletest.git
[submodule "tao/third_party/json"]
    path = tao/third_party/json
    url = https://github.com/nlohmann/json.git
[submodule "tf_community"]
    path = tf_community
    url = git@github.com:pai-disc/tensorflow.git
[submodule "tao/third_party/abseil-cpp"]
    path = tao/third_party/abseil-cpp
    url = git@github.com:abseil/abseil-cpp.git
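
Three of the four submodules use SSH remotes, so a fresh clone needs SSH access to GitHub (or URLs rewritten to https). The CI jobs above fetch them with a shallow recursive update:

    git submodule sync
    git submodule update --depth=1 --init --recursive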

VERSION

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
0.21.05

docker/dev/Dockerfile

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
ARG BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
FROM ${BASEIMAGE}

COPY docker/scripts /install/scripts

RUN bash /install/scripts/find-fastest-apt.sh && \
    apt-get install -y python3 python3-pip git curl vim libssl-dev wget unzip openjdk-11-jdk && \
    pip3 install --upgrade pip && \
    ln -s /usr/bin/python3 /usr/bin/python && \
    bash /install/scripts/install-cmake.sh && \
    bash /install/scripts/install-bazel.sh && \
    bash /install/scripts/install-python.sh

ENV PATH="/opt/cmake/bin:${PATH}"
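
The CI workflow builds this image once per toolchain by overriding BASEIMAGE, e.g. for the CUDA 11 jobs:

    docker build -t disc-dev-cuda11.0 \
        --build-arg BASEIMAGE=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 \
        -f docker/dev/Dockerfile .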

docker/runtime/Dockerfile.pytorch

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@

FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime

ADD ./docker/scripts /install/scripts
RUN bash /install/scripts/find-fastest-apt.sh

ADD ./build/torch_addons*.whl /install/python/

RUN apt-get update -y \
    && apt-get install -y python3.6 python3-pip protobuf-compiler libprotobuf-dev cmake \
    && ln -s /usr/bin/python3.6 /usr/bin/python \
    && python3.6 -m pip install pip --upgrade \
    && python3.6 -m pip install /install/python/torch_addons*.whl -f https://download.pytorch.org/whl/cu110/torch_stable.html

ENV PATH /usr/bin:$PATH
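
This runtime image expects a torch_addons wheel staged under ./build before the build starts. A sketch of the build step, assuming the wheel was produced by the CI build and reusing the tag from the workflow's REMOTE_RUNTIME_DOCKER (the exact invocation lives in scripts/ci/deploy_pytorch_addons.sh):

    # ./build/torch_addons*.whl must exist to satisfy the ADD instruction above
    docker build -t bladedisc:latest-runtime-torch1.7.1 -f docker/runtime/Dockerfile.pytorch .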

docker/runtime/Dockerfile.tf

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
ARG BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
FROM ${BASEIMAGE}

ARG WHEEL_FILE=blade_disc*.whl

ADD ./docker/scripts /install/scripts
RUN bash /install/scripts/find-fastest-apt.sh

ADD ./build/${WHEEL_FILE} /install/python/

RUN apt-get install -y python3 python3-pip \
    && pip3 install --upgrade pip \
    && ln -s /usr/bin/python3.6 /usr/bin/python \
    && pip install /install/python/${WHEEL_FILE}
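
Both ARGs are overridable, which lets this one Dockerfile produce the TF 1.15 and TF 2.4 runtime images. A sketch for the TF 1.15 variant, with BASEIMAGE and tag taken from the workflow's env (the exact invocation lives in scripts/ci/deploy_docker.sh):

    docker build -t bladedisc:latest-runtime-tf115 \
        --build-arg BASEIMAGE=tensorflow/tensorflow:1.15.5-gpu \
        --build-arg WHEEL_FILE='blade_disc*.whl' \
        -f docker/runtime/Dockerfile.tf .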
