Commit c40fada: Init BladeDISC repo (#2)
1 parent 19128b8 commit c40fada

1,030 files changed: +110851 -0 lines changed


.bazelrc

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
# Android configs. Bazel needs to have --cpu and --fat_apk_cpu both set to the
# target CPU to build transient dependencies correctly. See
# https://docs.bazel.build/versions/master/user-manual.html#flag--fat_apk_cpu
build:android --crosstool_top=//external:android/crosstool
build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:android_arm --config=android
build:android_arm --cpu=armeabi-v7a
build:android_arm --fat_apk_cpu=armeabi-v7a
build:android_arm64 --config=android
build:android_arm64 --cpu=arm64-v8a
build:android_arm64 --fat_apk_cpu=arm64-v8a

# Config to use a mostly-static build and disable modular op registration
# support (this will revert to loading TensorFlow with RTLD_GLOBAL in Python).
# By default, TensorFlow will build with a dependence on
# //tensorflow:libtensorflow_framework.so.
build:monolithic --define framework_shared_object=false

# For projects which use TensorFlow as part of a Bazel build process, putting
# nothing in a bazelrc will default to a monolithic build. The following line
# opts in to modular op registration support by default.
build --define framework_shared_object=true

# Please note that MKL on MacOS or windows is still not supported.
# If you would like to use a local MKL instead of downloading, please set the
# environment variable "TF_MKL_ROOT" every time before build.
build:mkl --define=build_with_mkl=true --define=enable_mkl=true
build:mkl -c opt

# This config option is used to enable MKL-DNN open source library only,
# without depending on MKL binary version.
build:mkl_open_source_only --define=build_with_mkl_dnn_only=true
build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true

build:download_clang --crosstool_top=@local_config_download_clang//:toolchain
build:download_clang --define=using_clang=true
# Instruct clang to use LLD for linking.
# This only works with GPU builds currently, since Bazel sets -B/usr/bin in
# auto-generated CPU crosstool, forcing /usr/bin/ld.lld to be preferred over
# the downloaded one.
build:download_clang_use_lld --linkopt='-fuse-ld=lld'

build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true

build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true

build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true

build:sycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl --define=using_sycl=true --define=using_trisycl=false

build:sycl_nodouble --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_nodouble --define=using_sycl=true --cxxopt -DTENSORFLOW_SYCL_NO_DOUBLE

build:sycl_asan --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fno-omit-frame-pointer --copt -fsanitize-coverage=3 --copt -DGPR_NO_DIRECT_SYSCALLS --linkopt -fPIC --linkopt -fsanitize=address

build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true

# Options extracted from configure script
build:gdr --define=with_gdr_support=true
build:ngraph --define=with_ngraph_support=true
build:verbs --define=with_verbs_support=true

build --define=use_fast_cpp_protos=true
build --define=allow_oversize_protos=true
build --define=grpc_no_ares=true

build --spawn_strategy=standalone
build --genrule_strategy=standalone
build -c opt

# By default, build TF in C++ 14 mode.
build --cxxopt=-std=c++14
build --host_cxxopt=-std=c++14


# Other build flags.
build --define=grpc_no_ares=true

# Modular TF build options
build:dynamic_kernels --define=dynamic_loaded_kernels=true

# Default paths for TF_SYSTEM_LIBS
build --define=PREFIX=/usr
build --define=LIBDIR=$(PREFIX)/lib
build --define=INCLUDEDIR=$(PREFIX)/include

# Do not commit the tf_configure.bazelrc line
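
Each named config above is selected at build time with Bazel's --config flag. A minimal invocation sketch (the target label is hypothetical, not part of this commit):

    # picks up the CUDA toolchain and defines from .bazelrc; //pkg:target is illustrative
    bazel build --config=cuda //pkg:target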

.bazelversion

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
3.7.2
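
This pins the repository to Bazel 3.7.2; launchers such as bazelisk read .bazelversion and fetch the matching release automatically. A quick sanity check, assuming a compatible Bazel is on PATH:

    bazel --version   # expect: bazel 3.7.2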

.github/workflows/main.yml

Lines changed: 173 additions & 0 deletions
@@ -0,0 +1,173 @@
# This is a basic workflow that builds and tests DISC.

name: DISC

# Controls when the action will run: the workflow runs on every push.
on: [push]

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  CUDA10-TF115:
    # The type of runner that the job will run on
    runs-on: [self-hosted, gpu-t4]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        shell: bash
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cuda10 --build-arg BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build And Test DISC
        env:
          REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
        run: |
          set -e
          nvidia-docker run --rm -t --user $(id -u) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
            -w /disc \
            disc-dev-cuda10 bash ./scripts/ci/build_and_test.sh
      - name: Deploy Docker
        if: github.event.ref == 'refs/heads/pai_disc'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cuda10
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cuda10
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-tf115
          RUNTIME_BASEIMAGE: tensorflow/tensorflow:1.15.5-gpu
        run: |
          set -e
          bash ./scripts/ci/deploy_docker.sh
  CUDA11-TF24:
    # The type of runner that the job will run on
    runs-on: [self-hosted, gpu-t4]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cuda11.0 --build-arg BASEIMAGE=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build And Test DISC
        run: |
          set -e
          nvidia-docker run --rm -t --user $(id -u) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
            -w /disc \
            disc-dev-cuda11.0 bash ./scripts/ci/build_and_test.sh
      - name: Deploy Docker
        if: github.event.ref == 'refs/heads/pai_disc'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cuda11.0
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cuda11.0
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-tf24
          RUNTIME_BASEIMAGE: tensorflow/tensorflow:2.4.0-gpu
        run: |
          set -e
          bash ./scripts/ci/deploy_tf_wrapper.sh
  CUDA11-TORCH171:
    # The type of runner that the job will run on
    runs-on: [self-hosted, gpu-t4]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cuda11.0 --build-arg BASEIMAGE=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build and Test DISC
        run: |
          set -e
          nvidia-docker run --rm -t --user $(id -u) \
            -v $HOME/.cache:$HOME/.cache \
            -v /etc/passwd:/etc/passwd:ro \
            -v /etc/group:/etc/group:ro \
            -v $PWD:/disc \
            -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
            -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
            -w /disc \
            disc-dev-cuda11.0 bash ./scripts/ci/test_pytorch_addons.sh
      - name: Deploy PyTorch Addons
        if: github.event.ref == 'refs/heads/pai_disc'
        env:
          ALIYUN_DOCKER_USERNAME: ${{ secrets.ALIYUN_DOCKER_USERNAME }}
          ALIYUN_DOCKER_PASSWORD: ${{ secrets.ALIYUN_DOCKER_PASSWORD }}
          DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
          GITHUB_PULL_REQUEST: ${{ github.event.number }}
          LOCAL_DEV_DOCKER: disc-dev-cuda11.0
          REMOTE_DEV_DOCKER: bladedisc:latest-devel-cuda11.0
          REMOTE_RUNTIME_DOCKER: bladedisc:latest-runtime-torch1.7.1
          RUNTIME_BASEIMAGE: pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime
        run: |
          set -e
          bash ./scripts/ci/deploy_pytorch_addons.sh
  CPU:
    # The type of runner that the job will run on
    runs-on: [self-hosted, cpu]
    env:
      REMOTE_CACHE_TOKEN: ${{ secrets.REMOTE_CACHE_TOKEN }}
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Runs a single command using the runners shell
      - name: Checkout
        uses: actions/checkout@v2
      - name: Build Dev Docker
        run: |
          set -e
          git submodule sync
          git submodule update --depth=1 --init --recursive
          docker build -t disc-dev-cpu --build-arg BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -f docker/dev/Dockerfile .
      - name: Build And Test DISC
        run: |
          # TODO(yancey.yx): enable CPU unit test
          #set -e
          #docker run --rm -t --user $(id -u) \
          #  -v $HOME/.cache:$HOME/.cache \
          #  -v /etc/passwd:/etc/passwd:ro \
          #  -v /etc/group:/etc/group:ro \
          #  -v $PWD:/disc \
          #  -e GITHUB_WORKFLOW=$GITHUB_WORKFLOW \
          #  -e TF_REMOTE_CACHE=$REMOTE_CACHE_TOKEN \
          #  -w /disc \
          #  disc-dev-cpu bash ./scripts/ci/build_and_test.sh --cpu-only
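
The CPU job builds the dev image but leaves its test run commented out. Any of the GPU jobs can be reproduced outside Actions with the same commands the workflow runs; a minimal sketch of the CUDA10 path (REMOTE_CACHE_TOKEN only feeds the remote build cache and can be omitted):

    # build the dev image, then run the CI script inside it, as the CUDA10-TF115 job does
    docker build -t disc-dev-cuda10 --build-arg BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 -f docker/dev/Dockerfile .
    nvidia-docker run --rm -t --user $(id -u) -v $PWD:/disc -w /disc \
        disc-dev-cuda10 bash ./scripts/ci/build_and_test.sh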

.gitignore

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
.DS_Store
.ipynb_checkpoints
*.pyc
platform_alibaba/ci_build/tao_dev_with_venv.Dockerfile
.vscode

.gitmodules

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
[submodule "pytorch_addons/third_party/googletest"]
    path = pytorch_addons/third_party/googletest
    url = git@github.com:google/googletest.git
[submodule "tao/third_party/json"]
    path = tao/third_party/json
    url = https://github.com/nlohmann/json.git
[submodule "tf_community"]
    path = tf_community
    url = git@github.com:pai-disc/tensorflow.git
[submodule "tao/third_party/abseil-cpp"]
    path = tao/third_party/abseil-cpp
    url = git@github.com:abseil/abseil-cpp.git
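
Three of the four submodules use SSH remotes, so a fresh clone needs SSH access to GitHub (or URLs rewritten to https). The CI jobs above fetch them with a shallow recursive update:

    git submodule sync
    git submodule update --depth=1 --init --recursive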

VERSION

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
0.21.05

docker/dev/Dockerfile

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
ARG BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
FROM ${BASEIMAGE}

COPY docker/scripts /install/scripts

RUN bash /install/scripts/find-fastest-apt.sh && \
    apt-get install -y python3 python3-pip git curl vim libssl-dev wget unzip openjdk-11-jdk && \
    pip3 install --upgrade pip && \
    ln -s /usr/bin/python3 /usr/bin/python && \
    bash /install/scripts/install-cmake.sh && \
    bash /install/scripts/install-bazel.sh && \
    bash /install/scripts/install-python.sh

ENV PATH="/opt/cmake/bin:${PATH}"
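
The CI workflow builds this image once per toolchain by overriding BASEIMAGE, e.g. for the CUDA 11 jobs:

    docker build -t disc-dev-cuda11.0 \
        --build-arg BASEIMAGE=nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 \
        -f docker/dev/Dockerfile .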

docker/runtime/Dockerfile.pytorch

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@

FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime

ADD ./docker/scripts /install/scripts
RUN bash /install/scripts/find-fastest-apt.sh

ADD ./build/torch_addons*.whl /install/python/

RUN apt-get update -y \
    && apt-get install -y python3.6 python3-pip protobuf-compiler libprotobuf-dev cmake \
    && ln -s /usr/bin/python3.6 /usr/bin/python \
    && python3.6 -m pip install pip --upgrade \
    && python3.6 -m pip install /install/python/torch_addons*.whl -f https://download.pytorch.org/whl/cu110/torch_stable.html

ENV PATH /usr/bin:$PATH
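
This runtime image expects a torch_addons wheel staged under ./build before the build starts. A sketch of the build step, assuming the wheel was produced by the CI build and reusing the tag from the workflow's REMOTE_RUNTIME_DOCKER (the exact invocation lives in scripts/ci/deploy_pytorch_addons.sh):

    # ./build/torch_addons*.whl must exist to satisfy the ADD instruction above
    docker build -t bladedisc:latest-runtime-torch1.7.1 -f docker/runtime/Dockerfile.pytorch .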

docker/runtime/Dockerfile.tf

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
ARG BASEIMAGE=nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
FROM ${BASEIMAGE}

ARG WHEEL_FILE=blade_disc*.whl

ADD ./docker/scripts /install/scripts
RUN bash /install/scripts/find-fastest-apt.sh

ADD ./build/${WHEEL_FILE} /install/python/

RUN apt-get install -y python3 python3-pip \
    && pip3 install --upgrade pip \
    && ln -s /usr/bin/python3.6 /usr/bin/python \
    && pip install /install/python/${WHEEL_FILE}
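
Both ARGs are overridable, which lets this one Dockerfile produce the TF 1.15 and TF 2.4 runtime images. A sketch for the TF 1.15 variant, with BASEIMAGE and tag taken from the workflow's env (the exact invocation lives in scripts/ci/deploy_docker.sh):

    docker build -t bladedisc:latest-runtime-tf115 \
        --build-arg BASEIMAGE=tensorflow/tensorflow:1.15.5-gpu \
        --build-arg WHEEL_FILE='blade_disc*.whl' \
        -f docker/runtime/Dockerfile.tf .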
