Skip to content

Commit b8f0472

Browse files
yanjun.qiuhouqi
authored and committed
fix cuda>=12.8 build
1 parent ffb34a7 commit b8f0472

File tree

5 files changed

+23
-5
lines changed

5 files changed

+23
-5
lines changed

3rdparty/cutlass

Submodule cutlass updated 2045 files

include/flux/cuda/cuda_common_device.hpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,15 @@
1717

1818
#pragma once
1919

20+
2021
#include "cute/util/debug.hpp"
2122
#include "cute/util/print.hpp"
2223
#include "cutlass/detail/helper_macros.hpp"
2324
#include <cstddef>
2425
#include <cstring>
2526

26-
#if defined(CUDA_VERSION) && CUDA_VERSION > 12080
27+
#include <cuda.h>
28+
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080
2729
#include <cuda/atomic>
2830
#else
2931
#include <cuda/std/atomic>

src/gemm_rs/reduce_scatter_kernel.hpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
#include <ostream>
2525
#include <type_traits>
2626
#include <cuda_runtime_api.h>
27-
#include <cuda/std/atomic>
2827
#include <utility>
2928
#include "cutlass/detail/helper_macros.hpp"
3029
#ifdef FLUX_SHM_USE_NVSHMEM
@@ -52,6 +51,13 @@
5251
#include <nccl.h>
5352
#endif
5453

54+
#include <cuda.h>
55+
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080
56+
#include <cuda/atomic>
57+
#else
58+
#include <cuda/std/atomic>
59+
#endif
60+
5561
#define NextRank(rank_) (((rank_) + 1) % kLocalWorldSize)
5662
#define NextLocalRank(rank_, node_) ((((rank_) + 1) % kNumaWorldSize + (node_) * kNumaWorldSize))
5763
#define PrevLocalRank(rank_, node_) \

src/moe_ag_scatter/cutlass_impls/ag_scatter_gemm_grouped_with_absmax.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,15 +37,20 @@
3737

3838
#pragma once
3939

40-
4140
#include "cutlass/cutlass.h"
4241
#include "cutlass/float8.h"
4342
#include "cutlass/matrix_coord.h"
4443
#include "cutlass/complex.h"
4544
#include "cutlass/gemm/kernel/gemm_transpose_operands.h"
4645
#include <ctime>
4746
#include <type_traits>
47+
48+
#include <cuda.h>
49+
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080
50+
#include <cuda/atomic>
51+
#else
4852
#include <cuda/std/atomic>
53+
#endif
4954
#include "ag_scatter_grouped_problem_visitor.hpp"
5055

5156
/////////////////////////////////////////////////////////////////////////////////////////////////

src/moe_gather_rs/cutlass_impls/gather_rs_gemm_grouped_with_absmax.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,13 @@
5252
#include <type_traits>
5353

5454
#include "cutlass/barrier.h"
55-
#include <cuda/std/atomic>
5655

56+
#include <cuda.h>
57+
#if defined(CUDA_VERSION) && CUDA_VERSION >= 12080
58+
#include <cuda/atomic>
59+
#else
60+
#include <cuda/std/atomic>
61+
#endif
5762
/////////////////////////////////////////////////////////////////////////////////////////////////
5863

5964
namespace cutlass {

0 commit comments

Comments
 (0)