Skip to content

Commit 57dd7a7

Browse files
fix: correct result-matrix dimensions in matrix multiply (C is M x M, not M x N)
1 parent 44a811d commit 57dd7a7

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

gpu/cuda/matrix/matrix_multiply.cu

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ int main() {
3636

3737
int *data_A = (int *) malloc(sizeof(int) * M * N);
3838
int *data_B = (int *) malloc(sizeof(int) * M * N);
39-
int *data_C = (int *) malloc(sizeof(int) * M * N);
39+
int *data_C = (int *) malloc(sizeof(int) * M * M);
4040
for (int i = 0; i < M * N; i++) {
4141
data_A[i] = i;
4242
data_B[i] = i;
@@ -95,7 +95,7 @@ int main() {
9595
HANDLE_ERROR(cudaMemcpy((void *) dev_C, (void *) C, sizeof(int *) * M, cudaMemcpyHostToDevice));
9696

9797
dim3 threadPerBlock(5, 5);
98-
dim3 numBlocks(M / threadPerBlock.x, N / threadPerBlock.y);
98+
dim3 numBlocks(M / threadPerBlock.x, M / threadPerBlock.y);
9999

100100
matrix_multiply <<<numBlocks, threadPerBlock>>> (dev_A, dev_B, dev_C);
101101

@@ -105,7 +105,7 @@ int main() {
105105
// print result:
106106
printf("The matrix multiply result is:\n");
107107
for (int i = 0; i < M; i++) {
108-
for (int j = 0; j < N ; j++) {
108+
for (int j = 0; j < M ; j++) {
109109
printf("%d ", data_C[i * M + j]);
110110
}
111111
printf("\n");

0 commit comments

Comments
 (0)