Skip to content

Commit 3b40593

Browse files
authored
feat: migrate HIP implementation to CUDA with HIPIFY support. (#49)
* feat: migrate HIP implementation to CUDA with hipify support. * feat: enable CPU compilation of CUDA projects.
1 parent 1368a06 commit 3b40593

File tree

12 files changed

+93
-269
lines changed

12 files changed

+93
-269
lines changed

nsy/cuda/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
nsy
2+
nsy.o
3+
nsy.cpp
4+
nsy.cu.hip

nsy/cuda/Makefile

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
ifndef CXX
2+
CXX := nvcc
3+
endif
4+
CXXFLAGS := -O3 -Wall -Wno-pedantic
5+
ifeq ($(CXX), nvcc)
6+
CXXFLAGS := -Xcompiler "$(CXXFLAGS)"
7+
endif
8+
LDFLAGS :=
9+
10+
TARGET := nsy
11+
12+
SOURCES := $(wildcard ./*.cu)
13+
OBJECTS := $(patsubst %.cu, %.o, $(SOURCES))
14+
15+
all: $(TARGET)
16+
17+
$(TARGET): $(OBJECTS)
18+
$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $^
19+
20+
ifneq (,$(filter $(CXX),clang clang++))
21+
CXXFLAGS += -march=native -std=c++17
22+
LDFLAGS += -ltbb
23+
24+
_SOURCES := $(SOURCES)
25+
SOURCES := $(patsubst %.cu, %.cpp, $(SOURCES))
26+
27+
$(SOURCES): $(_SOURCES)
28+
hipify-clang --hip-kernel-execution-syntax -o $@ $<
29+
endif
30+
31+
ifeq ($(CXX), hipcc)
32+
_SOURCES := $(SOURCES)
33+
SOURCES := $(patsubst %.cu, %.cu.hip, $(SOURCES))
34+
35+
$(SOURCES): $(_SOURCES)
36+
hipify-clang -o $@ $<
37+
endif
38+
39+
$(OBJECTS): $(SOURCES)
40+
$(CXX) $(CXXFLAGS) -c -o $@ $<
41+
42+
clean:
43+
rm -f nsy
44+
rm -f nsy.o
45+
rm -f nsy.cpp
46+
rm -f nsy.cu.hip

nsy/cuda/README.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Build for CPU
2+
3+
Requirements: clang compiler, [HIP-CPU](https://github.com/ROCm/HIP-CPU)
4+
5+
```
6+
export CXX=clang++
7+
make
8+
```
9+
10+
# Build for NVIDIA GPUs
11+
12+
Requirements: NVIDIA Cuda compiler
13+
14+
```
15+
export CXX=nvcc
16+
make
17+
```
18+
19+
# Build for AMDGPUs
20+
21+
Requirements: HIP C++ compiler, AMD clang compiler
22+
23+
```
24+
export CXX=hipcc
25+
make
26+
```
Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,21 @@
1-
#ifdef _MSC_VER
2-
#include "msvc_defines.h"
3-
#endif
4-
#include <hip/hip_runtime.h>
1+
#include <cmath>
2+
#include <cuda_runtime.h>
53
#include <iostream>
64
#include <locale>
75
#include <string>
86
#include <vector>
97

10-
#define HIP_CHECK(status) \
11-
if (status != hipSuccess) { \
12-
fprintf(stderr, "error: '%s' at %s:%d\n", hipGetErrorString(status), \
8+
#define CUDA_CHECK(status) \
9+
if (status != cudaSuccess) { \
10+
fprintf(stderr, "error: '%s' at %s:%d\n", cudaGetErrorString(status), \
1311
__FILE__, __LINE__); \
1412
exit(0); \
1513
}
1614

1715
__global__ void kernel(const wchar_t *in, wchar_t *out, const size_t dim,
1816
const wchar_t mark) {
19-
const int idx = hipThreadIdx_x * dim + hipThreadIdx_y;
20-
if (hipBlockIdx_x) {
21-
out[idx * 2 + 1] = mark;
22-
} else {
23-
out[idx * 2] = in[idx];
24-
}
17+
const int idx = threadIdx.x * dim + threadIdx.y;
18+
out[idx * 2 + blockIdx.x] = blockIdx.x == 0 ? in[idx] : mark;
2519
}
2620

2721
wchar_t get_mark(char *s) {
@@ -41,7 +35,7 @@ int main(int argc, char *argv[]) {
4135
std::vector<wchar_t> str;
4236

4337
wchar_t c;
44-
while ((c = std::wcin.get()) != WEOF) {
38+
while ((c = std::wcin.get()) != (wchar_t)WEOF) {
4539
if (c == ' ') {
4640
continue;
4741
}
@@ -55,21 +49,22 @@ int main(int argc, char *argv[]) {
5549
const size_t input_size = sizeof(wchar_t) * dim * dim;
5650
const size_t output_length = dim * 2 * dim;
5751
const size_t output_size = sizeof(wchar_t) * output_length;
58-
HIP_CHECK(hipMalloc((void **)&input, input_size));
59-
HIP_CHECK(hipMalloc((void **)&output, output_size));
60-
HIP_CHECK(hipMemcpy(input, str.data(), sizeof(wchar_t) * length,
61-
hipMemcpyHostToDevice));
52+
CUDA_CHECK(cudaMalloc((void **)&input, input_size));
53+
CUDA_CHECK(cudaMalloc((void **)&output, output_size));
54+
CUDA_CHECK(cudaMemcpy(input, str.data(), sizeof(wchar_t) * length,
55+
cudaMemcpyHostToDevice));
6256

6357
const wchar_t mark = argc < 2 ? L'\xFF01' : get_mark(argv[1]);
6458
kernel<<<2, dim3(dim, dim), 0, 0>>>(input, output, dim, mark);
6559

6660
auto result = new wchar_t[output_length + 2]; // mark ... \x0000
6761
*result = mark;
68-
HIP_CHECK(hipMemcpy(result + 1, output, output_size, hipMemcpyDeviceToHost));
69-
result[length * 2 + 1] = 0;
62+
CUDA_CHECK(
63+
cudaMemcpy(result + 1, output, output_size, cudaMemcpyDeviceToHost));
64+
result[length * 2 + 1] = L'\0';
7065

71-
HIP_CHECK(hipFree(input));
72-
HIP_CHECK(hipFree(output));
66+
CUDA_CHECK(cudaFree(input));
67+
CUDA_CHECK(cudaFree(output));
7368

7469
std::wcout << result << L'\n';
7570

nsy/hip/.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.

nsy/hip/Makefile

Lines changed: 0 additions & 20 deletions
This file was deleted.

nsy/hip/msvc_defines.h

Lines changed: 0 additions & 62 deletions
This file was deleted.

nsy/hip/nsy_hip.sln

Lines changed: 0 additions & 25 deletions
This file was deleted.

nsy/hip/nsy_hip.vcxproj

Lines changed: 0 additions & 106 deletions
This file was deleted.

0 commit comments

Comments
 (0)