Skip to content

Commit 5d892f8

Browse files
authored
chore(cuda): reduce binary size (#3379)
fix(cuda): reduce binary size

Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 7f06954 commit 5d892f8

File tree

2 files changed

+9
-2
lines changed

2 files changed

+9
-2
lines changed

Dockerfile

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -286,7 +286,14 @@ COPY --from=grpc /opt/grpc /usr/local
286286
WORKDIR /build
287287

288288
## Build the binary
289-
RUN make build
289+
## If it's CUDA, we want to skip some of the llama-compat backends to save space
290+
## We only leave the most CPU-optimized variant and the fallback for the cublas build
291+
## (both will use CUDA for the actual computation)
292+
RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
293+
SKIP_GRPC_BACKEND="backend-assets/grpc/llama-cpp-avx backend-assets/grpc/llama-cpp-avx2" make build; \
294+
else \
295+
make build; \
296+
fi
290297

291298
RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
292299
mkdir -p /build/sources/go-piper/piper-phonemize/pi/lib/ \

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
88
# llama.cpp versions
99
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
1010
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
11-
CPPLLAMA_VERSION?=3ba780e2a8f0ffe13f571b27f0bbf2ca5a199efc
11+
CPPLLAMA_VERSION?=e11bd856d538e44d24d8cad4b0381fba0984d162
1212

1313
# go-rwkv version
1414
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp

0 commit comments

Comments
 (0)