@@ -38,7 +38,10 @@ const (
38
38
LLamaCPPFallback = "llama-cpp-fallback"
39
39
LLamaCPPCUDA = "llama-cpp-cuda"
40
40
LLamaCPPHipblas = "llama-cpp-hipblas"
41
- LLamaCPPGRPC = "llama-cpp-grpc"
41
+ LLamaCPPSycl16 = "llama-cpp-sycl_16"
42
+ LLamaCPPSycl32 = "llama-cpp-sycl_32"
43
+
44
+ LLamaCPPGRPC = "llama-cpp-grpc"
42
45
43
46
Gpt4AllLlamaBackend = "gpt4all-llama"
44
47
Gpt4AllMptBackend = "gpt4all-mpt"
94
97
if autoDetect {
95
98
// if we find the llama.cpp variants, show them as a single backend (llama-cpp), as later we are going to pick that up
96
99
// when starting the service
97
- foundLCPPAVX , foundLCPPAVX2 , foundLCPPFallback , foundLCPPGRPC , foundLCPPCuda , foundLCPPHipblas := false , false , false , false , false , false
100
+ foundLCPPAVX , foundLCPPAVX2 , foundLCPPFallback , foundLCPPGRPC , foundLCPPCuda , foundLCPPHipblas , foundSycl16 , foundSycl32 := false , false , false , false , false , false , false , false
98
101
if _ , ok := backends [LLamaCPP ]; ! ok {
99
102
for _ , e := range entry {
100
103
if strings .Contains (e .Name (), LLamaCPPAVX2 ) && ! foundLCPPAVX2 {
@@ -121,6 +124,14 @@ ENTRY:
121
124
backends [LLamaCPP ] = append (backends [LLamaCPP ], LLamaCPPHipblas )
122
125
foundLCPPHipblas = true
123
126
}
127
+ if strings .Contains (e .Name (), LLamaCPPSycl16 ) && ! foundSycl16 {
128
+ backends [LLamaCPP ] = append (backends [LLamaCPP ], LLamaCPPSycl16 )
129
+ foundSycl16 = true
130
+ }
131
+ if strings .Contains (e .Name (), LLamaCPPSycl32 ) && ! foundSycl32 {
132
+ backends [LLamaCPP ] = append (backends [LLamaCPP ], LLamaCPPSycl32 )
133
+ foundSycl32 = true
134
+ }
124
135
}
125
136
}
126
137
}
@@ -172,9 +183,10 @@ ENTRY:
172
183
}
173
184
174
185
// selectGRPCProcess selects the GRPC process to start based on system capabilities
175
- func selectGRPCProcess (backend , assetDir string ) string {
186
+ func selectGRPCProcess (backend , assetDir string , f16 bool ) string {
176
187
foundCUDA := false
177
188
foundAMDGPU := false
189
+ foundIntelGPU := false
178
190
var grpcProcess string
179
191
180
192
// Select backend now just for llama.cpp
@@ -211,10 +223,24 @@ func selectGRPCProcess(backend, assetDir string) string {
211
223
log .Info ().Msgf ("GPU device found but no HIPBLAS backend present" )
212
224
}
213
225
}
226
+ if strings .Contains (gpu .String (), "intel" ) {
227
+ backend := LLamaCPPSycl16
228
+ if ! f16 {
229
+ backend = LLamaCPPSycl32
230
+ }
231
+ p := backendPath (assetDir , backend )
232
+ if _ , err := os .Stat (p ); err == nil {
233
+ log .Info ().Msgf ("[%s] attempting to load with Intel variant" , backend )
234
+ grpcProcess = p
235
+ foundIntelGPU = true
236
+ } else {
237
+ log .Info ().Msgf ("GPU device found but no Intel backend present" )
238
+ }
239
+ }
214
240
}
215
241
}
216
242
217
- if foundCUDA || foundAMDGPU {
243
+ if foundCUDA || foundAMDGPU || foundIntelGPU {
218
244
return grpcProcess
219
245
}
220
246
@@ -236,6 +262,7 @@ func selectGRPCProcess(backend, assetDir string) string {
236
262
// It also loads the model
237
263
func (ml * ModelLoader ) grpcModel (backend string , o * Options ) func (string , string ) (ModelAddress , error ) {
238
264
return func (modelName , modelFile string ) (ModelAddress , error ) {
265
+
239
266
log .Debug ().Msgf ("Loading Model %s with gRPC (file: %s) (backend: %s): %+v" , modelName , modelFile , backend , * o )
240
267
241
268
var client ModelAddress
@@ -284,7 +311,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
284
311
285
312
if autoDetect {
286
313
// autoDetect GRPC process to start based on system capabilities
287
- if selectedProcess := selectGRPCProcess (backend , o .assetDir ); selectedProcess != "" {
314
+ if selectedProcess := selectGRPCProcess (backend , o .assetDir , o . gRPCOptions . F16Memory ); selectedProcess != "" {
288
315
grpcProcess = selectedProcess
289
316
}
290
317
}
0 commit comments