80
80
if e .IsDir () {
81
81
continue
82
82
}
83
+ if strings .HasSuffix (e .Name (), ".log" ) {
84
+ continue
85
+ }
83
86
84
87
// Skip the llama.cpp variants if we are autoDetecting
85
88
// But we always load the fallback variant if it exists
@@ -265,12 +268,12 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
265
268
266
269
// starts the grpcModelProcess for the backend, and returns a grpc client
267
270
// It also loads the model
268
- func (ml * ModelLoader ) grpcModel (backend string , o * Options ) func (string , string ) (ModelAddress , error ) {
269
- return func (modelName , modelFile string ) (ModelAddress , error ) {
271
+ func (ml * ModelLoader ) grpcModel (backend string , o * Options ) func (string , string ) (* Model , error ) {
272
+ return func (modelName , modelFile string ) (* Model , error ) {
270
273
271
274
log .Debug ().Msgf ("Loading Model %s with gRPC (file: %s) (backend: %s): %+v" , modelName , modelFile , backend , * o )
272
275
273
- var client ModelAddress
276
+ var client * Model
274
277
275
278
getFreeAddress := func () (string , error ) {
276
279
port , err := freeport .GetFreePort ()
@@ -298,26 +301,26 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
298
301
log .Debug ().Msgf ("external backend is file: %+v" , fi )
299
302
serverAddress , err := getFreeAddress ()
300
303
if err != nil {
301
- return "" , fmt .Errorf ("failed allocating free ports: %s" , err .Error ())
304
+ return nil , fmt .Errorf ("failed allocating free ports: %s" , err .Error ())
302
305
}
303
306
// Make sure the process is executable
304
307
if err := ml .startProcess (uri , o .model , serverAddress ); err != nil {
305
308
log .Error ().Err (err ).Str ("path" , uri ).Msg ("failed to launch " )
306
- return "" , err
309
+ return nil , err
307
310
}
308
311
309
312
log .Debug ().Msgf ("GRPC Service Started" )
310
313
311
- client = ModelAddress (serverAddress )
314
+ client = NewModel (serverAddress )
312
315
} else {
313
316
log .Debug ().Msg ("external backend is uri" )
314
317
// address
315
- client = ModelAddress (uri )
318
+ client = NewModel (uri )
316
319
}
317
320
} else {
318
321
grpcProcess := backendPath (o .assetDir , backend )
319
322
if err := utils .VerifyPath (grpcProcess , o .assetDir ); err != nil {
320
- return "" , fmt .Errorf ("grpc process not found in assetdir: %s" , err .Error ())
323
+ return nil , fmt .Errorf ("grpc process not found in assetdir: %s" , err .Error ())
321
324
}
322
325
323
326
if autoDetect {
@@ -329,12 +332,12 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
329
332
330
333
// Check if the file exists
331
334
if _ , err := os .Stat (grpcProcess ); os .IsNotExist (err ) {
332
- return "" , fmt .Errorf ("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS" , grpcProcess )
335
+ return nil , fmt .Errorf ("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS" , grpcProcess )
333
336
}
334
337
335
338
serverAddress , err := getFreeAddress ()
336
339
if err != nil {
337
- return "" , fmt .Errorf ("failed allocating free ports: %s" , err .Error ())
340
+ return nil , fmt .Errorf ("failed allocating free ports: %s" , err .Error ())
338
341
}
339
342
340
343
args := []string {}
@@ -344,12 +347,12 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
344
347
345
348
// Make sure the process is executable in any circumstance
346
349
if err := ml .startProcess (grpcProcess , o .model , serverAddress , args ... ); err != nil {
347
- return "" , err
350
+ return nil , err
348
351
}
349
352
350
353
log .Debug ().Msgf ("GRPC Service Started" )
351
354
352
- client = ModelAddress (serverAddress )
355
+ client = NewModel (serverAddress )
353
356
}
354
357
355
358
// Wait for the service to start up
@@ -369,7 +372,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
369
372
370
373
if ! ready {
371
374
log .Debug ().Msgf ("GRPC Service NOT ready" )
372
- return "" , fmt .Errorf ("grpc service not ready" )
375
+ return nil , fmt .Errorf ("grpc service not ready" )
373
376
}
374
377
375
378
options := * o .gRPCOptions
@@ -380,27 +383,16 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
380
383
381
384
res , err := client .GRPC (o .parallelRequests , ml .wd ).LoadModel (o .context , & options )
382
385
if err != nil {
383
- return "" , fmt .Errorf ("could not load model: %w" , err )
386
+ return nil , fmt .Errorf ("could not load model: %w" , err )
384
387
}
385
388
if ! res .Success {
386
- return "" , fmt .Errorf ("could not load model (no success): %s" , res .Message )
389
+ return nil , fmt .Errorf ("could not load model (no success): %s" , res .Message )
387
390
}
388
391
389
392
return client , nil
390
393
}
391
394
}
392
395
393
- func (ml * ModelLoader ) resolveAddress (addr ModelAddress , parallel bool ) (grpc.Backend , error ) {
394
- if parallel {
395
- return addr .GRPC (parallel , ml .wd ), nil
396
- }
397
-
398
- if _ , ok := ml .grpcClients [string (addr )]; ! ok {
399
- ml .grpcClients [string (addr )] = addr .GRPC (parallel , ml .wd )
400
- }
401
- return ml .grpcClients [string (addr )], nil
402
- }
403
-
404
396
func (ml * ModelLoader ) BackendLoader (opts ... Option ) (client grpc.Backend , err error ) {
405
397
o := NewOptions (opts ... )
406
398
@@ -425,7 +417,6 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
425
417
log .Error ().Err (err ).Str ("keptModel" , o .model ).Msg ("error while shutting down all backends except for the keptModel" )
426
418
return nil , err
427
419
}
428
-
429
420
}
430
421
431
422
var backendToConsume string
@@ -438,26 +429,28 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
438
429
backendToConsume = backend
439
430
}
440
431
441
- addr , err := ml .LoadModel (o .model , ml .grpcModel (backendToConsume , o ))
432
+ model , err := ml .LoadModel (o .model , ml .grpcModel (backendToConsume , o ))
442
433
if err != nil {
443
434
return nil , err
444
435
}
445
436
446
- return ml . resolveAddress ( addr , o .parallelRequests )
437
+ return model . GRPC ( o .parallelRequests , ml . wd ), nil
447
438
}
448
439
449
440
func (ml * ModelLoader ) GreedyLoader (opts ... Option ) (grpc.Backend , error ) {
450
441
o := NewOptions (opts ... )
451
442
452
443
ml .mu .Lock ()
444
+
453
445
// Return earlier if we have a model already loaded
454
446
// (avoid looping through all the backends)
455
- if m := ml .CheckIsLoaded (o .model ); m != "" {
447
+ if m := ml .CheckIsLoaded (o .model ); m != nil {
456
448
log .Debug ().Msgf ("Model '%s' already loaded" , o .model )
457
449
ml .mu .Unlock ()
458
450
459
- return ml . resolveAddress ( m , o .parallelRequests )
451
+ return m . GRPC ( o .parallelRequests , ml . wd ), nil
460
452
}
453
+
461
454
// If we can have only one backend active, kill all the others (except external backends)
462
455
if o .singleActiveBackend {
463
456
log .Debug ().Msgf ("Stopping all backends except '%s'" , o .model )
0 commit comments