Skip to content

Commit dfadc36

Browse files
authored
feat(llama.cpp): allow to set kv-overrides (#5745)
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent dbcf5fb commit dfadc36

File tree

4 files changed

13 additions and 1 deletion

4 files changed

13 additions and 1 deletion

backend/backend.proto

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -258,6 +258,8 @@ message ModelOptions {
258258
repeated GrammarTrigger GrammarTriggers = 65;
259259

260260
bool Reranking = 71;
261+
262+
repeated string Overrides = 72;
261263
}
262264

263265
message Result {

backend/cpp/llama/grpc-server.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212

1313
#include "backend.pb.h"
1414
#include "backend.grpc.pb.h"
15+
#include "common.h"
1516
#include <getopt.h>
1617
#include <grpcpp/ext/proto_server_reflection_plugin.h>
1718
#include <grpcpp/grpcpp.h>
@@ -260,6 +261,13 @@ static void params_parse(const backend::ModelOptions* request,
260261
}
261262
}
262263

264+
// Add kv_overrides
265+
if (request->overrides_size() > 0) {
266+
for (int i = 0; i < request->overrides_size(); i++) {
267+
string_parse_kv_override(request->overrides(i).c_str(), params.kv_overrides);
268+
}
269+
}
270+
263271
// TODO: Add yarn
264272

265273
if (!request->tensorsplit().empty()) {

core/backend/options.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,7 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
143143
CLIPModel: c.Diffusers.ClipModel,
144144
CLIPSubfolder: c.Diffusers.ClipSubFolder,
145145
Options: c.Options,
146+
Overrides: c.Overrides,
146147
CLIPSkip: int32(c.Diffusers.ClipSkip),
147148
ControlNet: c.Diffusers.ControlNet,
148149
ContextSize: int32(ctxSize),

core/config/backend_config.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,8 @@ type BackendConfig struct {
7070
Description string `yaml:"description"`
7171
Usage string `yaml:"usage"`
7272

73-
Options []string `yaml:"options"`
73+
Options []string `yaml:"options"`
74+
Overrides []string `yaml:"overrides"`
7475
}
7576

7677
// Pipeline defines other models to use for audio-to-audio

0 commit comments

Comments
 (0)