
Commit 3ed055c

WIP ContextWindowStrategy
1 parent 9ba587c commit 3ed055c

3 files changed (+80 lines, -1 line)

prompt/prompt-executor/prompt-executor-clients/prompt-executor-ollama-client/build.gradle.kts

Lines changed: 2 additions & 1 deletion
@@ -12,6 +12,7 @@ kotlin {
     sourceSets {
         commonMain {
             dependencies {
+                api(project(":agents:agents-core"))
                 api(project(":agents:agents-tools"))
                 api(project(":prompt:prompt-llm"))
                 api(project(":prompt:prompt-model"))
@@ -63,4 +64,4 @@ kotlin {
     explicitApi()
 }

-publishToMaven()
\ No newline at end of file
+publishToMaven()
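
Note: the new `agents:agents-core` dependency presumably supplies the `ai.koog.agents.core.annotation.ExperimentalAgentsApi` annotation imported by the new `ContextWindowStrategy` file below.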
prompt/prompt-executor/prompt-executor-clients/prompt-executor-ollama-client/src/commonMain/kotlin/ai/koog/prompt/executor/ollama/client/ContextWindowStrategy.kt

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
package ai.koog.prompt.executor.ollama.client

import ai.koog.agents.core.annotation.ExperimentalAgentsApi
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.llm.LLModel
import kotlin.math.round

/**
 * Represents a strategy for computing the context window length for `OllamaClient` requests and responses.
 * Different implementations define specific approaches to computing the context window length.
 * Based on the context window length computed by the strategy, Ollama will truncate the context accordingly.
 *
 * Example implementations:
 * - [ContextWindowStrategy.Default]
 * - [ContextWindowStrategy.Fixed]
 * - [ContextWindowStrategy.ModelBased]
 */
@ExperimentalAgentsApi
public interface ContextWindowStrategy {

    /**
     * The computed token budgets; a `null` value leaves the decision to the Ollama server.
     */
    public data class ContextWindow(
        val maxRequestTokenCount: Int? = null,
        val maxResponseTokenCount: Int? = null,
    )

    public fun computeContextWindow(prompt: Prompt, model: LLModel): ContextWindow

    public companion object {

        /**
         * A strategy that lets the Ollama server decide the context window:
         * Ollama uses `num_ctx` if it is present in the model definition and
         * falls back to its built-in default otherwise.
         */
        public data object Default : ContextWindowStrategy {
            override fun computeContextWindow(prompt: Prompt, model: LLModel): ContextWindow = ContextWindow(
                maxRequestTokenCount = null,
                maxResponseTokenCount = null,
            )
        }

        /**
         * A strategy that always returns the given fixed token counts.
         */
        public data class Fixed(
            val maxRequestTokenCount: Int? = null,
            val maxResponseTokenCount: Int? = null,
        ) : ContextWindowStrategy {
            override fun computeContextWindow(
                prompt: Prompt,
                model: LLModel,
            ): ContextWindow = ContextWindow(
                maxRequestTokenCount = maxRequestTokenCount,
                maxResponseTokenCount = maxResponseTokenCount,
            )
        }

        /**
         * A strategy that splits the model's context length between request and response
         * according to the given ratios, which must sum to at most 1.0.
         */
        public data class ModelBased(
            val requestRatio: Double? = null,
            val responseRatio: Double? = null,
        ) : ContextWindowStrategy {
            init {
                val totalRatio = (requestRatio ?: 0.0) + (responseRatio ?: 0.0)
                require(totalRatio <= 1.0) {
                    "Total ratio must be at most 1.0 but is $totalRatio"
                }
            }

            override fun computeContextWindow(
                prompt: Prompt,
                model: LLModel,
            ): ContextWindow {
                val contextLength = 10000 // TODO (WIP): replace with model.contextLength
                return ContextWindow(
                    maxRequestTokenCount = requestRatio?.let { round(contextLength * it).toInt() },
                    maxResponseTokenCount = responseRatio?.let { round(contextLength * it).toInt() },
                )
            }
        }
    }
}
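
As a usage sketch (not part of this commit): a caller could pick a strategy and compute the window before building an Ollama request. The `resolveWindow` helper below is hypothetical, and the implementations are imported through the companion object because that is where this commit nests them.

import ai.koog.agents.core.annotation.ExperimentalAgentsApi
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.ollama.client.ContextWindowStrategy
import ai.koog.prompt.executor.ollama.client.ContextWindowStrategy.Companion.ModelBased
import ai.koog.prompt.llm.LLModel

// Hypothetical helper, not part of the commit: derives token budgets that the
// client could translate into num_ctx / num_predict request options.
@OptIn(ExperimentalAgentsApi::class)
fun resolveWindow(prompt: Prompt, model: LLModel): ContextWindowStrategy.ContextWindow {
    // Reserve 75% of the context for the request and 20% for the response;
    // the init block rejects ratio sums above 1.0.
    val strategy = ModelBased(requestRatio = 0.75, responseRatio = 0.2)
    return strategy.computeContextWindow(prompt, model)
}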

prompt/prompt-executor/prompt-executor-clients/prompt-executor-ollama-client/src/commonMain/kotlin/ai/koog/prompt/executor/ollama/client/dto/OllamaModels.kt

Lines changed: 2 additions & 0 deletions
@@ -72,6 +72,8 @@ internal data class OllamaChatRequestDTO(
     internal data class Options(
         val temperature: Double? = null,
         val seed: Int? = null,
+        @SerialName("num_ctx") val numCtx: Int? = null,
+        @SerialName("num_predict") val numPredict: Int? = null,
     )
 }
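
For reference, `num_ctx` is Ollama's context window size and `num_predict` caps the number of generated tokens; both are standard Ollama request options. One plausible wiring from a computed `ContextWindow` to these options, sketched under the assumption that it lives next to the internal DTOs (not part of this commit):

// Sketch, not part of the commit: map a computed ContextWindow onto the
// request options; a null count leaves the corresponding Ollama default in place.
@OptIn(ExperimentalAgentsApi::class)
internal fun ContextWindowStrategy.ContextWindow.toOptions(): OllamaChatRequestDTO.Options =
    OllamaChatRequestDTO.Options(
        numCtx = maxRequestTokenCount,      // serialized as "num_ctx"
        numPredict = maxResponseTokenCount, // serialized as "num_predict"
    )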
