package ai.koog.prompt.executor.ollama.client

import ai.koog.agents.core.annotation.ExperimentalAgentsApi
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.llm.LLModel
import kotlin.math.round

/**
 * A strategy for computing the context window length used for `OllamaClient` requests and responses.
 * Each implementation defines its own approach to computing the length; based on the value the
 * strategy returns, Ollama truncates the context window accordingly.
 *
 * Example implementations:
 * - [ContextWindowStrategy.Default]
 * - [ContextWindowStrategy.Fixed]
 * - [ContextWindowStrategy.ModelBased]
 */
@ExperimentalAgentsApi
public interface ContextWindowStrategy {

    /**
     * The computed context window limits. A `null` value lets the Ollama server fall back to its default.
     *
     * @property maxRequestTokenCount maximum number of tokens available for the request context.
     * @property maxResponseTokenCount maximum number of tokens the model may generate in the response.
     */
    public data class ContextWindow(
        val maxRequestTokenCount: Int? = null,
        val maxResponseTokenCount: Int? = null,
    )

    /**
     * Computes the context window for the given [prompt] and [model].
     */
    public fun computeContextWindow(prompt: Prompt, model: LLModel): ContextWindow

    public companion object {

        /**
         * A strategy that lets the Ollama server decide the context window:
         * Ollama checks whether `num_ctx` is set in the model definition and otherwise
         * falls back to its default context length.
         */
        public data object Default : ContextWindowStrategy {
            override fun computeContextWindow(prompt: Prompt, model: LLModel): ContextWindow = ContextWindow(
                maxRequestTokenCount = null,
                maxResponseTokenCount = null,
            )
        }

        /**
         * A strategy that always uses the given fixed token limits, regardless of the prompt or model.
         *
         * @property maxRequestTokenCount fixed maximum number of request tokens, or `null` for Ollama's default.
         * @property maxResponseTokenCount fixed maximum number of response tokens, or `null` for Ollama's default.
         */
        public data class Fixed(
            val maxRequestTokenCount: Int? = null,
            val maxResponseTokenCount: Int? = null,
        ) : ContextWindowStrategy {
            override fun computeContextWindow(
                prompt: Prompt,
                model: LLModel,
            ): ContextWindow = ContextWindow(
                maxRequestTokenCount = maxRequestTokenCount,
                maxResponseTokenCount = maxResponseTokenCount,
            )
        }

        /**
         * A strategy that derives the token limits from the model's context length, splitting it
         * between request and response according to the given ratios.
         *
         * @property requestRatio fraction of the model's context length reserved for the request, or `null` for Ollama's default.
         * @property responseRatio fraction of the model's context length reserved for the response, or `null` for Ollama's default.
         */
        public data class ModelBased(
            val requestRatio: Double? = null,
            val responseRatio: Double? = null,
        ) : ContextWindowStrategy {
            init {
                val totalRatio = (requestRatio ?: 0.0) + (responseRatio ?: 0.0)
                require(totalRatio <= 1.0) {
                    "Total ratio must not exceed 1.0 but is $totalRatio"
                }
            }

            override fun computeContextWindow(
                prompt: Prompt,
                model: LLModel,
            ): ContextWindow {
                val contextLength = model.contextLength
                return ContextWindow(
                    maxRequestTokenCount = requestRatio?.let { round(contextLength * it).toInt() },
                    maxResponseTokenCount = responseRatio?.let { round(contextLength * it).toInt() },
                )
            }
        }
    }
}
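
A minimal usage sketch of the API above, for illustration only: the function name `chooseContextWindow` and the concrete ratios are made up, the example assumes `ExperimentalAgentsApi` is a standard `@RequiresOptIn` marker, and how `OllamaClient` actually consumes the returned `ContextWindow` (for example through a constructor parameter) is outside this file and not shown.

```kotlin
import ai.koog.agents.core.annotation.ExperimentalAgentsApi
import ai.koog.prompt.dsl.Prompt
import ai.koog.prompt.executor.ollama.client.ContextWindowStrategy
import ai.koog.prompt.executor.ollama.client.ContextWindowStrategy.Companion.ModelBased
import ai.koog.prompt.llm.LLModel

@OptIn(ExperimentalAgentsApi::class)
fun chooseContextWindow(prompt: Prompt, model: LLModel): ContextWindowStrategy.ContextWindow {
    // Reserve 75% of the model's context length for the request and 20% for the response,
    // leaving a small margin. Swap in Fixed(maxRequestTokenCount = 8192) for a hard limit,
    // or Default to let the Ollama server decide entirely on its own.
    val strategy: ContextWindowStrategy = ModelBased(
        requestRatio = 0.75,
        responseRatio = 0.20,
    )
    return strategy.computeContextWindow(prompt, model)
}
```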