Add support for Google's "thinking" mode in generation config (#414)

nicklyu · Nikita.Lyubimov · aozherelyeva · web-flow · commit 0ad59941a37a · 2025-08-11T10:12:42.000+02:00
Co-authored-by: Nikita.Lyubimov <nikita.lyubimov@jetbrains.com>
Co-authored-by: Anastasiia.Zarechneva <Anastasiia.Zarechneva@jetbrains.com>
diff --git a/prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/DataModel.kt b/prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/DataModel.kt
@@ -238,6 +238,8 @@ internal class GoogleFunctionDeclaration(
  * @property numberOfChoices The number of reply choices to generate.
  * @property topP The maximum cumulative probability of tokens to consider when sampling.
  * @property topK The maximum number of tokens to consider when sampling.
+ * @property thinkingConfig Controls whether the model should expose its chain-of-thought
+ * and how many tokens it may spend on it (see [GoogleThinkingConfig]).
  */
 @Serializable
 internal class GoogleGenerationConfig(
@@ -249,6 +251,7 @@ internal class GoogleGenerationConfig(
     val numberOfChoices: Int? = null,
     val topP: Double? = null,
     val topK: Int? = null,
+    val thinkingConfig: GoogleThinkingConfig? = null
 )
 
 /**
@@ -263,6 +266,20 @@ internal class GoogleToolConfig(
     val functionCallingConfig: GoogleFunctionCallingConfig? = null,
 )
 
+/**
+ * Optional block that controls Gemini's "thinking" mode; both properties default to `null`.
+ *
+ * @property includeThoughts When set to `true`, the model will return its intermediate reasoning.
+ * @property thinkingBudget Token limit for reasoning; `0` disables thinking (Gemini 2.5 Flash).
+ *
+ * API reference: https://ai.google.dev/gemini-api/docs/thinking#set-budget
+ */
+@Serializable
+internal data class GoogleThinkingConfig(
+    val includeThoughts: Boolean? = null,
+    val thinkingBudget: Int? = null
+)
+
 /**
  * Configuration for tool calling
  *
diff --git a/prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt b/prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt
@@ -61,7 +61,7 @@ import kotlin.uuid.Uuid
  */
 public class GoogleClientSettings(
     public val baseUrl: String = "https://generativelanguage.googleapis.com",
-    public val timeoutConfig: ConnectionTimeoutConfig = ConnectionTimeoutConfig()
+    public val timeoutConfig: ConnectionTimeoutConfig = ConnectionTimeoutConfig(),
 )
 
 /**
@@ -335,6 +335,10 @@ public open class GoogleLLMClient(
                 null
             },
             maxOutputTokens = 2048,
+            thinkingConfig = GoogleThinkingConfig(
+                includeThoughts = prompt.params.includeThoughts.takeIf { it == true },
+                thinkingBudget = prompt.params.thinkingBudget
+            ).takeIf { it.includeThoughts != null || it.thinkingBudget != null }
         )
 
         val functionCallingConfig = when (val toolChoice = prompt.params.toolChoice) {
diff --git a/prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/jvmTest/kotlin/ai/koog/prompt/executor/clients/google/ThinkingConfigTest.kt b/prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/jvmTest/kotlin/ai/koog/prompt/executor/clients/google/ThinkingConfigTest.kt
@@ -0,0 +1,16 @@
+package ai.koog.prompt.executor.clients.google
+
+import kotlinx.serialization.json.Json
+import org.junit.jupiter.api.Assertions.assertTrue
+import kotlin.test.Test
+
+class ThinkingConfigTest {
+    @Test
+    fun serializeThinkingBudget() {
+        // A budget of `0` must still appear in the payload, since `0` is the value that disables thinking.
+        val cfg = GoogleGenerationConfig(thinkingConfig = GoogleThinkingConfig(thinkingBudget = 0))
+        val json = Json.encodeToString(GoogleGenerationConfig.serializer(), cfg)
+        val expected = "\"thinkingBudget\":0"
+        assertTrue(expected in json, "Expected $expected in serialized config, got: $json")
+    }
+}
diff --git a/prompt/prompt-model/src/commonMain/kotlin/ai/koog/prompt/dsl/Prompt.kt b/prompt/prompt-model/src/commonMain/kotlin/ai/koog/prompt/dsl/Prompt.kt
@@ -137,13 +137,22 @@ public data class Prompt(
      *
      * @property user An optional user identifier that can be used for tracking or personalization purposes. This property
      * is mutable to allow updates to the user context.
+     *
+     * @property includeThoughts If `true`, requests the model to add reasoning blocks to the response. Defaults to `null`.
+     * When set to `true`, responses may include detailed reasoning steps.
+     * When `false` or `null`, responses are typically shorter and faster.
+     *
+     * @property thinkingBudget Hard cap for reasoning tokens. Ignored by models that don't support budgets.
+     * This limits reasoning tokens whether or not `includeThoughts` is set; the two options are independent.
      */
     public class LLMParamsUpdateContext internal constructor(
         public var temperature: Double?,
         public var speculation: String?,
         public var schema: Schema?,
         public var toolChoice: ToolChoice?,
         public var user: String? = null,
+        public var includeThoughts: Boolean? = null,
+        public var thinkingBudget: Int? = null,
     ) {
         /**
          * Secondary constructor for `LLMParamsUpdateContext` that initializes the context using an
@@ -157,7 +166,9 @@ public data class Prompt(
             params.speculation,
             params.schema,
             params.toolChoice,
-            params.user
+            params.user,
+            params.includeThoughts,
+            params.thinkingBudget
         )
 
         /**
@@ -171,7 +182,9 @@ public data class Prompt(
             speculation = speculation,
             schema = schema,
             toolChoice = toolChoice,
-            user = user
+            user = user,
+            includeThoughts = includeThoughts,
+            thinkingBudget = thinkingBudget
         )
     }
 
diff --git a/prompt/prompt-model/src/commonMain/kotlin/ai/koog/prompt/params/LLMParams.kt b/prompt/prompt-model/src/commonMain/kotlin/ai/koog/prompt/params/LLMParams.kt
@@ -18,6 +18,13 @@ import kotlinx.serialization.json.JsonObject
  *
  * @property user An optional identifier for the user making the request, which can be used for tracking purposes.
  *
+ * @property includeThoughts If `true`, requests the model to add reasoning blocks to the response. Defaults to `null`.
+ * When set to `true`, responses may include detailed reasoning steps.
+ * When `false` or `null`, responses are typically shorter and faster.
+ *
+ * @property thinkingBudget Hard cap for reasoning tokens. Ignored by models that don't support budgets.
+ * This limits reasoning tokens whether or not `includeThoughts` is set; the two options are independent.
+ *
  * This class also includes a nested `Builder` class to facilitate constructing instances in a more
  * customizable and incremental way.
  */
@@ -29,6 +36,8 @@ public data class LLMParams(
     val schema: Schema? = null,
     val toolChoice: ToolChoice? = null,
     val user: String? = null,
+    val includeThoughts: Boolean? = null,
+    val thinkingBudget: Int? = null,
 ) {
     init {
         temperature?.let { temp ->
@@ -64,6 +73,8 @@ public data class LLMParams(
         speculation = speculation ?: default.speculation,
         schema = schema ?: default.schema,
         user = user ?: default.user,
+        includeThoughts = includeThoughts ?: default.includeThoughts,
+        thinkingBudget = thinkingBudget ?: default.thinkingBudget,
     )
 
     /**