Skip to content

Commit 0ad5994

Browse files
nicklyu, Nikita.Lyubimov, and aozherelyeva
authored
Add support for Google's "thinking" mode in generation config (#414)
Co-authored-by: Nikita.Lyubimov <[email protected]> Co-authored-by: Anastasiia.Zarechneva <[email protected]>
1 parent 090c70d commit 0ad5994

File tree

5 files changed

+64
-3
lines changed

5 files changed

+64
-3
lines changed

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/DataModel.kt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,8 @@ internal class GoogleFunctionDeclaration(
238238
* @property numberOfChoices The number of reply choices to generate.
239239
* @property topP The maximum cumulative probability of tokens to consider when sampling.
240240
* @property topK The maximum number of tokens to consider when sampling.
241+
* @property thinkingConfig Controls whether the model should expose its chain-of-thought
242+
* and how many tokens it may spend on it (see [GoogleThinkingConfig]).
241243
*/
242244
@Serializable
243245
internal class GoogleGenerationConfig(
@@ -249,6 +251,7 @@ internal class GoogleGenerationConfig(
249251
val numberOfChoices: Int? = null,
250252
val topP: Double? = null,
251253
val topK: Int? = null,
254+
val thinkingConfig: GoogleThinkingConfig? = null
252255
)
253256

254257
/**
@@ -263,6 +266,20 @@ internal class GoogleToolConfig(
263266
val functionCallingConfig: GoogleFunctionCallingConfig? = null,
264267
)
265268

269+
/**
270+
* Optional request block that controls Gemini's "thinking" mode.
271+
*
272+
* @property includeThoughts When `true`, asks the model to return its intermediate reasoning in the response.
273+
* @property thinkingBudget Maximum number of tokens the model may spend on reasoning; `0` disables thinking (Gemini 2.5 Flash).
274+
*
* Both properties default to `null`.
*
275+
* API reference: https://ai.google.dev/gemini-api/docs/thinking#set-budget
276+
*/
277+
@Serializable
278+
internal data class GoogleThinkingConfig(
279+
val includeThoughts: Boolean? = null,
280+
val thinkingBudget: Int? = null
281+
)
282+
266283
/**
267284
* Configuration for tool calling
268285
*

prompt/prompt-executor/prompt-executor-clients/prompt-executor-google-client/src/commonMain/kotlin/ai/koog/prompt/executor/clients/google/GoogleLLMClient.kt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ import kotlin.uuid.Uuid
6161
*/
6262
public class GoogleClientSettings(
6363
public val baseUrl: String = "https://generativelanguage.googleapis.com",
64-
public val timeoutConfig: ConnectionTimeoutConfig = ConnectionTimeoutConfig()
64+
public val timeoutConfig: ConnectionTimeoutConfig = ConnectionTimeoutConfig(),
6565
)
6666

6767
/**
@@ -335,6 +335,10 @@ public open class GoogleLLMClient(
335335
null
336336
},
337337
maxOutputTokens = 2048,
338+
thinkingConfig = GoogleThinkingConfig(
339+
includeThoughts = prompt.params.includeThoughts.takeIf { it == true },
340+
thinkingBudget = prompt.params.thinkingBudget
341+
).takeIf { it.includeThoughts != null || it.thinkingBudget != null }
338342
)
339343

340344
val functionCallingConfig = when (val toolChoice = prompt.params.toolChoice) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package ai.koog.prompt.executor.clients.google
2+
3+
import kotlinx.serialization.json.Json
4+
import org.junit.jupiter.api.Assertions.assertTrue
5+
import kotlin.test.Test
6+
7+
/**
* Serialization test for the `thinkingConfig` field of [GoogleGenerationConfig].
*
* NOTE(review): `assertTrue` is imported from JUnit Jupiter while `@Test` comes from `kotlin.test`;
* consider using `kotlin.test.assertTrue` so the test relies on a single assertion library.
*/
class ThinkingConfigTest {
8+
/**
* Checks that an explicit `thinkingBudget` of `0` appears in the encoded JSON.
* `0` is a real value here, distinct from the `null` default of the property.
*/
@Test
9+
fun serializeThinkingBudget() {
10+
// Only thinkingConfig is set; every other generation-config field keeps its default.
val cfg = GoogleGenerationConfig(
11+
thinkingConfig = GoogleThinkingConfig(thinkingBudget = 0)
12+
)
13+
// Encode with the default Json instance and the generated serializer.
val json = Json.encodeToString(GoogleGenerationConfig.serializer(), cfg)
14+
// Substring match on the compact key/value pair; presumably relies on the default (non-pretty) output — confirm if the Json configuration changes.
assertTrue("\"thinkingBudget\":0" in json)
15+
}
16+
}

prompt/prompt-model/src/commonMain/kotlin/ai/koog/prompt/dsl/Prompt.kt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,22 @@ public data class Prompt(
137137
*
138138
* @property user An optional user identifier that can be used for tracking or personalization purposes. This property
139139
* is mutable to allow updates to the user context.
140+
*
141+
* @property includeThoughts If `true`, requests the model to add reasoning blocks to the response. Defaults to `null`.
142+
* When set to `true`, responses may include detailed reasoning steps.
143+
* When `false` or `null`, responses are typically shorter and faster.
144+
*
145+
* @property thinkingBudget Hard cap for reasoning tokens. Ignored by models that don't support budgets.
146+
* Limits the number of tokens the model may spend on reasoning; it applies whether or not `includeThoughts` is set.
140147
*/
141148
public class LLMParamsUpdateContext internal constructor(
142149
public var temperature: Double?,
143150
public var speculation: String?,
144151
public var schema: Schema?,
145152
public var toolChoice: ToolChoice?,
146153
public var user: String? = null,
154+
public var includeThoughts: Boolean? = null,
155+
public var thinkingBudget: Int? = null,
147156
) {
148157
/**
149158
* Secondary constructor for `LLMParamsUpdateContext` that initializes the context using an
@@ -157,7 +166,9 @@ public data class Prompt(
157166
params.speculation,
158167
params.schema,
159168
params.toolChoice,
160-
params.user
169+
params.user,
170+
params.includeThoughts,
171+
params.thinkingBudget
161172
)
162173

163174
/**
@@ -171,7 +182,9 @@ public data class Prompt(
171182
speculation = speculation,
172183
schema = schema,
173184
toolChoice = toolChoice,
174-
user = user
185+
user = user,
186+
includeThoughts = includeThoughts,
187+
thinkingBudget = thinkingBudget
175188
)
176189
}
177190

prompt/prompt-model/src/commonMain/kotlin/ai/koog/prompt/params/LLMParams.kt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ import kotlinx.serialization.json.JsonObject
1818
*
1919
* @property user An optional identifier for the user making the request, which can be used for tracking purposes.
2020
*
21+
* @property includeThoughts If `true`, requests the model to add reasoning blocks to the response. Defaults to `null`.
22+
* When set to `true`, responses may include detailed reasoning steps.
23+
* When `false` or `null`, responses are typically shorter and faster.
24+
*
25+
* @property thinkingBudget Hard cap for reasoning tokens. Ignored by models that don't support budgets.
26+
* Limits the number of tokens the model may spend on reasoning; it applies whether or not `includeThoughts` is set.
27+
*
2128
* This class also includes a nested `Builder` class to facilitate constructing instances in a more
2229
* customizable and incremental way.
2330
*/
@@ -29,6 +36,8 @@ public data class LLMParams(
2936
val schema: Schema? = null,
3037
val toolChoice: ToolChoice? = null,
3138
val user: String? = null,
39+
val includeThoughts: Boolean? = null,
40+
val thinkingBudget: Int? = null,
3241
) {
3342
init {
3443
temperature?.let { temp ->
@@ -64,6 +73,8 @@ public data class LLMParams(
6473
speculation = speculation ?: default.speculation,
6574
schema = schema ?: default.schema,
6675
user = user ?: default.user,
76+
includeThoughts = includeThoughts ?: default.includeThoughts,
77+
thinkingBudget = thinkingBudget ?: default.thinkingBudget,
6778
)
6879

6980
/**

0 commit comments

Comments
 (0)