Skip to content

Commit a38991d

Browse files
authored
Fix integration tests run (#552)
1 parent 956e5b8 commit a38991d

File tree

6 files changed

+225
-200
lines changed

6 files changed

+225
-200
lines changed

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/AIAgentIntegrationTest.kt

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ import ai.koog.integration.tests.utils.TestUtils.readTestGoogleAIKeyFromEnv
3131
import ai.koog.integration.tests.utils.TestUtils.readTestOpenAIKeyFromEnv
3232
import ai.koog.prompt.dsl.prompt
3333
import ai.koog.prompt.executor.clients.anthropic.AnthropicModels
34+
import ai.koog.prompt.executor.clients.google.GoogleModels
3435
import ai.koog.prompt.executor.clients.openai.OpenAIModels
3536
import ai.koog.prompt.executor.llms.SingleLLMPromptExecutor
3637
import ai.koog.prompt.executor.llms.all.simpleAnthropicExecutor
@@ -639,7 +640,7 @@ class AIAgentIntegrationTest {
639640
val bye = "Bye"
640641

641642
val checkpointStrategy = strategy("checkpoint-strategy") {
642-
val nodeHello by node<String, String>(hello) { input ->
643+
val nodeHello by node<String, String>(hello) {
643644
sayHello
644645
}
645646

@@ -656,7 +657,7 @@ class AIAgentIntegrationTest {
656657
savedMessage
657658
}
658659

659-
val nodeBye by node<String, String>(bye) { input ->
660+
val nodeBye by node<String, String>(bye) {
660661
sayBye
661662
}
662663

@@ -744,7 +745,7 @@ class AIAgentIntegrationTest {
744745
val executionLog = StringBuilder()
745746

746747
val rollbackStrategy = strategy("rollback-strategy") {
747-
val nodeHello by node<String, String>(hello) { input ->
748+
val nodeHello by node<String, String>(hello) {
748749
executionLog.append(sayHelloLog)
749750
sayHello
750751
}
@@ -762,12 +763,12 @@ class AIAgentIntegrationTest {
762763
saySave
763764
}
764765

765-
val nodeBye by node<String, String>(bye) { input ->
766+
val nodeBye by node<String, String>(bye) {
766767
executionLog.append(sayByeLog)
767768
sayBye
768769
}
769770

770-
val rollbackNode by node<String, String>(rollback) { input ->
771+
val rollbackNode by node<String, String>(rollback) {
771772
// Use a shared variable to prevent infinite rollbacks
772773
// Only roll back once, then continue
773774
if (!hasRolledBack) {
@@ -856,15 +857,15 @@ class AIAgentIntegrationTest {
856857
val noCheckpointByeError = "No checkpoint for Node Bye"
857858

858859
val simpleStrategy = strategy(strategyName) {
859-
val nodeHello by node<String, String>(hello) { input ->
860+
val nodeHello by node<String, String>(hello) {
860861
sayHello
861862
}
862863

863-
val nodeWorld by node<String, String>(world) { input ->
864+
val nodeWorld by node<String, String>(world) {
864865
sayWorld
865866
}
866867

867-
val node3 by node<String, String>(bye) { input ->
868+
val node3 by node<String, String>(bye) {
868869
sayBye
869870
}
870871

@@ -929,7 +930,7 @@ class AIAgentIntegrationTest {
929930
JVMFilePersistencyStorageProvider(tempDir, "integration_AgentCheckpointStorageProvidersTest")
930931

931932
val simpleStrategy = strategy(strategyName) {
932-
val nodeHello by node<String, String>(hello) { input ->
933+
val nodeHello by node<String, String>(hello) {
933934
sayHello
934935
}
935936

@@ -980,6 +981,10 @@ class AIAgentIntegrationTest {
980981
@MethodSource("openAIModels", "anthropicModels", "googleModels")
981982
fun integration_AgentWithToolsWithoutParamsTest(model: LLModel) = runTest(timeout = 120.seconds) {
982983
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
984+
assumeTrue(
985+
model.id != GoogleModels.Gemini2_0Flash.id,
986+
"gemini-2.0-flash-001 returns flaky results and fails to call tools on a permanent basis"
987+
)
983988

984989
val registry = ToolRegistry {
985990
tool(CalculatorToolNoArgs)

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/MultipleLLMPromptExecutorIntegrationTest.kt

Lines changed: 83 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ import org.junit.jupiter.params.provider.Arguments
4343
import org.junit.jupiter.params.provider.MethodSource
4444
import java.nio.file.Path
4545
import java.nio.file.Paths
46-
import java.util.*
46+
import java.util.Base64
4747
import java.util.stream.Stream
4848
import kotlin.io.path.pathString
4949
import kotlin.io.path.readBytes
@@ -670,8 +670,6 @@ class MultipleLLMPromptExecutorIntegrationTest {
670670
}
671671
}
672672

673-
// ToDo add video & pdf specific scenarios
674-
675673
@ParameterizedTest
676674
@MethodSource("markdownScenarioModelCombinations")
677675
fun integration_testMarkdownProcessingBasic(
@@ -681,35 +679,36 @@ class MultipleLLMPromptExecutorIntegrationTest {
681679
runTest(timeout = 300.seconds) {
682680
Models.assumeAvailable(model.provider)
683681
val file = MediaTestUtils.createMarkdownFileForScenario(scenario, testResourcesDir)
684-
val prompt = if (model.capabilities.contains(LLMCapability.Document)) {
685-
prompt("markdown-test-${scenario.name.lowercase()}") {
686-
system("You are a helpful assistant that can analyze markdown files.")
687-
688-
user {
689-
markdown {
690-
"I'm sending you a markdown file with different markdown elements. "
691-
+"Please list all the markdown elements used in it and describe its structure clearly."
692-
}
682+
val prompt =
683+
if (model.capabilities.contains(LLMCapability.Document) && model.provider != LLMProvider.OpenAI) {
684+
prompt("markdown-test-${scenario.name.lowercase()}") {
685+
system("You are a helpful assistant that can analyze markdown files.")
686+
687+
user {
688+
markdown {
689+
+"I'm sending you a markdown file with different markdown elements. "
690+
+"Please list all the markdown elements used in it and describe its structure clearly."
691+
}
693692

694-
attachments {
695-
file(file.pathString, "text/markdown")
693+
attachments {
694+
textFile(KtPath(file.pathString), "text/plain")
695+
}
696696
}
697697
}
698-
}
699-
} else {
700-
prompt("markdown-test-${scenario.name.lowercase()}") {
701-
system("You are a helpful assistant that can analyze markdown files.")
702-
703-
user {
704-
markdown {
705-
"I'm sending you a markdown file with different markdown elements. "
706-
+"Please list all the markdown elements used in it and describe its structure clearly."
707-
newline()
708-
+file.readText()
698+
} else {
699+
prompt("markdown-test-${scenario.name.lowercase()}") {
700+
system("You are a helpful assistant that can analyze markdown files.")
701+
702+
user {
703+
markdown {
704+
+"I'm sending you a markdown file with different markdown elements. "
705+
+"Please list all the markdown elements used in it and describe its structure clearly."
706+
newline()
707+
+file.readText()
708+
}
709709
}
710710
}
711711
}
712-
}
713712

714713
withRetry {
715714
try {
@@ -787,32 +786,36 @@ class MultipleLLMPromptExecutorIntegrationTest {
787786
// For some edge cases, exceptions are expected
788787
when (scenario) {
789788
ImageTestScenario.LARGE_IMAGE_ANTHROPIC, ImageTestScenario.LARGE_IMAGE -> {
790-
assertTrue(
791-
e.message?.contains("400 Bad Request") == true,
789+
assertEquals(
790+
e.message?.contains("400 Bad Request"),
791+
true,
792792
"Expected exception for a large image [400 Bad Request] was not found, got [${e.message}] instead"
793793
)
794-
assertTrue(
795-
e.message?.contains("image exceeds") == true,
794+
assertEquals(
795+
e.message?.contains("image exceeds"),
796+
true,
796797
"Expected exception for a large image [image exceeds] was not found, got [${e.message}] instead"
797798
)
798799
}
799800

800801
ImageTestScenario.CORRUPTED_IMAGE, ImageTestScenario.EMPTY_IMAGE -> {
801-
assertTrue(
802-
e.message?.contains("400 Bad Request") == true,
802+
assertEquals(
803+
e.message?.contains("400 Bad Request"),
804+
true,
803805
"Expected exception for a corrupted image [400 Bad Request] was not found, got [${e.message}] instead"
804806
)
805807
if (model.provider == LLMProvider.Anthropic) {
806-
assertTrue(
807-
e.message?.contains("Could not process image") == true,
808+
assertEquals(
809+
e.message?.contains("Could not process image"),
810+
true,
808811
"Expected exception for a corrupted image [Could not process image] was not found, got [${e.message}] instead"
809812
)
810813
} else if (model.provider == LLMProvider.OpenAI) {
811-
assertTrue(
814+
assertEquals(
812815
e.message?.contains(
813816
"You uploaded an unsupported image. Please make sure your image is valid."
814-
) ==
815-
true,
817+
),
818+
true,
816819
"Expected exception for a corrupted image [You uploaded an unsupported image. Please make sure your image is valid.] was not found, got [${e.message}] instead"
817820
)
818821
}
@@ -831,37 +834,37 @@ class MultipleLLMPromptExecutorIntegrationTest {
831834
fun integration_testTextProcessingBasic(scenario: TextTestScenario, model: LLModel) =
832835
runTest(timeout = 300.seconds) {
833836
Models.assumeAvailable(model.provider)
834-
assumeTrue(model.provider != LLMProvider.OpenAI, "File format txt not supported for OpenAI")
835837

836838
val file = MediaTestUtils.createTextFileForScenario(scenario, testResourcesDir)
837839

838-
val prompt = if (model.capabilities.contains(LLMCapability.Document)) {
839-
prompt("text-test-${scenario.name.lowercase()}") {
840-
system("You are a helpful assistant that can analyze and process text.")
840+
val prompt =
841+
if (model.capabilities.contains(LLMCapability.Document) && model.provider != LLMProvider.OpenAI) {
842+
prompt("text-test-${scenario.name.lowercase()}") {
843+
system("You are a helpful assistant that can analyze and process text.")
841844

842-
user {
843-
markdown {
844-
"I'm sending you a text file. Please analyze it and summarize its content."
845-
}
845+
user {
846+
markdown {
847+
+"I'm sending you a text file. Please analyze it and summarize its content."
848+
}
846849

847-
attachments {
848-
textFile(KtPath(file.pathString), "text/plain")
850+
attachments {
851+
textFile(KtPath(file.pathString), "text/plain")
852+
}
849853
}
850854
}
851-
}
852-
} else {
853-
prompt("text-test-${scenario.name.lowercase()}") {
854-
system("You are a helpful assistant that can analyze and process text.")
855-
856-
user {
857-
markdown {
858-
+"I'm sending you a text file. Please analyze it and summarize its content."
859-
newline()
860-
+file.readText()
855+
} else {
856+
prompt("text-test-${scenario.name.lowercase()}") {
857+
system("You are a helpful assistant that can analyze and process text.")
858+
859+
user {
860+
markdown {
861+
+"I'm sending you a text file. Please analyze it and summarize its content."
862+
newline()
863+
+file.readText()
864+
}
861865
}
862866
}
863867
}
864-
}
865868

866869
withRetry {
867870
try {
@@ -871,28 +874,31 @@ class MultipleLLMPromptExecutorIntegrationTest {
871874
when (scenario) {
872875
TextTestScenario.EMPTY_TEXT -> {
873876
if (model.provider == LLMProvider.Google) {
874-
assertTrue(
875-
e.message?.contains("400 Bad Request") == true,
877+
assertEquals(
878+
e.message?.contains("400 Bad Request"),
879+
true,
876880
"Expected exception for empty text [400 Bad Request] was not found, got [${e.message}] instead"
877881
)
878-
assertTrue(
882+
assertEquals(
879883
e.message?.contains(
880884
"Unable to submit request because it has an empty inlineData parameter. Add a value to the parameter and try again."
881-
) ==
882-
true,
885+
),
886+
true,
883887
"Expected exception for empty text [Unable to submit request because it has an empty inlineData parameter. Add a value to the parameter and try again] was not found, got [${e.message}] instead"
884888
)
885889
}
886890
}
887891

888892
TextTestScenario.LONG_TEXT_5_MB -> {
889893
if (model.provider == LLMProvider.Anthropic) {
890-
assertTrue(
891-
e.message?.contains("400 Bad Request") == true,
894+
assertEquals(
895+
e.message?.contains("400 Bad Request"),
896+
true,
892897
"Expected exception for long text [400 Bad Request] was not found, got [${e.message}] instead"
893898
)
894-
assertTrue(
895-
e.message?.contains("prompt is too long") == true,
899+
assertEquals(
900+
e.message?.contains("prompt is too long"),
901+
true,
896902
"Expected exception for long text [prompt is too long:] was not found, got [${e.message}] instead"
897903
)
898904
} else if (model.provider == LLMProvider.Google) {
@@ -925,7 +931,7 @@ class MultipleLLMPromptExecutorIntegrationTest {
925931

926932
user {
927933
markdown {
928-
"I'm sending you an audio file. Please tell me a couple of words about it."
934+
+"I'm sending you an audio file. Please tell me a couple of words about it."
929935
}
930936

931937
attachments {
@@ -940,18 +946,21 @@ class MultipleLLMPromptExecutorIntegrationTest {
940946
checkExecutorMediaResponse(response)
941947
} catch (e: Exception) {
942948
if (scenario == AudioTestScenario.CORRUPTED_AUDIO) {
943-
assertTrue(
944-
e.message?.contains("400 Bad Request") == true,
949+
assertEquals(
950+
e.message?.contains("400 Bad Request"),
951+
true,
945952
"Expected exception for empty text [400 Bad Request] was not found, got [${e.message}] instead"
946953
)
947954
if (model.provider == LLMProvider.OpenAI) {
948-
assertTrue(
949-
e.message?.contains("This model does not support the format you provided.") == true,
955+
assertEquals(
956+
e.message?.contains("This model does not support the format you provided."),
957+
true,
950958
"Expected exception for corrupted audio [This model does not support the format you provided.]"
951959
)
952960
} else if (model.provider == LLMProvider.Google) {
953-
assertTrue(
954-
e.message?.contains("Request contains an invalid argument.") == true,
961+
assertEquals(
962+
e.message?.contains("Request contains an invalid argument."),
963+
true,
955964
"Expected exception for corrupted audio [Request contains an invalid argument.]"
956965
)
957966
}

0 commit comments

Comments
 (0)