2 files changed, +11 −4 lines changed

@@ -624,8 +624,14 @@ def forward(
             torch.Tensor: Logits output from the model.
         """
         self.stats.on_model_execution_start()
-        logging.debug(f"{self.model.method_meta('forward')}")
-        logits = self.model.forward((input_ids, cache_position))[0]
+
+        try:
+            logits = self.model.forward((input_ids, cache_position))[0]
+        except Exception as e:
+            shapes = {name: val.shape for name, val in locals().items() if hasattr(val, "shape")}
+            print(f"Exception: {e}.\n{self.model.method_meta('forward')}\narg shapes: {shapes}")
+            raise
+

         self.stats.on_model_execution_end()
         return logits
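For context, the hunk above replaces an unconditional debug log with a try/except that, on failure, dumps the shapes of any tensor-like locals alongside the method metadata before re-raising. A minimal standalone sketch of that pattern (the helper name call_with_shape_logging and the matmul example are illustrative assumptions, not part of the change):

import torch

def call_with_shape_logging(fn, *args):
    # Run fn(*args); on failure, report the shapes of any tensor arguments
    # before re-raising, so shape mismatches are easy to spot in logs.
    try:
        return fn(*args)
    except Exception as e:
        shapes = {i: a.shape for i, a in enumerate(args) if hasattr(a, "shape")}
        print(f"Exception: {e}. arg shapes: {shapes}")
        raise

# A deliberate shape mismatch: matmul of (2, 3) with (4,) raises, and the
# handler prints both shapes before propagating the error.
call_with_shape_logging(torch.matmul, torch.ones(2, 3), torch.ones(4))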
 import pytest
 import torchao
+from executorch import version as executorch_version
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
 from packaging.version import parse
 from transformers import AutoConfig, AutoTokenizer
@@ -76,8 +77,8 @@ def test_phi4_text_generation(self):
     @slow
     @pytest.mark.run_slow
     @pytest.mark.skipif(
-        parse(torchao.__version__) < parse("0.11.0.dev0"),
-        reason="Only available on torchao >= 0.11.0.dev0",
+        parse(executorch_version.__version__) > parse("0.6.0"),
+        reason="Require cache_position support in executorch runtime. Re-enable when available.",
     )
     def test_phi4_text_generation_with_quantized_pte_from_hub(self):
         model_id = "pytorch/Phi-4-mini-instruct-8da4w"
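This second hunk swaps the torchao version gate for an ExecuTorch version gate, skipping the test on runtimes newer than 0.6.0. A standalone sketch of that skip pattern (the ImportError fallback and the placeholder test body are illustrative assumptions, not the real test):

import pytest
from packaging.version import parse

try:
    from executorch import version as executorch_version
    _executorch_ver = executorch_version.__version__
except ImportError:
    _executorch_ver = "0.0.0"  # assumption: treat a missing runtime as "old" so the example still runs

@pytest.mark.skipif(
    parse(_executorch_ver) > parse("0.6.0"),
    reason="Require cache_position support in executorch runtime. Re-enable when available.",
)
def test_version_gated_example():
    # Placeholder body; the real test generates text from a quantized .pte pulled from the Hub.
    assert True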