
Commit 6dc9aa2

guangy10 (Guang Yang) authored
Transformers version bump (#70)
Co-authored-by: Guang Yang <[email protected]>
1 parent 34cece4 · commit 6dc9aa2

File tree: 9 files changed, +109 −49 lines changed


.github/workflows/test_models.yml

Lines changed: 4 additions & 10 deletions
@@ -51,25 +51,19 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies for ExecuTorch
         run: |
+          pip install '.[tests]'
           if [ "${{ matrix.executorch-version }}" == "nightly" ]; then
-            export NIGHTLY_VERSION=dev20250507
+            export NIGHTLY_VERSION=dev20250523
             pip install executorch==0.7.0.${NIGHTLY_VERSION} \
               torch==2.8.0.${NIGHTLY_VERSION} \
               torchvision==0.22.0.${NIGHTLY_VERSION} \
               torchaudio==2.6.0.${NIGHTLY_VERSION} \
-              torchao==0.12.0.${NIGHTLY_VERSION} \
+              torchao==0.12.0.dev20250528 \
               --extra-index-url "https://download.pytorch.org/whl/nightly/cpu"
+            pip install transformers==4.52.4
           else
             pip install executorch==${{ matrix.executorch-version }}
           fi
-          pip install '.[tests]'
-          if [ "${{ matrix.test-modeling }}" == "gemma3" ]; then
-            git clone https://github.com/huggingface/transformers.git
-            pushd transformers
-            git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
-            pip install -e .
-            popd
-          fi
           pip list
       - name: Run tests
         run: |
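Note on the pins above: executorch, torch, torchvision, and torchaudio all share the dev20250523 nightly tag, while torchao is deliberately pinned to a separate dev20250528 build. A minimal sketch for sanity-checking that an environment matches these pins (the expected tags below simply mirror the workflow and would need updating whenever NIGHTLY_VERSION is bumped; installed wheels may carry a local suffix such as "+cpu", hence the prefix match):

    # Illustrative check that installed nightlies match the CI pins above.
    from importlib.metadata import version

    EXPECTED = {
        "executorch": "0.7.0.dev20250523",
        "torch": "2.8.0.dev20250523",
        "torchvision": "0.22.0.dev20250523",
        "torchaudio": "2.6.0.dev20250523",
        "torchao": "0.12.0.dev20250528",  # pinned to a different nightly date
    }

    for pkg, expected in EXPECTED.items():
        installed = version(pkg)
        status = "OK" if installed.startswith(expected) else f"MISMATCH (expected {expected})"
        print(f"{pkg}: {installed} -> {status}")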

setup.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 INSTALL_REQUIRE = [
     "optimum~=1.24",
     "executorch>=0.6.0",
-    "transformers==4.51.0",
+    "transformers==4.51.3",
 ]

 TESTS_REQUIRE = [
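The exact transformers pin lives in INSTALL_REQUIRE, which is what `pip install '.[tests]'` resolves in the workflow above; the nightly job then overrides it with an explicit `pip install transformers==4.52.4`. For context, a sketch of how such lists are conventionally wired into setuptools (illustrative only: the TESTS_REQUIRE contents and package name below are assumptions, and the repo's real setup() call may differ):

    from setuptools import find_packages, setup

    INSTALL_REQUIRE = [
        "optimum~=1.24",
        "executorch>=0.6.0",
        "transformers==4.51.3",  # exact pin bumped by this commit
    ]
    TESTS_REQUIRE = ["pytest"]  # hypothetical contents

    setup(
        name="optimum-executorch",  # assumed package name
        packages=find_packages(),
        install_requires=INSTALL_REQUIRE,
        extras_require={"tests": TESTS_REQUIRE},  # enables pip install '.[tests]'
    )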

tests/models/test_modeling_gemma.py

Lines changed: 19 additions & 18 deletions
@@ -17,24 +17,23 @@
 import logging
 import os
 import subprocess
-import sys
 import tempfile
 import unittest

 import pytest
+import torchao
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from packaging.version import parse
 from transformers import AutoTokenizer
 from transformers.testing_utils import slow

 from optimum.executorch import ExecuTorchModelForCausalLM

-from ..utils import check_causal_lm_output_quality

-
-is_linux_ci = sys.platform.startswith("linux") and os.environ.get("GITHUB_ACTIONS") == "true"
-
-
-@pytest.mark.skipif(is_linux_ci, reason="OOM on linux runner")
+@pytest.mark.skipif(
+    parse(torchao.__version__) < parse("0.11.0.dev0"),
+    reason="Only available on torchao >= 0.11.0.dev0",
+)
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -48,7 +47,14 @@ def test_gemma_export_to_executorch(self):
         with tempfile.TemporaryDirectory() as tempdir:
             out_dir = f"{tempdir}/executorch"
             subprocess.run(
-                f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {out_dir}",
+                f"optimum-cli export executorch \
+                    --model {model_id} \
+                    --task {task} \
+                    --recipe {recipe} \
+                    --output_dir {tempdir}/executorch \
+                    --use_custom_sdpa \
+                    --qlinear \
+                    --qembedding",
                 shell=True,
                 check=True,
             )
@@ -62,14 +68,17 @@ def test_gemma_export_to_executorch(self):

     @slow
     @pytest.mark.run_slow
-    def test_gemma_text_generation_float16(self):
+    def test_gemma_text_generation_with_custom_sdpa_8da4w_8we(self):
         # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2b"
         model_id = "weqweasdas/RM-Gemma-2B"
+        # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization
+        kwargs = {"qlinear": True, "qembedding": True}
         model = ExecuTorchModelForCausalLM.from_pretrained(
             model_id,
             recipe="xnnpack",
-            **{"dtype": "float16"},
+            attn_implementation="custom_sdpa",
+            **kwargs,
         )
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
@@ -81,11 +90,3 @@ def test_gemma_text_generation_float16(self):
             max_seq_len=21,
         )
         logging.info(f"\nGenerated text:\n\t{generated_text}")
-        generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids
-
-        # Free memory before loading eager for quality check
-        del model
-        del tokenizer
-        gc.collect()
-
-        self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens))
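Taken together, the gemma changes exercise the same flow a user would run: CLI export with custom SDPA plus 8da4w linear and int8 embedding quantization, then loading and generating via from_pretrained with the equivalent kwargs. A condensed sketch of that flow outside the test harness (model id, output dir, and prompt are placeholders; the flags and kwargs are exactly the ones this diff adds):

    import subprocess

    from transformers import AutoTokenizer

    from optimum.executorch import ExecuTorchModelForCausalLM

    model_id = "weqweasdas/RM-Gemma-2B"  # placeholder: any supported causal LM
    # CLI export, mirroring the updated test_gemma_export_to_executorch.
    subprocess.run(
        f"optimum-cli export executorch \
            --model {model_id} \
            --task text-generation \
            --recipe xnnpack \
            --output_dir ./executorch_out \
            --use_custom_sdpa \
            --qlinear \
            --qembedding",
        shell=True,
        check=True,
    )

    # Python-side loading with the equivalent kwargs, as the updated generation test does.
    model = ExecuTorchModelForCausalLM.from_pretrained(
        model_id,
        recipe="xnnpack",
        attn_implementation="custom_sdpa",
        qlinear=True,
        qembedding=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    print(model.text_generation(tokenizer=tokenizer, prompt="Hello my name is", max_seq_len=21))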

tests/models/test_modeling_gemma2.py

Lines changed: 19 additions & 8 deletions
@@ -17,12 +17,13 @@
 import logging
 import os
 import subprocess
-import sys
 import tempfile
 import unittest

 import pytest
+import torchao
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from packaging.version import parse
 from transformers import AutoTokenizer
 from transformers.testing_utils import slow

@@ -31,10 +32,10 @@
 from ..utils import check_causal_lm_output_quality


-is_linux_ci = sys.platform.startswith("linux") and os.environ.get("GITHUB_ACTIONS") == "true"
-
-
-@pytest.mark.skipif(is_linux_ci, reason="OOM on linux runner")
+@pytest.mark.skipif(
+    parse(torchao.__version__) < parse("0.11.0.dev0"),
+    reason="Only available on torchao >= 0.11.0.dev0",
+)
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -48,7 +49,14 @@ def test_gemma2_export_to_executorch(self):
         with tempfile.TemporaryDirectory() as tempdir:
             out_dir = f"{tempdir}/executorch"
             subprocess.run(
-                f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {out_dir}",
+                f"optimum-cli export executorch \
+                    --model {model_id} \
+                    --task {task} \
+                    --recipe {recipe} \
+                    --output_dir {tempdir}/executorch \
+                    --use_custom_sdpa \
+                    --qlinear \
+                    --qembedding",
                 shell=True,
                 check=True,
             )
@@ -62,14 +70,17 @@ def test_gemma2_export_to_executorch(self):

     @slow
     @pytest.mark.run_slow
-    def test_gemma2_text_generation_float16(self):
+    def test_gemma2_text_generation_with_custom_sdpa_8da4w_8we(self):
         # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2-2b"
         model_id = "unsloth/gemma-2-2b-it"
+        # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization
+        kwargs = {"qlinear": True, "qembedding": True}
         model = ExecuTorchModelForCausalLM.from_pretrained(
             model_id,
             recipe="xnnpack",
-            **{"dtype": "float16"},
+            attn_implementation="custom_sdpa",
+            **kwargs,
         )
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
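The torchao gate added here (and in the gemma, llama, and olmo tests) relies on PEP 440 ordering, in which a .dev pre-release sorts below its final release; "0.11.0.dev0" is therefore the earliest version in the 0.11.0 series, so both 0.11 dev builds and anything newer pass the check:

    from packaging.version import parse

    assert parse("0.10.0") < parse("0.11.0.dev0")   # older releases are skipped
    assert parse("0.11.0.dev0") < parse("0.11.0")   # dev builds precede the final
    assert parse("0.11.0.dev0") < parse("0.12.0.dev20250528")  # CI's torchao nightly passes
    print("version ordering as expected")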

tests/models/test_modeling_gemma3.py

Lines changed: 11 additions & 4 deletions
@@ -59,7 +59,14 @@ def test_gemma3_export_to_executorch(self):
         with tempfile.TemporaryDirectory() as tempdir:
             out_dir = f"{tempdir}/executorch"
             subprocess.run(
-                f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {out_dir}",
+                f"optimum-cli export executorch \
+                    --model {model_id} \
+                    --task {task} \
+                    --recipe {recipe} \
+                    --output_dir {tempdir}/executorch \
+                    --use_custom_sdpa \
+                    --qlinear \
+                    --qembedding",
                 shell=True,
                 check=True,
             )
@@ -176,14 +183,14 @@ def test_gemma3_text_generation_with_custom_sdpa_float16(self):
         parse(torchao.__version__) < parse("0.11.0.dev0"),
         reason="Only available on torchao >= 0.11.0.dev0",
     )
-    def test_gemma3_text_generation_with_custom_sdpa_8da4w(self):
+    def test_gemma3_text_generation_with_custom_sdpa_8da4w_8we(self):
         # TODO: Until https://github.com/huggingface/optimum/issues/2127 is fixed, have to use non-gated model on CI
         # model_id = "google/gemma-3-1b-it"
         model_id = "unsloth/gemma-3-1b-it"
         prompt = "Write a poem about a machine learning."

-        # ExecuTorch model + custom sdpa + 8da4w linear quantization
-        kwargs = {"qlinear": True}
+        # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization
+        kwargs = {"qlinear": True, "qembedding": True}
         model = ExecuTorchModelForCausalLM.from_pretrained(
             model_id,
             recipe="xnnpack",
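On naming: "8da4w" is torchao shorthand for int8 dynamic activation quantization with int4 weights on linear layers, and "8we" denotes int8 weight quantization of the embedding table (the qlinear/qembedding flags above). A rough sketch of the linear half in raw torchao terms — hedged, since the quantize_ and int8_dynamic_activation_int4_weight names exist in recent torchao releases but the config surface has shifted across versions, and optimum-executorch's actual wiring may differ:

    import torch
    from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_

    model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU())
    # Applies int8-dynamic-activation / int4-weight ("8da4w") quantization
    # to the Linear modules in place.
    quantize_(model, int8_dynamic_activation_int4_weight())
    print(model)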

tests/models/test_modeling_llama.py

Lines changed: 18 additions & 3 deletions
@@ -22,7 +22,9 @@
 import unittest

 import pytest
+import torchao
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from packaging.version import parse
 from transformers import AutoTokenizer
 from transformers.testing_utils import slow

@@ -34,6 +36,10 @@
 is_linux_ci = sys.platform.startswith("linux") and os.environ.get("GITHUB_ACTIONS") == "true"


+@pytest.mark.skipif(
+    parse(torchao.__version__) < parse("0.11.0.dev0"),
+    reason="Only available on torchao >= 0.11.0.dev0",
+)
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -47,7 +53,14 @@ def test_llama3_2_1b_export_to_executorch(self):
         with tempfile.TemporaryDirectory() as tempdir:
             out_dir = f"{tempdir}/executorch"
             subprocess.run(
-                f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {out_dir}",
+                f"optimum-cli export executorch \
+                    --model {model_id} \
+                    --task {task} \
+                    --recipe {recipe} \
+                    --output_dir {tempdir}/executorch \
+                    --use_custom_sdpa \
+                    --qlinear \
+                    --qembedding",
                 shell=True,
                 check=True,
             )
@@ -88,13 +101,15 @@ def test_llama3_2_1b_text_generation(self):

     @slow
     @pytest.mark.run_slow
-    def test_llama_text_generation_with_custom_sdpa(self):
-        # ExecuTorch model + custom sdpa
+    def test_llama_text_generation_with_custom_sdpa_8da4w_8we(self):
+        # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization
         model_id = "NousResearch/Llama-3.2-1B"
+        kwargs = {"qlinear": True, "qembedding": True}
         model = ExecuTorchModelForCausalLM.from_pretrained(
             model_id,
             recipe="xnnpack",
             attn_implementation="custom_sdpa",
+            **kwargs,
         )
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_olmo.py

Lines changed: 23 additions & 3 deletions
@@ -17,11 +17,14 @@
 import logging
 import os
 import subprocess
+import sys
 import tempfile
 import unittest

 import pytest
+import torchao
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
+from packaging.version import parse
 from transformers import AutoTokenizer
 from transformers.testing_utils import slow

@@ -30,6 +33,13 @@
 from ..utils import check_causal_lm_output_quality


+is_linux_ci = sys.platform.startswith("linux") and os.environ.get("GITHUB_ACTIONS") == "true"
+
+
+@pytest.mark.skipif(
+    parse(torchao.__version__) < parse("0.11.0.dev0"),
+    reason="Only available on torchao >= 0.11.0.dev0",
+)
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -42,14 +52,22 @@ def test_olmo_export_to_executorch(self):
         recipe = "xnnpack"
         with tempfile.TemporaryDirectory() as tempdir:
             subprocess.run(
-                f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch",
+                f"optimum-cli export executorch \
+                    --model {model_id} \
+                    --task {task} \
+                    --recipe {recipe} \
+                    --output_dir {tempdir}/executorch \
+                    --use_custom_sdpa \
+                    --qlinear \
+                    --qembedding",
                 shell=True,
                 check=True,
             )
             self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte"))

     @slow
     @pytest.mark.run_slow
+    @pytest.mark.skipif(is_linux_ci, reason="OOM on linux runner")
     def test_olmo_text_generation_with_xnnpack(self):
         model_id = "allenai/OLMo-1B-hf"
         model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
@@ -74,13 +92,15 @@ def test_olmo_text_generation_with_xnnpack(self):

     @slow
     @pytest.mark.run_slow
-    def test_olmo_text_generation_with_custom_sdpa(self):
-        # ExecuTorch model + custom sdpa
+    def test_olmo_text_generation_with_custom_sdpa_8da4w_8we(self):
+        # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization
         model_id = "allenai/OLMo-1B-hf"
+        kwargs = {"qlinear": True, "qembedding": True}
         model = ExecuTorchModelForCausalLM.from_pretrained(
             model_id,
             recipe="xnnpack",
             attn_implementation="custom_sdpa",
+            **kwargs,
         )
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_phi4.py

Lines changed: 2 additions & 1 deletion
@@ -139,7 +139,8 @@ def test_phi4_text_generation_with_quantized_ckp(self):
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        # Using "pytorch/Phi-4-mini-instruct-8da4w" will end up loading a wrong GPT2Tokenizer
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")
         generated_text = model.text_generation(
             tokenizer=tokenizer,
             prompt="My favourite condiment is ",
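The phi4 fix pins the tokenizer to the base microsoft/Phi-4-mini-instruct repo because resolving it from the quantized checkpoint repo yields a wrong GPT2Tokenizer. Splitting model and tokenizer sources like this is the standard workaround when a derived checkpoint ships incomplete tokenizer metadata; a short sketch (repo ids are the ones named in the diff; the recipe and max_seq_len kwargs are assumed to match the other tests):

    from transformers import AutoTokenizer

    from optimum.executorch import ExecuTorchModelForCausalLM

    # Weights from the quantized checkpoint repo...
    model = ExecuTorchModelForCausalLM.from_pretrained(
        "pytorch/Phi-4-mini-instruct-8da4w",
        recipe="xnnpack",  # assumed; matches the recipe used elsewhere in these tests
    )
    # ...but the tokenizer from the base model repo, since the quantized repo
    # resolves to a wrong GPT2Tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")
    print(model.text_generation(tokenizer=tokenizer, prompt="My favourite condiment is ", max_seq_len=32))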

tests/models/test_modeling_qwen3.py

Lines changed: 12 additions & 1 deletion
@@ -41,13 +41,24 @@ def __init__(self, *args, **kwargs):

     @slow
     @pytest.mark.run_slow
+    @pytest.mark.skipif(
+        parse(torchao.__version__) < parse("0.11.0.dev0"),
+        reason="Only available on torchao >= 0.11.0.dev0",
+    )
     def test_qwen3_export_to_executorch(self):
         model_id = "Qwen/Qwen3-0.6B"
         task = "text-generation"
         recipe = "xnnpack"
         with tempfile.TemporaryDirectory() as tempdir:
             subprocess.run(
-                f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch",
+                f"optimum-cli export executorch \
+                    --model {model_id} \
+                    --task {task} \
+                    --recipe {recipe} \
+                    --output_dir {tempdir}/executorch \
+                    --use_custom_sdpa \
+                    --qlinear \
+                    --qembedding",
                 shell=True,
                 check=True,
             )
