Commit 453123f

echarlaix and guangy10 authored

Enable loading model from hub that has already been converted (#13)

* use_auth_token not needed
* remove from_pretrained method
* Enable loading model from the HF hub
* add test
* add task
* add from_pretrained method
* infer if needs export
* update setup
* add test
* remove subfolder
* fix model file pattern
* remove export from tests
* trigger test
* fix
* fix for offline mode
* fix
* infer if pte model in subfolder
* fix style
* Update tests/models/test_modeling.py
  Co-authored-by: Guang Yang <[email protected]>
* Update tests/models/test_modeling.py
  Co-authored-by: Guang Yang <[email protected]>
* fix
* style
* add test

---------

Co-authored-by: Guang Yang <[email protected]>

1 parent 38c9782 · commit 453123f
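
In short, as exercised by the tests in this commit: `from_pretrained` now loads a repo's compiled ExecuTorch program (`.pte`) directly when one exists, and only exports on the fly when none is found. A minimal usage sketch taken from the test cases below (`optimum-internal-testing/tiny-random-llama` is the internal test repo they use):

```python
from optimum.executorch import ExecuTorchModelForCausalLM

# Repo with only eager weights: exported on the fly with the requested recipe.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama", recipe="xnnpack"
)

# Branch that already ships a .pte program: loaded directly, no export step.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama", revision="executorch"
)
```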

File tree

8 files changed: +237, -257 lines

optimum/executorch/modeling.py

Lines changed: 188 additions & 204 deletions
Large diffs are not rendered by default.
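
The collapsed diff above is where the "infer if needs export" logic from the commit message lives. Since it is not rendered, the following is only a sketch of the decision it plausibly makes, built from the two names the new tests import (`_FILE_PATTERN` and `find_files_matching_pattern`); the pattern value and the helper below are assumptions, not the commit's actual code:

```python
# Hypothetical sketch: decide whether a model still needs to be exported by
# checking for compiled ExecuTorch programs (.pte files) in the repo.
from optimum.utils.file_utils import find_files_matching_pattern

_FILE_PATTERN = r".*\.pte$"  # assumed value; only the constant's name is confirmed

def _needs_export(model_name_or_path: str, revision: str = "main") -> bool:
    # Works for hub repo IDs and local directories alike (see the tests below):
    # if no .pte file matches, the model must be exported with the chosen recipe.
    pte_files = find_files_matching_pattern(
        model_name_or_path, pattern=_FILE_PATTERN, revision=revision
    )
    return len(pte_files) == 0
```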

setup.py

Lines changed: 1 addition & 2 deletions
@@ -12,8 +12,7 @@
     assert False, "Error: Could not open '%s' due %s\n" % (filepath, error)
 
 INSTALL_REQUIRE = [
-    # "optimum~=1.24",
-    "optimum@git+https://github.com/huggingface/optimum.git",
+    "optimum~=1.24",
     "executorch>=0.4.0",
     "transformers>=4.46",
 ]

tests/models/test_modeling.py

Lines changed: 41 additions & 21 deletions
@@ -16,52 +16,72 @@
 import os
 import tempfile
 import unittest
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
-import pytest
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
-from transformers.testing_utils import slow
+from huggingface_hub import HfApi
 
 from optimum.executorch import ExecuTorchModelForCausalLM
+from optimum.executorch.modeling import _FILE_PATTERN
+from optimum.exporters.executorch import main_export
+from optimum.utils.file_utils import find_files_matching_pattern
 
 
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-    @slow
-    @pytest.mark.run_slow
-    def test_load_model_from_hub(self):
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path="NousResearch/Llama-3.2-1B",
-            export=True,
-            recipe="xnnpack",
-        )
+    def test_load_cached_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
 
-    @slow
-    @pytest.mark.run_slow
-    def test_load_model_from_local_path(self):
-        from optimum.exporters.executorch import main_export
+    def test_load_et_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch")
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
 
-        model_id = "NousResearch/Llama-3.2-1B"
-        task = "text-generation"
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch-subfolder")
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+    def test_load_cached_model_from_local_path(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
         recipe = "xnnpack"
 
         with tempfile.TemporaryDirectory() as tempdir:
             # Export to a local dir
             main_export(
                 model_name_or_path=model_id,
-                task=task,
                 recipe=recipe,
                 output_dir=tempdir,
+                task="text-generation",
             )
             self.assertTrue(os.path.exists(f"{tempdir}/model.pte"))
 
             # Load the exported model from a local dir
-            model = ExecuTorchModelForCausalLM.from_pretrained(
-                model_name_or_path=tempdir,
-                export=False,
-            )
+            model = ExecuTorchModelForCausalLM.from_pretrained(tempdir)
             self.assertIsInstance(model, ExecuTorchModelForCausalLM)
             self.assertIsInstance(model.model, ExecuTorchModule)
+
+    def test_find_files_matching_pattern(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        # hub model
+        for revision in ("main", "executorch"):
+            pte_files = find_files_matching_pattern(model_id, pattern=_FILE_PATTERN, revision=revision)
+            self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)
+
+        # local model
+        api = HfApi()
+        with TemporaryDirectory() as tmpdirname:
+            for revision in ("main", "executorch"):
+                local_dir = Path(tmpdirname) / revision
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, revision=revision)
+                pte_files = find_files_matching_pattern(local_dir, pattern=_FILE_PATTERN, revision=revision)
+                self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)

tests/models/test_modeling_gemma.py

Lines changed: 1 addition & 5 deletions
@@ -33,11 +33,7 @@ def test_gemma_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2b"
         model_id = "weqweasdas/RM-Gemma-2B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_gemma2.py

Lines changed: 1 addition & 5 deletions
@@ -33,11 +33,7 @@ def test_gemma2_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2-2b"
         model_id = "unsloth/gemma-2-2b-it"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_llama.py

Lines changed: 3 additions & 10 deletions
@@ -33,11 +33,7 @@ def test_llama3_2_1b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "lama/Llama-3.2-1B"
         model_id = "NousResearch/Llama-3.2-1B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
@@ -57,11 +53,8 @@ def test_llama3_2_3b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "lama/Llama-3.2-3B"
         model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
+
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_olmo.py

Lines changed: 1 addition & 5 deletions
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
     def test_olmo_text_generation_with_xnnpack(self):
         model_id = "allenai/OLMo-1B-hf"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_qwen2.py

Lines changed: 1 addition & 5 deletions
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
    def test_qwen2_5_text_generation_with_xnnpack(self):
         model_id = "Qwen/Qwen2.5-0.5B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
