Commit 453123f

echarlaix and guangy10 authored

Enable loading model from hub that has already been converted (#13)

* use_auth_token not needed
* remove from_pretrained method
* Enable loading model from the HF hub
* add test
* add task
* add from_pretrained method
* infer if needs export
* update setup
* add test
* remove subfolder
* fix model file pattern
* remove export from tests
* trigger test
* fix
* fix for offline mode
* fix
* infer if pte model in subfolder
* fix style
* Update tests/models/test_modeling.py
  Co-authored-by: Guang Yang <[email protected]>
* Update tests/models/test_modeling.py
  Co-authored-by: Guang Yang <[email protected]>
* fix
* style
* add test

---------

Co-authored-by: Guang Yang <[email protected]>

1 parent 38c9782 · commit 453123f
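
In short, as exercised by the tests in this commit: `from_pretrained` now loads a repo's compiled ExecuTorch program (`.pte`) directly when one exists, and only exports on the fly when none is found. A minimal usage sketch taken from the test cases below (`optimum-internal-testing/tiny-random-llama` is the internal test repo they use):

```python
from optimum.executorch import ExecuTorchModelForCausalLM

# Repo with only eager weights: exported on the fly with the requested recipe.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama", recipe="xnnpack"
)

# Branch that already ships a .pte program: loaded directly, no export step.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama", revision="executorch"
)
```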

File tree

8 files changed: +237, -257 lines

optimum/executorch/modeling.py

Lines changed: 188 additions & 204 deletions
Large diffs are not rendered by default.
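
The collapsed diff above is where the "infer if needs export" logic from the commit message lives. Since it is not rendered, the following is only a sketch of the decision it plausibly makes, built from the two names the new tests import (`_FILE_PATTERN` and `find_files_matching_pattern`); the pattern value and the helper below are assumptions, not the commit's actual code:

```python
# Hypothetical sketch: decide whether a model still needs to be exported by
# checking for compiled ExecuTorch programs (.pte files) in the repo.
from optimum.utils.file_utils import find_files_matching_pattern

_FILE_PATTERN = r".*\.pte$"  # assumed value; only the constant's name is confirmed

def _needs_export(model_name_or_path: str, revision: str = "main") -> bool:
    # Works for hub repo IDs and local directories alike (see the tests below):
    # if no .pte file matches, the model must be exported with the chosen recipe.
    pte_files = find_files_matching_pattern(
        model_name_or_path, pattern=_FILE_PATTERN, revision=revision
    )
    return len(pte_files) == 0
```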

setup.py

Lines changed: 1 addition & 2 deletions
@@ -12,8 +12,7 @@
     assert False, "Error: Could not open '%s' due %s\n" % (filepath, error)
 
 INSTALL_REQUIRE = [
-    # "optimum~=1.24",
-    "optimum@git+https://github.com/huggingface/optimum.git",
+    "optimum~=1.24",
     "executorch>=0.4.0",
     "transformers>=4.46",
 ]

tests/models/test_modeling.py

Lines changed: 41 additions & 21 deletions
@@ -16,52 +16,72 @@
 import os
 import tempfile
 import unittest
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
-import pytest
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
-from transformers.testing_utils import slow
+from huggingface_hub import HfApi
 
 from optimum.executorch import ExecuTorchModelForCausalLM
+from optimum.executorch.modeling import _FILE_PATTERN
+from optimum.exporters.executorch import main_export
+from optimum.utils.file_utils import find_files_matching_pattern
 
 
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-    @slow
-    @pytest.mark.run_slow
-    def test_load_model_from_hub(self):
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path="NousResearch/Llama-3.2-1B",
-            export=True,
-            recipe="xnnpack",
-        )
+    def test_load_cached_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
 
-    @slow
-    @pytest.mark.run_slow
-    def test_load_model_from_local_path(self):
-        from optimum.exporters.executorch import main_export
+    def test_load_et_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch")
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
 
-        model_id = "NousResearch/Llama-3.2-1B"
-        task = "text-generation"
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch-subfolder")
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+    def test_load_cached_model_from_local_path(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
         recipe = "xnnpack"
 
         with tempfile.TemporaryDirectory() as tempdir:
             # Export to a local dir
             main_export(
                 model_name_or_path=model_id,
-                task=task,
                 recipe=recipe,
                 output_dir=tempdir,
+                task="text-generation",
             )
             self.assertTrue(os.path.exists(f"{tempdir}/model.pte"))
 
             # Load the exported model from a local dir
-            model = ExecuTorchModelForCausalLM.from_pretrained(
-                model_name_or_path=tempdir,
-                export=False,
-            )
+            model = ExecuTorchModelForCausalLM.from_pretrained(tempdir)
             self.assertIsInstance(model, ExecuTorchModelForCausalLM)
             self.assertIsInstance(model.model, ExecuTorchModule)
+
+    def test_find_files_matching_pattern(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        # hub model
+        for revision in ("main", "executorch"):
+            pte_files = find_files_matching_pattern(model_id, pattern=_FILE_PATTERN, revision=revision)
+            self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)
+
+        # local model
+        api = HfApi()
+        with TemporaryDirectory() as tmpdirname:
+            for revision in ("main", "executorch"):
+                local_dir = Path(tmpdirname) / revision
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, revision=revision)
+                pte_files = find_files_matching_pattern(local_dir, pattern=_FILE_PATTERN, revision=revision)
+                self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)

tests/models/test_modeling_gemma.py

Lines changed: 1 addition & 5 deletions
@@ -33,11 +33,7 @@ def test_gemma_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2b"
         model_id = "weqweasdas/RM-Gemma-2B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_gemma2.py

Lines changed: 1 addition & 5 deletions
@@ -33,11 +33,7 @@ def test_gemma2_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2-2b"
         model_id = "unsloth/gemma-2-2b-it"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_llama.py

Lines changed: 3 additions & 10 deletions
@@ -33,11 +33,7 @@ def test_llama3_2_1b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "lama/Llama-3.2-1B"
         model_id = "NousResearch/Llama-3.2-1B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
@@ -57,11 +53,8 @@ def test_llama3_2_3b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "lama/Llama-3.2-3B"
         model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
+
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_olmo.py

Lines changed: 1 addition & 5 deletions
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
     def test_olmo_text_generation_with_xnnpack(self):
         model_id = "allenai/OLMo-1B-hf"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

tests/models/test_modeling_qwen2.py

Lines changed: 1 addition & 5 deletions
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
    def test_qwen2_5_text_generation_with_xnnpack(self):
         model_id = "Qwen/Qwen2.5-0.5B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
