
Enable loading models from the hub that have already been converted #13
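In practice, this change lets `from_pretrained` consume a repo that already hosts an exported ExecuTorch program (a `.pte` file) instead of always re-exporting with `export=True`. A minimal sketch of the resulting usage, based on the tests in this PR (the `optimum-internal-testing/tiny-random-llama` repo keeps its pre-converted artifact on the `executorch` revision):

```python
from optimum.executorch import ExecuTorchModelForCausalLM

# The "executorch" revision of this test repo already contains a model.pte,
# so loading it skips the on-the-fly export entirely.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama",
    revision="executorch",
    recipe="xnnpack",
)
```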


Merged · 24 commits · Feb 12, 2025
Changes from 17 commits
310 changes: 160 additions & 150 deletions optimum/executorch/modeling.py

Large diffs are not rendered by default.
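The `modeling.py` diff itself is not rendered here, but the review thread further down describes its core behavior: look for already-exported `.pte` files and only trigger an export when none are found. A rough sketch of that decision logic, under the assumption that it builds on the `find_files_matching_pattern` / `_FILE_PATTERN` pair exercised by the tests below (`should_export` is a hypothetical name, not the actual implementation):

```python
from optimum.executorch.modeling import _FILE_PATTERN  # pattern matching exported .pte files
from optimum.utils.file_utils import find_files_matching_pattern


def should_export(model_name_or_path: str, revision: str = "main") -> bool:
    """Sketch: export only when the repo (or local dir) holds no pre-converted .pte."""
    pte_files = find_files_matching_pattern(
        model_name_or_path, pattern=_FILE_PATTERN, revision=revision
    )
    return len(pte_files) == 0
```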

3 changes: 1 addition & 2 deletions setup.py
```diff
@@ -12,8 +12,7 @@
     assert False, "Error: Could not open '%s' due %s\n" % (filepath, error)
 
 INSTALL_REQUIRE = [
-    # "optimum~=1.24",
-    "optimum@git+https://github.com/huggingface/optimum.git",
+    "optimum~=1.24",
     "executorch>=0.4.0",
     "transformers>=4.46",
 ]
```
56 changes: 36 additions & 20 deletions tests/models/test_modeling.py
```diff
@@ -16,52 +16,68 @@
 import os
 import tempfile
 import unittest
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
 import pytest
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
 from transformers.testing_utils import slow
+from huggingface_hub import HfApi
 
 from optimum.executorch import ExecuTorchModelForCausalLM
+from optimum.executorch.modeling import _FILE_PATTERN
+from optimum.exporters.executorch import main_export
+from optimum.utils.file_utils import find_files_matching_pattern
 
 
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     @slow
     @pytest.mark.run_slow
     def test_load_model_from_hub(self):
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path="NousResearch/Llama-3.2-1B",
-            export=True,
-            recipe="xnnpack",
-        )
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
 
     @slow
     @pytest.mark.run_slow
-    def test_load_model_from_local_path(self):
-        from optimum.exporters.executorch import main_export
+    def test_load_et_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
 
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch", recipe="xnnpack")
```
guangy10 (Collaborator) commented on Feb 8, 2025:

I'm confused: what is the difference with and without revision="executorch"? I guess the underlying question is: what does the revision parameter do?

Oh, I see what it is: https://huggingface.co/optimum-internal-testing/tiny-random-llama/tree/executorch. Follow-up question: when revision="main" and the .pte file doesn't exist there, what happens?

Author (Collaborator) replied:

If no .pte files are detected, export is set to True and the model is converted to ExecuTorch on the fly. Afterwards, the user can either save the resulting model locally with .save_pretrained(save_dir) or push it directly to the hub with .push_to_hub(repo_id). Both methods still need to be implemented; let me know if you're interested in tackling this in a follow-up PR.
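To make the reply concrete, here is a minimal sketch of the fallback path it describes (the `save_pretrained`/`push_to_hub` calls are shown commented out because, as noted above, they were not yet implemented at this point):

```python
from optimum.executorch import ExecuTorchModelForCausalLM

# revision="main" carries no .pte, so export is enabled automatically and
# the checkpoint is converted to ExecuTorch on the fly.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama",
    recipe="xnnpack",
)

# Planned follow-ups (not implemented in this PR):
# model.save_pretrained(save_dir)  # persist the converted program locally
# model.push_to_hub(repo_id)       # or publish it back to the hub
```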

```diff
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
-        model_id = "NousResearch/Llama-3.2-1B"
-        task = "text-generation"
+    def test_load_model_from_local_path(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
         recipe = "xnnpack"
 
         with tempfile.TemporaryDirectory() as tempdir:
             # Export to a local dir
             main_export(
                 model_name_or_path=model_id,
-                task=task,
                 recipe=recipe,
                 output_dir=tempdir,
+                task="text-generation",
             )
             self.assertTrue(os.path.exists(f"{tempdir}/model.pte"))
 
             # Load the exported model from a local dir
-            model = ExecuTorchModelForCausalLM.from_pretrained(
-                model_name_or_path=tempdir,
-                export=False,
-            )
+            model = ExecuTorchModelForCausalLM.from_pretrained(tempdir)
             self.assertIsInstance(model, ExecuTorchModelForCausalLM)
             self.assertIsInstance(model.model, ExecuTorchModule)
 
+    def test_find_files_matching_pattern(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        # hub model
+        for revision in ("main", "executorch"):
+            pte_files = find_files_matching_pattern(model_id, pattern=_FILE_PATTERN, revision=revision)
+            self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)
+
+        # local model
+        api = HfApi()
+        with TemporaryDirectory() as tmpdirname:
+            for revision in ("main", "executorch"):
+                local_dir = Path(tmpdirname) / revision
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, revision=revision)
+                pte_files = find_files_matching_pattern(local_dir, pattern=_FILE_PATTERN, revision=revision)
+                self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)
```
6 changes: 1 addition & 5 deletions tests/models/test_modeling_gemma.py
```diff
@@ -33,11 +33,7 @@ def test_gemma_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2b"
         model_id = "weqweasdas/RM-Gemma-2B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
```
6 changes: 1 addition & 5 deletions tests/models/test_modeling_gemma2.py
```diff
@@ -33,11 +33,7 @@ def test_gemma2_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2-2b"
         model_id = "unsloth/gemma-2-2b-it"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
```
13 changes: 3 additions & 10 deletions tests/models/test_modeling_llama.py
```diff
@@ -33,11 +33,7 @@ def test_llama3_2_1b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "meta-llama/Llama-3.2-1B"
         model_id = "NousResearch/Llama-3.2-1B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
@@ -57,11 +53,8 @@ def test_llama3_2_3b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "meta-llama/Llama-3.2-3B"
         model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
+
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
```
6 changes: 1 addition & 5 deletions tests/models/test_modeling_olmo.py
```diff
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
     def test_olmo_text_generation_with_xnnpack(self):
         model_id = "allenai/OLMo-1B-hf"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
```
6 changes: 1 addition & 5 deletions tests/models/test_modeling_qwen2.py
```diff
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
     def test_qwen2_5_text_generation_with_xnnpack(self):
         model_id = "Qwen/Qwen2.5-0.5B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
```