Skip to content

Commit 6a7e83f

Browse files
authored
Update code snippet README (#25)
* update README
* remove comment doc
1 parent 51c9b49 commit 6a7e83f

File tree

2 files changed

+6
-12
lines changed

2 files changed

+6
-12
lines changed

README.md

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,7 @@ from optimum.executorch import ExecuTorchModelForCausalLM
5252
from transformers import AutoTokenizer
5353

5454
# Load the exported model
55-
model = ExecuTorchModelForCausalLM.from_pretrained(
56-
"./meta_llama3_2_1b",
57-
export=False
58-
)
55+
model = ExecuTorchModelForCausalLM.from_pretrained("./meta_llama3_2_1b")
5956

6057
# Initialize tokenizer and generate text
6158
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
@@ -71,15 +68,12 @@ generated_text = model.text_generation(
7168
from optimum.executorch import ExecuTorchModelForCausalLM
7269
from transformers import AutoTokenizer
7370

74-
# Load and export model in one step
75-
model = ExecuTorchModelForCausalLM.from_pretrained(
76-
"meta-llama/Llama-3.2-1B",
77-
export=True,
78-
recipe="xnnpack"
79-
)
71+
# Load and export the model on-the-fly
72+
model_id = "meta-llama/Llama-3.2-1B"
73+
model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
8074

8175
# Generate text right away
82-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
76+
tokenizer = AutoTokenizer.from_pretrained(model_id)
8377
generated_text = model.text_generation(
8478
tokenizer=tokenizer,
8579
prompt="Simply put, the theory of relativity states that",

docs/source/quickstart.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ To load a model and run inference, you can just replace your `AutoModelForCausal
3434
+ from optimum.executorch import ExecuTorchModelForCausalLM
3535
from transformers import AutoTokenizer
3636

37-
model_id = "meta-llama/Llama-3.2-1B" # you can also load the model that was exported with the CLI
37+
model_id = "meta-llama/Llama-3.2-1B"
3838
tokenizer = AutoTokenizer.from_pretrained(model_id)
3939
- model = AutoModelForCausalLM.from_pretrained(model_id)
4040
+ model = ExecuTorchModelForCausalLM.from_pretrained(model_id)

0 commit comments

Comments (0)