From aabef6b34188fb39e4c26a6250f7382e57888ecc Mon Sep 17 00:00:00 2001 From: Maarten Grootendorst Date: Fri, 4 Apr 2025 08:34:35 +0200 Subject: [PATCH] Fix loading Phi-3 (#48) --- chapter01/Chapter 1 - Introduction to Language Models.ipynb | 4 ++-- chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb | 4 ++-- chapter03/Chapter 3 - Looking Inside LLMs.ipynb | 2 +- .../Chapter 5 - Text Clustering and Topic Modeling.ipynb | 2 +- chapter06/Chapter 6 - Prompt Engineering.ipynb | 2 +- chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/chapter01/Chapter 1 - Introduction to Language Models.ipynb b/chapter01/Chapter 1 - Introduction to Language Models.ipynb index 8b4fdd5..97060de 100644 --- a/chapter01/Chapter 1 - Introduction to Language Models.ipynb +++ b/chapter01/Chapter 1 - Introduction to Language Models.ipynb @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "RSNalRXZyTTk" }, @@ -82,7 +82,7 @@ " \"microsoft/Phi-3-mini-4k-instruct\",\n", " device_map=\"cuda\",\n", " torch_dtype=\"auto\",\n", - " trust_remote_code=True,\n", + " trust_remote_code=False,\n", ")\n", "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")" ] diff --git a/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb b/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb index 20a730b..44ed6a8 100644 --- a/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb +++ b/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -278,7 +278,7 @@ " \"microsoft/Phi-3-mini-4k-instruct\",\n", " device_map=\"cuda\",\n", " torch_dtype=\"auto\",\n", - " trust_remote_code=True,\n", + " trust_remote_code=False,\n", ")\n", "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")" ] diff --git a/chapter03/Chapter 3 - Looking Inside LLMs.ipynb b/chapter03/Chapter 3 - Looking Inside LLMs.ipynb index 59decf8..8c05fb3 100644 --- a/chapter03/Chapter 3 - Looking Inside LLMs.ipynb +++ b/chapter03/Chapter 3 - Looking Inside LLMs.ipynb @@ -483,7 +483,7 @@ " \"microsoft/Phi-3-mini-4k-instruct\",\n", " device_map=\"cuda\",\n", " torch_dtype=\"auto\",\n", - " trust_remote_code=True,\n", + " trust_remote_code=False,\n", ")\n", "\n", "# Create a pipeline\n", diff --git a/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb b/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb index 5900008..81989e8 100644 --- a/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb +++ b/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb @@ -100,7 +100,7 @@ "\n", "# Extract metadata\n", "abstracts = dataset[\"Abstracts\"]\n", - "titles = dataset[\"Titles\"]" + "titles = dataset[\"Titles\"] " ] }, { diff --git a/chapter06/Chapter 6 - Prompt Engineering.ipynb b/chapter06/Chapter 6 - Prompt Engineering.ipynb index 859f11e..5ad7189 100644 --- a/chapter06/Chapter 6 - Prompt Engineering.ipynb +++ b/chapter06/Chapter 6 - Prompt Engineering.ipynb @@ -142,7 +142,7 @@ " \"microsoft/Phi-3-mini-4k-instruct\",\n", " device_map=\"cuda\",\n", " torch_dtype=\"auto\",\n", - " trust_remote_code=True,\n", + " trust_remote_code=False,\n", ")\n", "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n", "\n", diff --git a/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb b/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb index decc439..3b0a6b5 100644 --- a/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb +++ b/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb @@ -859,7 +859,7 @@ "model.config.pretraining_tp = 1\n", "\n", "# Load LLaMA tokenizer\n", - "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)\n", "tokenizer.pad_token = \"\"\n", "tokenizer.padding_side = \"left\"" ] @@ -1598,7 +1598,7 @@ "\n", "# Load LLaMA tokenizer\n", "model_name = \"TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\"\n", - "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)\n", "tokenizer.pad_token = \"\"\n", "tokenizer.padding_side = \"left\"" ]