From aabef6b34188fb39e4c26a6250f7382e57888ecc Mon Sep 17 00:00:00 2001
From: Maarten Grootendorst <maartengrootendorst@gmail.com>
Date: Fri, 4 Apr 2025 08:34:35 +0200
Subject: [PATCH] Fix loading Phi-3 (#48)

---
 chapter01/Chapter 1 - Introduction to Language Models.ipynb   | 4 ++--
 chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb       | 4 ++--
 chapter03/Chapter 3 - Looking Inside LLMs.ipynb               | 2 +-
 .../Chapter 5 - Text Clustering and Topic Modeling.ipynb      | 2 +-
 chapter06/Chapter 6 - Prompt Engineering.ipynb                | 2 +-
 chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb    | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/chapter01/Chapter 1 - Introduction to Language Models.ipynb b/chapter01/Chapter 1 - Introduction to Language Models.ipynb
index 8b4fdd5..97060de 100644
--- a/chapter01/Chapter 1 - Introduction to Language Models.ipynb	
+++ b/chapter01/Chapter 1 - Introduction to Language Models.ipynb	
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "id": "RSNalRXZyTTk"
    },
@@ -82,7 +82,7 @@
     "    \"microsoft/Phi-3-mini-4k-instruct\",\n",
     "    device_map=\"cuda\",\n",
     "    torch_dtype=\"auto\",\n",
-    "    trust_remote_code=True,\n",
+    "    trust_remote_code=False,\n",
     ")\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")"
    ]
diff --git a/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb b/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb
index 20a730b..44ed6a8 100644
--- a/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb	
+++ b/chapter02/Chapter 2 - Tokens and Token Embeddings.ipynb	
@@ -64,7 +64,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -278,7 +278,7 @@
     "    \"microsoft/Phi-3-mini-4k-instruct\",\n",
     "    device_map=\"cuda\",\n",
     "    torch_dtype=\"auto\",\n",
-    "    trust_remote_code=True,\n",
+    "    trust_remote_code=False,\n",
     ")\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")"
    ]
diff --git a/chapter03/Chapter 3 - Looking Inside LLMs.ipynb b/chapter03/Chapter 3 - Looking Inside LLMs.ipynb
index 59decf8..8c05fb3 100644
--- a/chapter03/Chapter 3 - Looking Inside LLMs.ipynb	
+++ b/chapter03/Chapter 3 - Looking Inside LLMs.ipynb	
@@ -483,7 +483,7 @@
     "    \"microsoft/Phi-3-mini-4k-instruct\",\n",
     "    device_map=\"cuda\",\n",
     "    torch_dtype=\"auto\",\n",
-    "    trust_remote_code=True,\n",
+    "    trust_remote_code=False,\n",
     ")\n",
     "\n",
     "# Create a pipeline\n",
diff --git a/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb b/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb
index 5900008..81989e8 100644
--- a/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb	
+++ b/chapter05/Chapter 5 - Text Clustering and Topic Modeling.ipynb	
@@ -100,7 +100,7 @@
     "\n",
     "# Extract metadata\n",
     "abstracts = dataset[\"Abstracts\"]\n",
-    "titles = dataset[\"Titles\"]"
+    "titles = dataset[\"Titles\"] "
    ]
   },
   {
diff --git a/chapter06/Chapter 6 - Prompt Engineering.ipynb b/chapter06/Chapter 6 - Prompt Engineering.ipynb
index 859f11e..5ad7189 100644
--- a/chapter06/Chapter 6 - Prompt Engineering.ipynb	
+++ b/chapter06/Chapter 6 - Prompt Engineering.ipynb	
@@ -142,7 +142,7 @@
     "    \"microsoft/Phi-3-mini-4k-instruct\",\n",
     "    device_map=\"cuda\",\n",
     "    torch_dtype=\"auto\",\n",
-    "    trust_remote_code=True,\n",
+    "    trust_remote_code=False,\n",
     ")\n",
     "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-4k-instruct\")\n",
     "\n",
diff --git a/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb b/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb
index decc439..3b0a6b5 100644
--- a/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb	
+++ b/chapter12/Chapter 12 - Fine-tuning Generation Models.ipynb	
@@ -859,7 +859,7 @@
     "model.config.pretraining_tp = 1\n",
     "\n",
     "# Load LLaMA tokenizer\n",
-    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)\n",
     "tokenizer.pad_token = \"<PAD>\"\n",
     "tokenizer.padding_side = \"left\""
    ]
@@ -1598,7 +1598,7 @@
     "\n",
     "# Load LLaMA tokenizer\n",
     "model_name = \"TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\"\n",
-    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=False)\n",
     "tokenizer.pad_token = \"<PAD>\"\n",
     "tokenizer.padding_side = \"left\""
    ]