Spaces:

Uday
/

ctm-energy-based-halting

Paused

App Files Files Community

Ciaran Regan committed on Oct 14

Commit

f3f0e2d

1 Parent(s): 7ae2b6b

Add Hugging Face Support (#23)

Browse files

* Make CTM hf compatible

* Add notebook to demo HF usage

* Add changes to all notebooks for easier local runs.

Files changed (9) hide show

.gitignore +3 -0
examples/01_mnist.ipynb +4 -4
examples/02_inference.ipynb +0 -0
examples/03_mazes.ipynb +26 -4
examples/04_parity.ipynb +28 -6
examples/05_huggingface.ipynb +0 -0
models/ctm.py +50 -1
requirements.txt +3 -1
tasks/parity/plotting.py +2 -1

.gitignore CHANGED Viewed

@@ -19,4 +19,7 @@ examples/*
 !examples/02_inference.ipynb
 !examples/03_mazes.ipynb
 !examples/04_parity.ipynb
 checkpoints

 !examples/02_inference.ipynb
 !examples/03_mazes.ipynb
 !examples/04_parity.ipynb
+!examples/05_huggingface.ipynb
+!examples/goldfish.jpg
 checkpoints
+utils/hugging_face/

examples/01_mnist.ipynb CHANGED Viewed

@@ -749,7 +749,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
    "id": "b3fbae96",
    "metadata": {},
    "outputs": [],
@@ -789,7 +789,7 @@
     "             [['certainty'] * 8] + \\\n",
     "             [[f'trace_{ti}'] * 8 for ti in range(n_neurons_to_visualise)]\n",
     "\n",
-    "    for stepi in range(n_steps):\n",
     "        fig_gif, axes_gif = plt.subplot_mosaic(mosaic=mosaic, figsize=(31*figscale*8/4, 76*figscale))\n",
     "        probs = softmax(these_predictions[:, stepi])\n",
     "        colors = [('g' if i == this_target else 'b') for i in range(len(probs))]\n",
@@ -940,7 +940,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -954,7 +954,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.9"
   }
  },
  "nbformat": 4,

   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "b3fbae96",
    "metadata": {},
    "outputs": [],
     "             [['certainty'] * 8] + \\\n",
     "             [[f'trace_{ti}'] * 8 for ti in range(n_neurons_to_visualise)]\n",
     "\n",
+    "    for stepi in tqdm(range(n_steps), desc=\"Processing steps\", unit=\"step\"):\n",
     "        fig_gif, axes_gif = plt.subplot_mosaic(mosaic=mosaic, figsize=(31*figscale*8/4, 76*figscale))\n",
     "        probs = softmax(these_predictions[:, stepi])\n",
     "        colors = [('g' if i == this_target else 'b') for i in range(len(probs))]\n",
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.12.10"
   }
  },
  "nbformat": 4,

examples/02_inference.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

examples/03_mazes.ipynb CHANGED Viewed

@@ -67,6 +67,31 @@
     "In addition to installing some dependencies, we also clone the CTM repo (assuming this tutorial is being ran in Colab), so that we can access the base CTM model."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -108,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "24ffe416",
    "metadata": {},
    "outputs": [
@@ -122,9 +147,6 @@
     }
    ],
    "source": [
-    "import sys\n",
-    "sys.path.append(\"./continuous-thought-machines\")\n",
-    "\n",
     "import os\n",
     "import torch\n",
     "import torch.nn as nn\n",

     "In addition to installing some dependencies, we also clone the CTM repo (assuming this tutorial is being ran in Colab), so that we can access the base CTM model."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "537dd917",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "USE_COLAB = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "\n",
+    "if USE_COLAB:\n",
+    "    !git clone https://github.com/SakanaAI/continuous-thought-machines.git\n",
+    "    sys.path.append(\"./continuous-thought-machines\")\n",
+    "else:\n",
+    "    sys.path.append(\"..\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "24ffe416",
    "metadata": {},
    "outputs": [
     }
    ],
    "source": [
     "import os\n",
     "import torch\n",
     "import torch.nn as nn\n",

examples/04_parity.ipynb CHANGED Viewed

@@ -52,6 +52,32 @@
     "In addition to installing some dependencies, we also clone the CTM repo (assuming this tutorial is being run in Colab), so that we can access the base CTM model."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -60,8 +86,7 @@
    "outputs": [],
    "source": [
     "!pip install gdown\n",
-    "!pip install mediapy\n",
-    "!git clone https://github.com/SakanaAI/continuous-thought-machines.git\n"
    ]
   },
   {
@@ -74,14 +99,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "id": "24ffe416",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import sys\n",
-    "sys.path.append(\"./continuous-thought-machines\")\n",
-    "\n",
     "import os\n",
     "import torch\n",
     "import torch.nn as nn\n",

     "In addition to installing some dependencies, we also clone the CTM repo (assuming this tutorial is being run in Colab), so that we can access the base CTM model."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c06d1e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "USE_COLAB = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30ab5f0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "\n",
+    "if USE_COLAB:\n",
+    "    !git clone https://github.com/SakanaAI/continuous-thought-machines.git\n",
+    "    sys.path.append(\"./continuous-thought-machines\")\n",
+    "else:\n",
+    "    sys.path.append(\"..\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "outputs": [],
    "source": [
     "!pip install gdown\n",
+    "!pip install mediapy"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "24ffe416",
    "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
     "import torch\n",
     "import torch.nn as nn\n",

examples/05_huggingface.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

models/ctm.py CHANGED Viewed

@@ -2,6 +2,7 @@ import torch.nn as nn
 import torch
 import numpy as np
 import math
 from models.modules import ParityBackbone, SynapseUNET, Squeeze, SuperLinear, LearnableFourierPositionalEncoding, MultiLearnableFourierPositionalEncoding, CustomRotationalEmbedding, CustomRotationalEmbedding1D, ShallowWide
 from models.resnet import prepare_resnet_backbone
@@ -13,7 +14,7 @@ from models.constants import (
     VALID_POSITIONAL_EMBEDDING_TYPES
 )
-class ContinuousThoughtMachine(nn.Module):
     """
     Continuous Thought Machine (CTM).
@@ -149,6 +150,53 @@ class ContinuousThoughtMachine(nn.Module):
         # --- Output Procesing ---
         self.output_projector = nn.Sequential(nn.LazyLinear(self.out_dims))
     # --- Core CTM Methods ---
     def compute_synchronisation(self, activated_state, decay_alpha, decay_beta, r, synch_type):
@@ -553,3 +601,4 @@ class ContinuousThoughtMachine(nn.Module):
         if track:
             return predictions, certainties, (np.array(synch_out_tracking), np.array(synch_action_tracking)), np.array(pre_activations_tracking), np.array(post_activations_tracking), np.array(attention_tracking)
         return predictions, certainties, synchronisation_out

 import torch
 import numpy as np
 import math
+from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
 from models.modules import ParityBackbone, SynapseUNET, Squeeze, SuperLinear, LearnableFourierPositionalEncoding, MultiLearnableFourierPositionalEncoding, CustomRotationalEmbedding, CustomRotationalEmbedding1D, ShallowWide
 from models.resnet import prepare_resnet_backbone
     VALID_POSITIONAL_EMBEDDING_TYPES
 )
+class ContinuousThoughtMachine(nn.Module, PyTorchModelHubMixin):
     """
     Continuous Thought Machine (CTM).
         # --- Output Procesing ---
         self.output_projector = nn.Sequential(nn.LazyLinear(self.out_dims))
+    @classmethod
+    def _from_pretrained(
+        cls,
+        *,
+        model_id: str,
+        revision=None,
+        cache_dir=None,
+        force_download=False,
+        proxies=None,
+        resume_download=None,
+        local_files_only=False,
+        token=None,
+        map_location="cpu",
+        strict=False,
+        **model_kwargs,
+    ):
+        """Override to handle lazy weights initialization."""
+        model = cls(**model_kwargs).to(map_location)
+        # The CTM contains Lazy modules, so we must run a dummy forward pass to initialize them
+        if "imagenet" in model_id:
+            dummy_input = torch.randn(1, 3, 224, 224, device=map_location)
+        elif "maze-large" in model_id:
+            dummy_input = torch.randn(1, 3, 99, 99, device=map_location)
+        else:
+            raise NotImplementedError
+        with torch.no_grad():
+            _ = model(dummy_input)
+        model_file = hf_hub_download(
+            repo_id=model_id,
+            filename="model.safetensors",
+            revision=revision,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            proxies=proxies,
+            resume_download=resume_download,
+            token=token,
+            local_files_only=local_files_only,
+        )
+        from safetensors.torch import load_model as load_model_as_safetensor
+        load_model_as_safetensor(model, model_file, strict=strict, device=map_location)
+        model.eval()
+        return model
     # --- Core CTM Methods ---
     def compute_synchronisation(self, activated_state, decay_alpha, decay_beta, r, synch_type):
         if track:
             return predictions, certainties, (np.array(synch_out_tracking), np.array(synch_action_tracking)), np.array(pre_activations_tracking), np.array(post_activations_tracking), np.array(attention_tracking)
         return predictions, certainties, synchronisation_out

requirements.txt CHANGED Viewed

@@ -12,4 +12,6 @@ python-dotenv
 gymnasium
 minigrid
 datasets
-autoclip

 gymnasium
 minigrid
 datasets
+autoclip
+huggingface_hub
+safetensors

tasks/parity/plotting.py CHANGED Viewed

@@ -13,6 +13,7 @@ import imageio.v2 as imageio
 from PIL import Image
 import math
 import re
 sns.set_style('darkgrid')
 mpl.use('Agg')
@@ -43,7 +44,7 @@ def make_parity_gif(predictions, certainties, targets, pre_activations, post_act
              [['certainty', 'certainty', 'certainty', 'certainty', 'certainty', 'certainty', 'certainty', 'certainty']] + \
              [[f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}'] for ti in range(n_neurons_to_visualise)]
-    for stepi in range(n_steps):
         fig_gif, axes_gif = plt.subplot_mosaic(mosaic=mosaic, figsize=(31*figscale*8/4, 76*figscale))
         # Plot predictions

 from PIL import Image
 import math
 import re
+from tqdm import tqdm
 sns.set_style('darkgrid')
 mpl.use('Agg')
              [['certainty', 'certainty', 'certainty', 'certainty', 'certainty', 'certainty', 'certainty', 'certainty']] + \
              [[f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}', f'trace_{ti}'] for ti in range(n_neurons_to_visualise)]
+    for stepi in tqdm(range(n_steps), desc="Processing steps", unit="step"):
         fig_gif, axes_gif = plt.subplot_mosaic(mosaic=mosaic, figsize=(31*figscale*8/4, 76*figscale))
         # Plot predictions