{ "cells": [ { "cell_type": "markdown", "id": "1d0665ce", "metadata": { "papermill": { "duration": 0.004409, "end_time": "2024-09-05T18:30:34.421709", "exception": false, "start_time": "2024-09-05T18:30:34.417300", "status": "completed" }, "tags": [] }, "source": [ "# Neural network module" ] }, { "cell_type": "markdown", "id": "e51bfef8", "metadata": { "papermill": { "duration": 0.01941, "end_time": "2024-09-05T18:30:34.446073", "exception": false, "start_time": "2024-09-05T18:30:34.426663", "status": "completed" }, "tags": [] }, "source": [ "Here we construct the neural network module. The `Module` class defines an abstract class that maintains a list of the parameters used in forward pass implemented in `__call__`. The decorator `@final` is to prevent any inheriting class from overriding the methods as doing so would result in a warning (or an error with a type checker)." ] }, { "cell_type": "code", "execution_count": 1, "id": "ae2cc403", "metadata": { "execution": { "iopub.execute_input": "2024-09-05T18:30:34.461626Z", "iopub.status.busy": "2024-09-05T18:30:34.461229Z", "iopub.status.idle": "2024-09-05T18:30:34.492764Z", "shell.execute_reply": "2024-09-05T18:30:34.492267Z" }, "papermill": { "duration": 0.045357, "end_time": "2024-09-05T18:30:34.495126", "exception": false, "start_time": "2024-09-05T18:30:34.449769", "status": "completed" }, "tags": [ "remove-cell" ] }, "outputs": [], "source": [ "from chapter import *" ] }, { "cell_type": "code", "execution_count": 2, "id": "4a432dbd", "metadata": { "execution": { "iopub.execute_input": "2024-09-05T18:30:34.504787Z", "iopub.status.busy": "2024-09-05T18:30:34.504091Z", "iopub.status.idle": "2024-09-05T18:30:34.604889Z", "shell.execute_reply": "2024-09-05T18:30:34.604458Z" }, "papermill": { "duration": 0.108303, "end_time": "2024-09-05T18:30:34.606828", "exception": false, "start_time": "2024-09-05T18:30:34.498525", "status": "completed" }, "tags": [ "remove-input" ] }, "outputs": [ { "data": { "text/html": [ "
```python
from abc import ABC, abstractmethod
from typing import final
import random


class Module(ABC):
    def __init__(self):
        self._parameters = []

    @final
    def parameters(self) -> list:
        # Flat list of all trainable parameters of this module
        return self._parameters

    @abstractmethod
    def __call__(self, x: list):
        # Forward pass; each subclass provides its own implementation
        pass

    @final
    def zero_grad(self):
        # Reset every parameter's gradient before the next backward pass
        for p in self.parameters():
            p.grad = 0
```
`Neuron` computes a weighted sum of its inputs plus a bias, optionally followed by an activation:

```python
class Neuron(Module):
    def __init__(self, n_in, activation=None):
        super().__init__()
        self.n_in = n_in
        self.act = activation

        self.w = [Node(random.random()) for _ in range(n_in)]
        self.b = Node(0.0)
        self._parameters = self.w + [self.b]

    def __call__(self, x: list):
        assert len(x) == self.n_in
        # Weighted sum w · x + b, built out of Node operations
        out = sum((x[j] * self.w[j] for j in range(self.n_in)), start=self.b)
        if self.act is not None:
            if self.act == "tanh":
                out = out.tanh()
            elif self.act == "relu":
                out = out.relu()
            else:
                raise NotImplementedError("Activation not supported.")
        return out

    def __repr__(self):
        return f"{self.act if self.act is not None else 'linear'}({len(self.w)})"
```
"\n",
"\n",
"class Layer(Module):\n",
" def __init__(self, n_in, n_out, *args):\n",
" self.neurons = [Neuron(n_in, *args) for _ in range(n_out)]\n",
" self._parameters = [p for n in self.neurons for p in n.parameters()]\n",
"\n",
" def __call__(self, x: list):\n",
" out = [n(x) for n in self.neurons]\n",
" return out[0] if len(out) == 1 else out\n",
"\n",
" def __repr__(self):\n",
" return f"Layer[{', '.join(str(n) for n in self.neurons)}]"\n",
"\n",
"\n",
"class MLP(Module):\n",
" def __init__(self, n_in, n_outs, activation=None):\n",
" sizes = [n_in] + n_outs\n",
" self.layers = []\n",
" for i in range(len(n_outs)):\n",
" act = activation if i < len(n_outs) - 1 else None\n",
" layer = Layer(sizes[i], sizes[i + 1], act)\n",
" self.layers.append(layer)\n",
"\n",
" self._parameters = [p for layer in self.layers for p in layer.parameters()]\n",
"\n",
" def __call__(self, x):\n",
" for layer in self.layers:\n",
" x = layer(x)\n",
" return x\n",
"\n",
" def __repr__(self):\n",
" return f"MLP[{', '.join(str(layer) for layer in self.layers)}]"\n",
"