{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d58644a3",
   "metadata": {
    "papermill": {
     "duration": 0.007135,
     "end_time": "2024-09-05T18:30:30.865440",
     "exception": false,
     "start_time": "2024-09-05T18:30:30.858305",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Defined operations"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e54b0381",
   "metadata": {
    "papermill": {
     "duration": 0.004333,
     "end_time": "2024-09-05T18:30:30.875104",
     "exception": false,
     "start_time": "2024-09-05T18:30:30.870771",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
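    "Recall that every operation must be defined together with its local gradient computation for backpropagation (BP) to work. In this section, we implement a minimal **autograd engine** for building computational graphs. It starts with the base `Node` class, which has a `data` attribute for storing the node's output and a `grad` attribute for storing the global gradient, i.e. the gradient of the loss with respect to that node. The base class also defines a `backward` method that solves for `grad` as described above: starting from the node it is called on, it visits the nodes in topological order and adds to each parent's `grad` the product of the current node's `grad` and its local gradient with respect to that parent."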
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "967a8e6d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:30.884439Z",
     "iopub.status.busy": "2024-09-05T18:30:30.884096Z",
     "iopub.status.idle": "2024-09-05T18:30:30.938215Z",
     "shell.execute_reply": "2024-09-05T18:30:30.937799Z"
    },
    "papermill": {
     "duration": 0.060396,
     "end_time": "2024-09-05T18:30:30.939730",
     "exception": false,
     "start_time": "2024-09-05T18:30:30.879334",
     "status": "completed"
    },
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>pre { line-height: 125%; }\n",
       "td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n",
       "span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n",
       "td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n",
       "span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n",
       ".output_html .hll { background-color: #ffffcc }\n",
       ".output_html { background: #f8f8f8; }\n",
       ".output_html .c { color: #3D7B7B; font-style: italic } /* Comment */\n",
       ".output_html .err { border: 1px solid #FF0000 } /* Error */\n",
       ".output_html .k { color: #008000; font-weight: bold } /* Keyword */\n",
       ".output_html .o { color: #666666 } /* Operator */\n",
       ".output_html .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */\n",
       ".output_html .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */\n",
       ".output_html .cp { color: #9C6500 } /* Comment.Preproc */\n",
       ".output_html .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */\n",
       ".output_html .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */\n",
       ".output_html .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */\n",
       ".output_html .gd { color: #A00000 } /* Generic.Deleted */\n",
       ".output_html .ge { font-style: italic } /* Generic.Emph */\n",
       ".output_html .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */\n",
       ".output_html .gr { color: #E40000 } /* Generic.Error */\n",
       ".output_html .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
       ".output_html .gi { color: #008400 } /* Generic.Inserted */\n",
       ".output_html .go { color: #717171 } /* Generic.Output */\n",
       ".output_html .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
       ".output_html .gs { font-weight: bold } /* Generic.Strong */\n",
       ".output_html .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
       ".output_html .gt { color: #0044DD } /* Generic.Traceback */\n",
       ".output_html .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
       ".output_html .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
       ".output_html .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
       ".output_html .kp { color: #008000 } /* Keyword.Pseudo */\n",
       ".output_html .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
       ".output_html .kt { color: #B00040 } /* Keyword.Type */\n",
       ".output_html .m { color: #666666 } /* Literal.Number */\n",
       ".output_html .s { color: #BA2121 } /* Literal.String */\n",
       ".output_html .na { color: #687822 } /* Name.Attribute */\n",
       ".output_html .nb { color: #008000 } /* Name.Builtin */\n",
       ".output_html .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
       ".output_html .no { color: #880000 } /* Name.Constant */\n",
       ".output_html .nd { color: #AA22FF } /* Name.Decorator */\n",
       ".output_html .ni { color: #717171; font-weight: bold } /* Name.Entity */\n",
       ".output_html .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */\n",
       ".output_html .nf { color: #0000FF } /* Name.Function */\n",
       ".output_html .nl { color: #767600 } /* Name.Label */\n",
       ".output_html .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
       ".output_html .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
       ".output_html .nv { color: #19177C } /* Name.Variable */\n",
       ".output_html .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
       ".output_html .w { color: #bbbbbb } /* Text.Whitespace */\n",
       ".output_html .mb { color: #666666 } /* Literal.Number.Bin */\n",
       ".output_html .mf { color: #666666 } /* Literal.Number.Float */\n",
       ".output_html .mh { color: #666666 } /* Literal.Number.Hex */\n",
       ".output_html .mi { color: #666666 } /* Literal.Number.Integer */\n",
       ".output_html .mo { color: #666666 } /* Literal.Number.Oct */\n",
       ".output_html .sa { color: #BA2121 } /* Literal.String.Affix */\n",
       ".output_html .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       ".output_html .sc { color: #BA2121 } /* Literal.String.Char */\n",
       ".output_html .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       ".output_html .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       ".output_html .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       ".output_html .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */\n",
       ".output_html .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       ".output_html .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */\n",
       ".output_html .sx { color: #008000 } /* Literal.String.Other */\n",
       ".output_html .sr { color: #A45A77 } /* Literal.String.Regex */\n",
       ".output_html .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       ".output_html .ss { color: #19177C } /* Literal.String.Symbol */\n",
       ".output_html .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       ".output_html .fm { color: #0000FF } /* Name.Function.Magic */\n",
       ".output_html .vc { color: #19177C } /* Name.Variable.Class */\n",
       ".output_html .vg { color: #19177C } /* Name.Variable.Global */\n",
       ".output_html .vi { color: #19177C } /* Name.Variable.Instance */\n",
       ".output_html .vm { color: #19177C } /* Name.Variable.Magic */\n",
       ".output_html .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span><span class=\"kn\">import</span> <span class=\"nn\">math</span>\n",
       "<span class=\"kn\">import</span> <span class=\"nn\">random</span>\n",
       "<span class=\"n\">random</span><span class=\"o\">.</span><span class=\"n\">seed</span><span class=\"p\">(</span><span class=\"mi\">42</span><span class=\"p\">)</span>\n",
       "\n",
       "<span class=\"kn\">from</span> <span class=\"nn\">typing</span> <span class=\"kn\">import</span> <span class=\"n\">final</span>\n",
       "<span class=\"kn\">from</span> <span class=\"nn\">collections</span> <span class=\"kn\">import</span> <span class=\"n\">OrderedDict</span>\n",
       "\n",
       "\n",
       "<span class=\"k\">class</span> <span class=\"nc\">Node</span><span class=\"p\">:</span>\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">data</span><span class=\"p\">,</span> <span class=\"n\">parents</span><span class=\"o\">=</span><span class=\"p\">()):</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">data</span> <span class=\"o\">=</span> <span class=\"n\">data</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">grad</span> <span class=\"o\">=</span> <span class=\"mi\">0</span>               <span class=\"c1\"># ∂loss / ∂self</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_parents</span> <span class=\"o\">=</span> <span class=\"n\">parents</span>     <span class=\"c1\"># parent -&gt; self</span>\n",
       "\n",
       "    <span class=\"nd\">@final</span>\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">sorted_nodes</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "<span class=\"w\">        </span><span class=\"sd\">&quot;&quot;&quot;Return topologically sorted nodes with self as root.&quot;&quot;&quot;</span>\n",
       "        <span class=\"n\">topo</span> <span class=\"o\">=</span> <span class=\"n\">OrderedDict</span><span class=\"p\">()</span>\n",
       "\n",
       "        <span class=\"k\">def</span> <span class=\"nf\">dfs</span><span class=\"p\">(</span><span class=\"n\">node</span><span class=\"p\">):</span>\n",
       "            <span class=\"k\">if</span> <span class=\"n\">node</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"n\">topo</span><span class=\"p\">:</span>\n",
       "                <span class=\"k\">for</span> <span class=\"n\">parent</span> <span class=\"ow\">in</span> <span class=\"n\">node</span><span class=\"o\">.</span><span class=\"n\">_parents</span><span class=\"p\">:</span>\n",
       "                    <span class=\"n\">dfs</span><span class=\"p\">(</span><span class=\"n\">parent</span><span class=\"p\">)</span>\n",
       "\n",
       "                <span class=\"n\">topo</span><span class=\"p\">[</span><span class=\"n\">node</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"kc\">None</span>\n",
       "\n",
       "        <span class=\"n\">dfs</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">)</span>\n",
       "        <span class=\"k\">return</span> <span class=\"nb\">reversed</span><span class=\"p\">(</span><span class=\"n\">topo</span><span class=\"p\">)</span>\n",
       "\n",
       "\n",
       "    <span class=\"nd\">@final</span>\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">backward</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "<span class=\"w\">        </span><span class=\"sd\">&quot;&quot;&quot;Send global grads backward to parent nodes.&quot;&quot;&quot;</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">grad</span> <span class=\"o\">=</span> <span class=\"mf\">1.0</span>\n",
       "        <span class=\"k\">for</span> <span class=\"n\">node</span> <span class=\"ow\">in</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">sorted_nodes</span><span class=\"p\">():</span>\n",
       "            <span class=\"k\">for</span> <span class=\"n\">parent</span> <span class=\"ow\">in</span> <span class=\"n\">node</span><span class=\"o\">.</span><span class=\"n\">_parents</span><span class=\"p\">:</span>\n",
       "                <span class=\"n\">parent</span><span class=\"o\">.</span><span class=\"n\">grad</span> <span class=\"o\">+=</span> <span class=\"n\">node</span><span class=\"o\">.</span><span class=\"n\">grad</span> <span class=\"o\">*</span> <span class=\"n\">node</span><span class=\"o\">.</span><span class=\"n\">_local_grad</span><span class=\"p\">(</span><span class=\"n\">parent</span><span class=\"p\">)</span>\n",
       "\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">_local_grad</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">parent</span><span class=\"p\">)</span> <span class=\"o\">-&gt;</span> <span class=\"nb\">float</span><span class=\"p\">:</span>\n",
       "<span class=\"w\">        </span><span class=\"sd\">&quot;&quot;&quot;Calculate local grads ∂self / ∂parent.&quot;&quot;&quot;</span>\n",
       "        <span class=\"k\">raise</span> <span class=\"ne\">NotImplementedError</span><span class=\"p\">(</span><span class=\"s2\">&quot;Base node has no parents.&quot;</span><span class=\"p\">)</span>\n",
       "\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__add__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">node</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"n\">BinaryOpNode</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">node</span><span class=\"p\">,</span> <span class=\"n\">op</span><span class=\"o\">=</span><span class=\"s2\">&quot;+&quot;</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__mul__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">node</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"n\">BinaryOpNode</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">node</span><span class=\"p\">,</span> <span class=\"n\">op</span><span class=\"o\">=</span><span class=\"s2\">&quot;*&quot;</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__pow__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">n</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">assert</span> <span class=\"nb\">isinstance</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"p\">,</span> <span class=\"p\">(</span><span class=\"nb\">int</span><span class=\"p\">,</span> <span class=\"nb\">float</span><span class=\"p\">))</span> <span class=\"ow\">and</span> <span class=\"n\">n</span> <span class=\"o\">!=</span> <span class=\"mi\">1</span>\n",
       "        <span class=\"k\">return</span> <span class=\"n\">PowOp</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">n</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">relu</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"n\">ReLUNode</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">tanh</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"n\">TanhNode</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__neg__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"bp\">self</span> <span class=\"o\">*</span> <span class=\"n\">Node</span><span class=\"p\">(</span><span class=\"o\">-</span><span class=\"mi\">1</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__sub__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">node</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"bp\">self</span> <span class=\"o\">+</span> <span class=\"p\">(</span><span class=\"o\">-</span><span class=\"n\">node</span><span class=\"p\">)</span>\n",
       "</pre></div>\n"
      ],
      "text/latex": [
       "\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n",
       "\\PY{k+kn}{import} \\PY{n+nn}{math}\n",
       "\\PY{k+kn}{import} \\PY{n+nn}{random}\n",
       "\\PY{n}{random}\\PY{o}{.}\\PY{n}{seed}\\PY{p}{(}\\PY{l+m+mi}{42}\\PY{p}{)}\n",
       "\n",
       "\\PY{k+kn}{from} \\PY{n+nn}{typing} \\PY{k+kn}{import} \\PY{n}{final}\n",
       "\\PY{k+kn}{from} \\PY{n+nn}{collections} \\PY{k+kn}{import} \\PY{n}{OrderedDict}\n",
       "\n",
       "\n",
       "\\PY{k}{class} \\PY{n+nc}{Node}\\PY{p}{:}\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{data}\\PY{p}{,} \\PY{n}{parents}\\PY{o}{=}\\PY{p}{(}\\PY{p}{)}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{data} \\PY{o}{=} \\PY{n}{data}\n",
       "        \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{grad} \\PY{o}{=} \\PY{l+m+mi}{0}               \\PY{c+c1}{\\PYZsh{} ∂loss / ∂self}\n",
       "        \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}parents} \\PY{o}{=} \\PY{n}{parents}     \\PY{c+c1}{\\PYZsh{} parent \\PYZhy{}\\PYZgt{} self}\n",
       "\n",
       "    \\PY{n+nd}{@final}\n",
       "    \\PY{k}{def} \\PY{n+nf}{sorted\\PYZus{}nodes}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "\\PY{+w}{        }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}Return topologically sorted nodes with self as root.\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
       "        \\PY{n}{topo} \\PY{o}{=} \\PY{n}{OrderedDict}\\PY{p}{(}\\PY{p}{)}\n",
       "\n",
       "        \\PY{k}{def} \\PY{n+nf}{dfs}\\PY{p}{(}\\PY{n}{node}\\PY{p}{)}\\PY{p}{:}\n",
       "            \\PY{k}{if} \\PY{n}{node} \\PY{o+ow}{not} \\PY{o+ow}{in} \\PY{n}{topo}\\PY{p}{:}\n",
       "                \\PY{k}{for} \\PY{n}{parent} \\PY{o+ow}{in} \\PY{n}{node}\\PY{o}{.}\\PY{n}{\\PYZus{}parents}\\PY{p}{:}\n",
       "                    \\PY{n}{dfs}\\PY{p}{(}\\PY{n}{parent}\\PY{p}{)}\n",
       "\n",
       "                \\PY{n}{topo}\\PY{p}{[}\\PY{n}{node}\\PY{p}{]} \\PY{o}{=} \\PY{k+kc}{None}\n",
       "\n",
       "        \\PY{n}{dfs}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\n",
       "        \\PY{k}{return} \\PY{n+nb}{reversed}\\PY{p}{(}\\PY{n}{topo}\\PY{p}{)}\n",
       "\n",
       "\n",
       "    \\PY{n+nd}{@final}\n",
       "    \\PY{k}{def} \\PY{n+nf}{backward}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "\\PY{+w}{        }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}Send global grads backward to parent nodes.\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
       "        \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{grad} \\PY{o}{=} \\PY{l+m+mf}{1.0}\n",
       "        \\PY{k}{for} \\PY{n}{node} \\PY{o+ow}{in} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{sorted\\PYZus{}nodes}\\PY{p}{(}\\PY{p}{)}\\PY{p}{:}\n",
       "            \\PY{k}{for} \\PY{n}{parent} \\PY{o+ow}{in} \\PY{n}{node}\\PY{o}{.}\\PY{n}{\\PYZus{}parents}\\PY{p}{:}\n",
       "                \\PY{n}{parent}\\PY{o}{.}\\PY{n}{grad} \\PY{o}{+}\\PY{o}{=} \\PY{n}{node}\\PY{o}{.}\\PY{n}{grad} \\PY{o}{*} \\PY{n}{node}\\PY{o}{.}\\PY{n}{\\PYZus{}local\\PYZus{}grad}\\PY{p}{(}\\PY{n}{parent}\\PY{p}{)}\n",
       "\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{\\PYZus{}local\\PYZus{}grad}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{parent}\\PY{p}{)} \\PY{o}{\\PYZhy{}}\\PY{o}{\\PYZgt{}} \\PY{n+nb}{float}\\PY{p}{:}\n",
       "\\PY{+w}{        }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}Calculate local grads ∂self / ∂parent.\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
       "        \\PY{k}{raise} \\PY{n+ne}{NotImplementedError}\\PY{p}{(}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{Base node has no parents.}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{)}\n",
       "\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}add\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{node}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n}{BinaryOpNode}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{node}\\PY{p}{,} \\PY{n}{op}\\PY{o}{=}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{+}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}mul\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{node}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n}{BinaryOpNode}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{node}\\PY{p}{,} \\PY{n}{op}\\PY{o}{=}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{*}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}pow\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{n}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{assert} \\PY{n+nb}{isinstance}\\PY{p}{(}\\PY{n}{n}\\PY{p}{,} \\PY{p}{(}\\PY{n+nb}{int}\\PY{p}{,} \\PY{n+nb}{float}\\PY{p}{)}\\PY{p}{)} \\PY{o+ow}{and} \\PY{n}{n} \\PY{o}{!=} \\PY{l+m+mi}{1}\n",
       "        \\PY{k}{return} \\PY{n}{PowOp}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{n}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{relu}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n}{ReLUNode}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{tanh}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n}{TanhNode}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}neg\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n+nb+bp}{self} \\PY{o}{*} \\PY{n}{Node}\\PY{p}{(}\\PY{o}{\\PYZhy{}}\\PY{l+m+mi}{1}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}sub\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{node}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n+nb+bp}{self} \\PY{o}{+} \\PY{p}{(}\\PY{o}{\\PYZhy{}}\\PY{n}{node}\\PY{p}{)}\n",
       "\\end{Verbatim}\n"
      ],
      "text/plain": [
       "import math\n",
       "import random\n",
       "random.seed(42)\n",
       "\n",
       "from typing import final\n",
       "from collections import OrderedDict\n",
       "\n",
       "\n",
       "class Node:\n",
       "    def __init__(self, data, parents=()):\n",
       "        self.data = data\n",
       "        self.grad = 0               # ∂loss / ∂self\n",
       "        self._parents = parents     # parent -> self\n",
       "\n",
       "    @final\n",
       "    def sorted_nodes(self):\n",
       "        \"\"\"Return topologically sorted nodes with self as root.\"\"\"\n",
       "        topo = OrderedDict()\n",
       "\n",
       "        def dfs(node):\n",
       "            if node not in topo:\n",
       "                for parent in node._parents:\n",
       "                    dfs(parent)\n",
       "\n",
       "                topo[node] = None\n",
       "\n",
       "        dfs(self)\n",
       "        return reversed(topo)\n",
       "\n",
       "\n",
       "    @final\n",
       "    def backward(self):\n",
       "        \"\"\"Send global grads backward to parent nodes.\"\"\"\n",
       "        self.grad = 1.0\n",
       "        for node in self.sorted_nodes():\n",
       "            for parent in node._parents:\n",
       "                parent.grad += node.grad * node._local_grad(parent)\n",
       "\n",
       "\n",
       "    def _local_grad(self, parent) -> float:\n",
       "        \"\"\"Calculate local grads ∂self / ∂parent.\"\"\"\n",
       "        raise NotImplementedError(\"Base node has no parents.\")\n",
       "\n",
       "\n",
       "    def __add__(self, node):\n",
       "        return BinaryOpNode(self, node, op=\"+\")\n",
       "\n",
       "    def __mul__(self, node):\n",
       "        return BinaryOpNode(self, node, op=\"*\")\n",
       "\n",
       "    def __pow__(self, n):\n",
       "        assert isinstance(n, (int, float)) and n != 1\n",
       "        return PowOp(self, n)\n",
       "\n",
       "    def relu(self):\n",
       "        return ReLUNode(self)\n",
       "\n",
       "    def tanh(self):\n",
       "        return TanhNode(self)\n",
       "\n",
       "    def __neg__(self):\n",
       "        return self * Node(-1)\n",
       "\n",
       "    def __sub__(self, node):\n",
       "        return self + (-node)"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%save\n",
    "import math\n",
    "import random\n",
    "random.seed(42)\n",
    "\n",
    "from typing import final\n",
    "from collections import OrderedDict\n",
    "\n",
    "\n",
    "class Node:\n",
    "    def __init__(self, data, parents=()):\n",
    "        self.data = data\n",
    "        self.grad = 0               # ∂loss / ∂self\n",
    "        self._parents = parents     # parent -> self\n",
    "\n",
    "    @final\n",
    "    def sorted_nodes(self):\n",
    "        \"\"\"Return topologically sorted nodes with self as root.\"\"\"\n",
    "        topo = OrderedDict()\n",
    "\n",
    "        def dfs(node):\n",
    "            if node not in topo:\n",
    "                for parent in node._parents:\n",
    "                    dfs(parent)\n",
    "\n",
    "                topo[node] = None\n",
    "\n",
    "        dfs(self)\n",
    "        return reversed(topo)\n",
    "\n",
    "\n",
    "    @final\n",
    "    def backward(self):\n",
    "        \"\"\"Send global grads backward to parent nodes.\"\"\"\n",
    "        self.grad = 1.0\n",
    "        for node in self.sorted_nodes():\n",
    "            for parent in node._parents:\n",
    "                parent.grad += node.grad * node._local_grad(parent)\n",
    "\n",
    "\n",
    "    def _local_grad(self, parent) -> float:\n",
    "        \"\"\"Calculate local grads ∂self / ∂parent.\"\"\"\n",
    "        raise NotImplementedError(\"Base node has no parents.\")\n",
    "\n",
    "\n",
    "    def __add__(self, node):\n",
    "        return BinaryOpNode(self, node, op=\"+\")\n",
    "\n",
    "    def __mul__(self, node):\n",
    "        return BinaryOpNode(self, node, op=\"*\")\n",
    "\n",
    "    def __pow__(self, n):\n",
    "        assert isinstance(n, (int, float)) and n != 1\n",
    "        return PowOp(self, n)\n",
    "\n",
    "    def relu(self):\n",
    "        return ReLUNode(self)\n",
    "\n",
    "    def tanh(self):\n",
    "        return TanhNode(self)\n",
    "\n",
    "    def __neg__(self):\n",
    "        return self * Node(-1)\n",
    "\n",
    "    def __sub__(self, node):\n",
    "        return self + (-node)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3eb3b46f",
   "metadata": {
    "papermill": {
     "duration": 0.003248,
     "end_time": "2024-09-05T18:30:30.946263",
     "exception": false,
     "start_time": "2024-09-05T18:30:30.943015",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Next, we define the **supported operations**. Observe that only a handful of operations are needed to implement a fully-connected neural network:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a9ee7e91",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:30.952894Z",
     "iopub.status.busy": "2024-09-05T18:30:30.952688Z",
     "iopub.status.idle": "2024-09-05T18:30:31.067666Z",
     "shell.execute_reply": "2024-09-05T18:30:31.058235Z"
    },
    "papermill": {
     "duration": 0.193747,
     "end_time": "2024-09-05T18:30:31.142917",
     "exception": false,
     "start_time": "2024-09-05T18:30:30.949170",
     "status": "completed"
    },
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>pre { line-height: 125%; }\n",
       "td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n",
       "span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n",
       "td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n",
       "span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n",
       ".output_html .hll { background-color: #ffffcc }\n",
       ".output_html { background: #f8f8f8; }\n",
       ".output_html .c { color: #3D7B7B; font-style: italic } /* Comment */\n",
       ".output_html .err { border: 1px solid #FF0000 } /* Error */\n",
       ".output_html .k { color: #008000; font-weight: bold } /* Keyword */\n",
       ".output_html .o { color: #666666 } /* Operator */\n",
       ".output_html .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */\n",
       ".output_html .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */\n",
       ".output_html .cp { color: #9C6500 } /* Comment.Preproc */\n",
       ".output_html .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */\n",
       ".output_html .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */\n",
       ".output_html .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */\n",
       ".output_html .gd { color: #A00000 } /* Generic.Deleted */\n",
       ".output_html .ge { font-style: italic } /* Generic.Emph */\n",
       ".output_html .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */\n",
       ".output_html .gr { color: #E40000 } /* Generic.Error */\n",
       ".output_html .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
       ".output_html .gi { color: #008400 } /* Generic.Inserted */\n",
       ".output_html .go { color: #717171 } /* Generic.Output */\n",
       ".output_html .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
       ".output_html .gs { font-weight: bold } /* Generic.Strong */\n",
       ".output_html .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
       ".output_html .gt { color: #0044DD } /* Generic.Traceback */\n",
       ".output_html .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
       ".output_html .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
       ".output_html .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
       ".output_html .kp { color: #008000 } /* Keyword.Pseudo */\n",
       ".output_html .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
       ".output_html .kt { color: #B00040 } /* Keyword.Type */\n",
       ".output_html .m { color: #666666 } /* Literal.Number */\n",
       ".output_html .s { color: #BA2121 } /* Literal.String */\n",
       ".output_html .na { color: #687822 } /* Name.Attribute */\n",
       ".output_html .nb { color: #008000 } /* Name.Builtin */\n",
       ".output_html .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
       ".output_html .no { color: #880000 } /* Name.Constant */\n",
       ".output_html .nd { color: #AA22FF } /* Name.Decorator */\n",
       ".output_html .ni { color: #717171; font-weight: bold } /* Name.Entity */\n",
       ".output_html .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */\n",
       ".output_html .nf { color: #0000FF } /* Name.Function */\n",
       ".output_html .nl { color: #767600 } /* Name.Label */\n",
       ".output_html .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
       ".output_html .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
       ".output_html .nv { color: #19177C } /* Name.Variable */\n",
       ".output_html .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
       ".output_html .w { color: #bbbbbb } /* Text.Whitespace */\n",
       ".output_html .mb { color: #666666 } /* Literal.Number.Bin */\n",
       ".output_html .mf { color: #666666 } /* Literal.Number.Float */\n",
       ".output_html .mh { color: #666666 } /* Literal.Number.Hex */\n",
       ".output_html .mi { color: #666666 } /* Literal.Number.Integer */\n",
       ".output_html .mo { color: #666666 } /* Literal.Number.Oct */\n",
       ".output_html .sa { color: #BA2121 } /* Literal.String.Affix */\n",
       ".output_html .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       ".output_html .sc { color: #BA2121 } /* Literal.String.Char */\n",
       ".output_html .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       ".output_html .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       ".output_html .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       ".output_html .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */\n",
       ".output_html .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       ".output_html .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */\n",
       ".output_html .sx { color: #008000 } /* Literal.String.Other */\n",
       ".output_html .sr { color: #A45A77 } /* Literal.String.Regex */\n",
       ".output_html .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       ".output_html .ss { color: #19177C } /* Literal.String.Symbol */\n",
       ".output_html .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       ".output_html .fm { color: #0000FF } /* Name.Function.Magic */\n",
       ".output_html .vc { color: #19177C } /* Name.Variable.Class */\n",
       ".output_html .vg { color: #19177C } /* Name.Variable.Global */\n",
       ".output_html .vi { color: #19177C } /* Name.Variable.Instance */\n",
       ".output_html .vm { color: #19177C } /* Name.Variable.Magic */\n",
       ".output_html .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span><span class=\"k\">class</span> <span class=\"nc\">BinaryOpNode</span><span class=\"p\">(</span><span class=\"n\">Node</span><span class=\"p\">):</span>\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">x</span><span class=\"p\">,</span> <span class=\"n\">y</span><span class=\"p\">,</span> <span class=\"n\">op</span><span class=\"p\">:</span> <span class=\"nb\">str</span><span class=\"p\">):</span>\n",
       "<span class=\"w\">        </span><span class=\"sd\">&quot;&quot;&quot;Binary operation between two nodes.&quot;&quot;&quot;</span>\n",
       "        <span class=\"n\">ops</span> <span class=\"o\">=</span> <span class=\"p\">{</span><span class=\"s2\">&quot;+&quot;</span><span class=\"p\">:</span> <span class=\"k\">lambda</span> <span class=\"n\">x</span><span class=\"p\">,</span> <span class=\"n\">y</span><span class=\"p\">:</span> <span class=\"n\">x</span> <span class=\"o\">+</span> <span class=\"n\">y</span><span class=\"p\">,</span> <span class=\"s2\">&quot;*&quot;</span><span class=\"p\">:</span> <span class=\"k\">lambda</span> <span class=\"n\">x</span><span class=\"p\">,</span> <span class=\"n\">y</span><span class=\"p\">:</span> <span class=\"n\">x</span> <span class=\"o\">*</span> <span class=\"n\">y</span><span class=\"p\">}</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_op</span> <span class=\"o\">=</span> <span class=\"n\">op</span>\n",
       "        <span class=\"nb\">super</span><span class=\"p\">()</span><span class=\"o\">.</span><span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"n\">ops</span><span class=\"p\">[</span><span class=\"n\">op</span><span class=\"p\">](</span><span class=\"n\">x</span><span class=\"o\">.</span><span class=\"n\">data</span><span class=\"p\">,</span> <span class=\"n\">y</span><span class=\"o\">.</span><span class=\"n\">data</span><span class=\"p\">),</span> <span class=\"p\">(</span><span class=\"n\">x</span><span class=\"p\">,</span> <span class=\"n\">y</span><span class=\"p\">))</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">_local_grad</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">parent</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">if</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_op</span> <span class=\"o\">==</span> <span class=\"s2\">&quot;+&quot;</span><span class=\"p\">:</span>\n",
       "            <span class=\"k\">return</span> <span class=\"mf\">1.0</span>\n",
       "\n",
       "        <span class=\"k\">elif</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_op</span> <span class=\"o\">==</span> <span class=\"s2\">&quot;*&quot;</span><span class=\"p\">:</span>\n",
       "            <span class=\"n\">i</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_parents</span><span class=\"o\">.</span><span class=\"n\">index</span><span class=\"p\">(</span><span class=\"n\">parent</span><span class=\"p\">)</span>\n",
       "            <span class=\"n\">coparent</span> <span class=\"o\">=</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_parents</span><span class=\"p\">[</span><span class=\"mi\">1</span> <span class=\"o\">-</span> <span class=\"n\">i</span><span class=\"p\">]</span>\n",
       "            <span class=\"k\">return</span> <span class=\"n\">coparent</span><span class=\"o\">.</span><span class=\"n\">data</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__repr__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">_op</span>\n",
       "\n",
       "\n",
       "<span class=\"k\">class</span> <span class=\"nc\">ReLUNode</span><span class=\"p\">(</span><span class=\"n\">Node</span><span class=\"p\">):</span>\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">x</span><span class=\"p\">):</span>\n",
       "        <span class=\"n\">data</span> <span class=\"o\">=</span> <span class=\"n\">x</span><span class=\"o\">.</span><span class=\"n\">data</span> <span class=\"o\">*</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"n\">x</span><span class=\"o\">.</span><span class=\"n\">data</span> <span class=\"o\">&gt;</span> <span class=\"mf\">0.0</span><span class=\"p\">)</span>\n",
       "        <span class=\"nb\">super</span><span class=\"p\">()</span><span class=\"o\">.</span><span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"n\">data</span><span class=\"p\">,</span> <span class=\"p\">(</span><span class=\"n\">x</span><span class=\"p\">,))</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">_local_grad</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">parent</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"nb\">float</span><span class=\"p\">(</span><span class=\"n\">parent</span><span class=\"o\">.</span><span class=\"n\">data</span> <span class=\"o\">&gt;</span> <span class=\"mi\">0</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__repr__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"s2\">&quot;relu&quot;</span>\n",
       "\n",
       "\n",
       "<span class=\"k\">class</span> <span class=\"nc\">TanhNode</span><span class=\"p\">(</span><span class=\"n\">Node</span><span class=\"p\">):</span>\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">x</span><span class=\"p\">):</span>\n",
       "        <span class=\"n\">data</span> <span class=\"o\">=</span> <span class=\"n\">math</span><span class=\"o\">.</span><span class=\"n\">tanh</span><span class=\"p\">(</span><span class=\"n\">x</span><span class=\"o\">.</span><span class=\"n\">data</span><span class=\"p\">)</span>\n",
       "        <span class=\"nb\">super</span><span class=\"p\">()</span><span class=\"o\">.</span><span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"n\">data</span><span class=\"p\">,</span> <span class=\"p\">(</span><span class=\"n\">x</span><span class=\"p\">,))</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">_local_grad</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">parent</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"mi\">1</span> <span class=\"o\">-</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">data</span><span class=\"o\">**</span><span class=\"mi\">2</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__repr__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"s2\">&quot;tanh&quot;</span>\n",
       "\n",
       "\n",
       "<span class=\"k\">class</span> <span class=\"nc\">PowOp</span><span class=\"p\">(</span><span class=\"n\">Node</span><span class=\"p\">):</span>\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">x</span><span class=\"p\">,</span> <span class=\"n\">n</span><span class=\"p\">):</span>\n",
       "        <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">n</span> <span class=\"o\">=</span> <span class=\"n\">n</span>\n",
       "        <span class=\"n\">data</span> <span class=\"o\">=</span> <span class=\"n\">x</span><span class=\"o\">.</span><span class=\"n\">data</span><span class=\"o\">**</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">n</span>\n",
       "        <span class=\"nb\">super</span><span class=\"p\">()</span><span class=\"o\">.</span><span class=\"fm\">__init__</span><span class=\"p\">(</span><span class=\"n\">data</span><span class=\"p\">,</span> <span class=\"p\">(</span><span class=\"n\">x</span><span class=\"p\">,))</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">_local_grad</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">,</span> <span class=\"n\">parent</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">n</span> <span class=\"o\">*</span> <span class=\"n\">parent</span><span class=\"o\">.</span><span class=\"n\">data</span> <span class=\"o\">**</span> <span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">n</span> <span class=\"o\">-</span> <span class=\"mi\">1</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"fm\">__repr__</span><span class=\"p\">(</span><span class=\"bp\">self</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">return</span> <span class=\"sa\">f</span><span class=\"s2\">&quot;** </span><span class=\"si\">{</span><span class=\"bp\">self</span><span class=\"o\">.</span><span class=\"n\">n</span><span class=\"si\">}</span><span class=\"s2\">&quot;</span>\n",
       "</pre></div>\n"
      ],
      "text/latex": [
       "\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n",
       "\\PY{k}{class} \\PY{n+nc}{BinaryOpNode}\\PY{p}{(}\\PY{n}{Node}\\PY{p}{)}\\PY{p}{:}\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{x}\\PY{p}{,} \\PY{n}{y}\\PY{p}{,} \\PY{n}{op}\\PY{p}{:} \\PY{n+nb}{str}\\PY{p}{)}\\PY{p}{:}\n",
       "\\PY{+w}{        }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}Binary operation between two nodes.\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
       "        \\PY{n}{ops} \\PY{o}{=} \\PY{p}{\\PYZob{}}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{+}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{:} \\PY{k}{lambda} \\PY{n}{x}\\PY{p}{,} \\PY{n}{y}\\PY{p}{:} \\PY{n}{x} \\PY{o}{+} \\PY{n}{y}\\PY{p}{,} \\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{*}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{:} \\PY{k}{lambda} \\PY{n}{x}\\PY{p}{,} \\PY{n}{y}\\PY{p}{:} \\PY{n}{x} \\PY{o}{*} \\PY{n}{y}\\PY{p}{\\PYZcb{}}\n",
       "        \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}op} \\PY{o}{=} \\PY{n}{op}\n",
       "        \\PY{n+nb}{super}\\PY{p}{(}\\PY{p}{)}\\PY{o}{.}\\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n}{ops}\\PY{p}{[}\\PY{n}{op}\\PY{p}{]}\\PY{p}{(}\\PY{n}{x}\\PY{o}{.}\\PY{n}{data}\\PY{p}{,} \\PY{n}{y}\\PY{o}{.}\\PY{n}{data}\\PY{p}{)}\\PY{p}{,} \\PY{p}{(}\\PY{n}{x}\\PY{p}{,} \\PY{n}{y}\\PY{p}{)}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{\\PYZus{}local\\PYZus{}grad}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{parent}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{if} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}op} \\PY{o}{==} \\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{+}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{:}\n",
       "            \\PY{k}{return} \\PY{l+m+mf}{1.0}\n",
       "\n",
       "        \\PY{k}{elif} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}op} \\PY{o}{==} \\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{*}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{:}\n",
       "            \\PY{n}{i} \\PY{o}{=} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}parents}\\PY{o}{.}\\PY{n}{index}\\PY{p}{(}\\PY{n}{parent}\\PY{p}{)}\n",
       "            \\PY{n}{coparent} \\PY{o}{=} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}parents}\\PY{p}{[}\\PY{l+m+mi}{1} \\PY{o}{\\PYZhy{}} \\PY{n}{i}\\PY{p}{]}\n",
       "            \\PY{k}{return} \\PY{n}{coparent}\\PY{o}{.}\\PY{n}{data}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}repr\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{\\PYZus{}op}\n",
       "\n",
       "\n",
       "\\PY{k}{class} \\PY{n+nc}{ReLUNode}\\PY{p}{(}\\PY{n}{Node}\\PY{p}{)}\\PY{p}{:}\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{x}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{n}{data} \\PY{o}{=} \\PY{n}{x}\\PY{o}{.}\\PY{n}{data} \\PY{o}{*} \\PY{n+nb}{int}\\PY{p}{(}\\PY{n}{x}\\PY{o}{.}\\PY{n}{data} \\PY{o}{\\PYZgt{}} \\PY{l+m+mf}{0.0}\\PY{p}{)}\n",
       "        \\PY{n+nb}{super}\\PY{p}{(}\\PY{p}{)}\\PY{o}{.}\\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n}{data}\\PY{p}{,} \\PY{p}{(}\\PY{n}{x}\\PY{p}{,}\\PY{p}{)}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{\\PYZus{}local\\PYZus{}grad}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{parent}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n+nb}{float}\\PY{p}{(}\\PY{n}{parent}\\PY{o}{.}\\PY{n}{data} \\PY{o}{\\PYZgt{}} \\PY{l+m+mi}{0}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}repr\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{relu}\\PY{l+s+s2}{\\PYZdq{}}\n",
       "\n",
       "\n",
       "\\PY{k}{class} \\PY{n+nc}{TanhNode}\\PY{p}{(}\\PY{n}{Node}\\PY{p}{)}\\PY{p}{:}\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{x}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{n}{data} \\PY{o}{=} \\PY{n}{math}\\PY{o}{.}\\PY{n}{tanh}\\PY{p}{(}\\PY{n}{x}\\PY{o}{.}\\PY{n}{data}\\PY{p}{)}\n",
       "        \\PY{n+nb}{super}\\PY{p}{(}\\PY{p}{)}\\PY{o}{.}\\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n}{data}\\PY{p}{,} \\PY{p}{(}\\PY{n}{x}\\PY{p}{,}\\PY{p}{)}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{\\PYZus{}local\\PYZus{}grad}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{parent}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{l+m+mi}{1} \\PY{o}{\\PYZhy{}} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{data}\\PY{o}{*}\\PY{o}{*}\\PY{l+m+mi}{2}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}repr\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{tanh}\\PY{l+s+s2}{\\PYZdq{}}\n",
       "\n",
       "\n",
       "\\PY{k}{class} \\PY{n+nc}{PowOp}\\PY{p}{(}\\PY{n}{Node}\\PY{p}{)}\\PY{p}{:}\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{x}\\PY{p}{,} \\PY{n}{n}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{n} \\PY{o}{=} \\PY{n}{n}\n",
       "        \\PY{n}{data} \\PY{o}{=} \\PY{n}{x}\\PY{o}{.}\\PY{n}{data}\\PY{o}{*}\\PY{o}{*}\\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{n}\n",
       "        \\PY{n+nb}{super}\\PY{p}{(}\\PY{p}{)}\\PY{o}{.}\\PY{n+nf+fm}{\\PYZus{}\\PYZus{}init\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n}{data}\\PY{p}{,} \\PY{p}{(}\\PY{n}{x}\\PY{p}{,}\\PY{p}{)}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{\\PYZus{}local\\PYZus{}grad}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{,} \\PY{n}{parent}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{n} \\PY{o}{*} \\PY{n}{parent}\\PY{o}{.}\\PY{n}{data} \\PY{o}{*}\\PY{o}{*} \\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{n} \\PY{o}{\\PYZhy{}} \\PY{l+m+mi}{1}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf+fm}{\\PYZus{}\\PYZus{}repr\\PYZus{}\\PYZus{}}\\PY{p}{(}\\PY{n+nb+bp}{self}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{return} \\PY{l+s+sa}{f}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{** }\\PY{l+s+si}{\\PYZob{}}\\PY{n+nb+bp}{self}\\PY{o}{.}\\PY{n}{n}\\PY{l+s+si}{\\PYZcb{}}\\PY{l+s+s2}{\\PYZdq{}}\n",
       "\\end{Verbatim}\n"
      ],
      "text/plain": [
       "\n",
       "class BinaryOpNode(Node):\n",
       "    def __init__(self, x, y, op: str):\n",
       "        \"\"\"Binary operation between two nodes.\"\"\"\n",
       "        ops = {\"+\": lambda x, y: x + y, \"*\": lambda x, y: x * y}\n",
       "        self._op = op\n",
       "        super().__init__(ops[op](x.data, y.data), (x, y))\n",
       "\n",
       "    def _local_grad(self, parent):\n",
       "        if self._op == \"+\":\n",
       "            return 1.0\n",
       "\n",
       "        elif self._op == \"*\":\n",
       "            i = self._parents.index(parent)\n",
       "            coparent = self._parents[1 - i]\n",
       "            return coparent.data\n",
       "\n",
       "    def __repr__(self):\n",
       "        return self._op\n",
       "\n",
       "\n",
       "class ReLUNode(Node):\n",
       "    def __init__(self, x):\n",
       "        data = x.data * int(x.data > 0.0)\n",
       "        super().__init__(data, (x,))\n",
       "\n",
       "    def _local_grad(self, parent):\n",
       "        return float(parent.data > 0)\n",
       "\n",
       "    def __repr__(self):\n",
       "        return \"relu\"\n",
       "\n",
       "\n",
       "class TanhNode(Node):\n",
       "    def __init__(self, x):\n",
       "        data = math.tanh(x.data)\n",
       "        super().__init__(data, (x,))\n",
       "\n",
       "    def _local_grad(self, parent):\n",
       "        return 1 - self.data**2\n",
       "\n",
       "    def __repr__(self):\n",
       "        return \"tanh\"\n",
       "\n",
       "\n",
       "class PowOp(Node):\n",
       "    def __init__(self, x, n):\n",
       "        self.n = n\n",
       "        data = x.data**self.n\n",
       "        super().__init__(data, (x,))\n",
       "\n",
       "    def _local_grad(self, parent):\n",
       "        return self.n * parent.data ** (self.n - 1)\n",
       "\n",
       "    def __repr__(self):\n",
       "        return f\"** {self.n}\""
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%save\n",
    "\n",
    "class BinaryOpNode(Node):\n",
    "    def __init__(self, x, y, op: str):\n",
    "        \"\"\"Binary operation between two nodes.\"\"\"\n",
    "        ops = {\"+\": lambda x, y: x + y, \"*\": lambda x, y: x * y}\n",
    "        self._op = op\n",
    "        super().__init__(ops[op](x.data, y.data), (x, y))\n",
    "\n",
    "    def _local_grad(self, parent):\n",
    "        if self._op == \"+\":\n",
    "            return 1.0\n",
    "\n",
    "        elif self._op == \"*\":\n",
    "            i = self._parents.index(parent)\n",
    "            coparent = self._parents[1 - i]\n",
    "            return coparent.data\n",
    "\n",
    "    def __repr__(self):\n",
    "        return self._op\n",
    "\n",
    "\n",
    "class ReLUNode(Node):\n",
    "    def __init__(self, x):\n",
    "        data = x.data * int(x.data > 0.0)\n",
    "        super().__init__(data, (x,))\n",
    "\n",
    "    def _local_grad(self, parent):\n",
    "        return float(parent.data > 0)\n",
    "\n",
    "    def __repr__(self):\n",
    "        return \"relu\"\n",
    "\n",
    "\n",
    "class TanhNode(Node):\n",
    "    def __init__(self, x):\n",
    "        data = math.tanh(x.data)\n",
    "        super().__init__(data, (x,))\n",
    "\n",
    "    def _local_grad(self, parent):\n",
    "        return 1 - self.data**2\n",
    "\n",
    "    def __repr__(self):\n",
    "        return \"tanh\"\n",
    "\n",
    "\n",
    "class PowOp(Node):\n",
    "    def __init__(self, x, n):\n",
    "        self.n = n\n",
    "        data = x.data**self.n\n",
    "        super().__init__(data, (x,))\n",
    "\n",
    "    def _local_grad(self, parent):\n",
    "        return self.n * parent.data ** (self.n - 1)\n",
    "\n",
    "    def __repr__(self):\n",
    "        return f\"** {self.n}\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8897ade3",
   "metadata": {
    "papermill": {
     "duration": 0.003549,
     "end_time": "2024-09-05T18:30:31.150529",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.146980",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "**Remark.** Note that a circular definition is okay, since references are resolved at runtime."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "971f3877",
   "metadata": {
    "papermill": {
     "duration": 0.004848,
     "end_time": "2024-09-05T18:30:31.159065",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.154217",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "<br>\n",
    "\n",
    "## Graph visualization"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b38fce39",
   "metadata": {
    "papermill": {
     "duration": 0.028164,
     "end_time": "2024-09-05T18:30:31.210379",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.182215",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "The next two functions help to visualize networks. The `trace` function walks backward through the graph to collect all nodes and edges. It is used by `draw_graph`, which first draws all nodes and then draws all edges. For compute nodes, we add a small juncture node which contains the name of the operation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "dbbec280",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:31.223011Z",
     "iopub.status.busy": "2024-09-05T18:30:31.222512Z",
     "iopub.status.idle": "2024-09-05T18:30:31.263302Z",
     "shell.execute_reply": "2024-09-05T18:30:31.262616Z"
    },
    "papermill": {
     "duration": 0.049435,
     "end_time": "2024-09-05T18:30:31.265381",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.215946",
     "status": "completed"
    },
    "tags": [
     "remove-input",
     "hide-output"
    ]
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>pre { line-height: 125%; }\n",
       "td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n",
       "span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }\n",
       "td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n",
       "span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }\n",
       ".output_html .hll { background-color: #ffffcc }\n",
       ".output_html { background: #f8f8f8; }\n",
       ".output_html .c { color: #3D7B7B; font-style: italic } /* Comment */\n",
       ".output_html .err { border: 1px solid #FF0000 } /* Error */\n",
       ".output_html .k { color: #008000; font-weight: bold } /* Keyword */\n",
       ".output_html .o { color: #666666 } /* Operator */\n",
       ".output_html .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */\n",
       ".output_html .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */\n",
       ".output_html .cp { color: #9C6500 } /* Comment.Preproc */\n",
       ".output_html .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */\n",
       ".output_html .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */\n",
       ".output_html .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */\n",
       ".output_html .gd { color: #A00000 } /* Generic.Deleted */\n",
       ".output_html .ge { font-style: italic } /* Generic.Emph */\n",
       ".output_html .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */\n",
       ".output_html .gr { color: #E40000 } /* Generic.Error */\n",
       ".output_html .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
       ".output_html .gi { color: #008400 } /* Generic.Inserted */\n",
       ".output_html .go { color: #717171 } /* Generic.Output */\n",
       ".output_html .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
       ".output_html .gs { font-weight: bold } /* Generic.Strong */\n",
       ".output_html .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
       ".output_html .gt { color: #0044DD } /* Generic.Traceback */\n",
       ".output_html .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
       ".output_html .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
       ".output_html .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
       ".output_html .kp { color: #008000 } /* Keyword.Pseudo */\n",
       ".output_html .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
       ".output_html .kt { color: #B00040 } /* Keyword.Type */\n",
       ".output_html .m { color: #666666 } /* Literal.Number */\n",
       ".output_html .s { color: #BA2121 } /* Literal.String */\n",
       ".output_html .na { color: #687822 } /* Name.Attribute */\n",
       ".output_html .nb { color: #008000 } /* Name.Builtin */\n",
       ".output_html .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
       ".output_html .no { color: #880000 } /* Name.Constant */\n",
       ".output_html .nd { color: #AA22FF } /* Name.Decorator */\n",
       ".output_html .ni { color: #717171; font-weight: bold } /* Name.Entity */\n",
       ".output_html .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */\n",
       ".output_html .nf { color: #0000FF } /* Name.Function */\n",
       ".output_html .nl { color: #767600 } /* Name.Label */\n",
       ".output_html .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
       ".output_html .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
       ".output_html .nv { color: #19177C } /* Name.Variable */\n",
       ".output_html .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
       ".output_html .w { color: #bbbbbb } /* Text.Whitespace */\n",
       ".output_html .mb { color: #666666 } /* Literal.Number.Bin */\n",
       ".output_html .mf { color: #666666 } /* Literal.Number.Float */\n",
       ".output_html .mh { color: #666666 } /* Literal.Number.Hex */\n",
       ".output_html .mi { color: #666666 } /* Literal.Number.Integer */\n",
       ".output_html .mo { color: #666666 } /* Literal.Number.Oct */\n",
       ".output_html .sa { color: #BA2121 } /* Literal.String.Affix */\n",
       ".output_html .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       ".output_html .sc { color: #BA2121 } /* Literal.String.Char */\n",
       ".output_html .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       ".output_html .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       ".output_html .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       ".output_html .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */\n",
       ".output_html .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       ".output_html .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */\n",
       ".output_html .sx { color: #008000 } /* Literal.String.Other */\n",
       ".output_html .sr { color: #A45A77 } /* Literal.String.Regex */\n",
       ".output_html .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       ".output_html .ss { color: #19177C } /* Literal.String.Symbol */\n",
       ".output_html .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       ".output_html .fm { color: #0000FF } /* Name.Function.Magic */\n",
       ".output_html .vc { color: #19177C } /* Name.Variable.Class */\n",
       ".output_html .vg { color: #19177C } /* Name.Variable.Global */\n",
       ".output_html .vi { color: #19177C } /* Name.Variable.Instance */\n",
       ".output_html .vm { color: #19177C } /* Name.Variable.Magic */\n",
       ".output_html .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span><span class=\"kn\">from</span> <span class=\"nn\">graphviz</span> <span class=\"kn\">import</span> <span class=\"n\">Digraph</span>\n",
       "\n",
       "\n",
       "<span class=\"k\">def</span> <span class=\"nf\">trace</span><span class=\"p\">(</span><span class=\"n\">root</span><span class=\"p\">):</span>\n",
       "<span class=\"w\">    </span><span class=\"sd\">&quot;&quot;&quot;Builds a set of all nodes and edges in a graph.&quot;&quot;&quot;</span>\n",
       "    <span class=\"c1\"># https://github.com/karpathy/micrograd/blob/master/trace_graph.ipynb</span>\n",
       "\n",
       "    <span class=\"n\">nodes</span> <span class=\"o\">=</span> <span class=\"nb\">set</span><span class=\"p\">()</span>\n",
       "    <span class=\"n\">edges</span> <span class=\"o\">=</span> <span class=\"nb\">set</span><span class=\"p\">()</span>\n",
       "\n",
       "    <span class=\"k\">def</span> <span class=\"nf\">build</span><span class=\"p\">(</span><span class=\"n\">v</span><span class=\"p\">):</span>\n",
       "        <span class=\"k\">if</span> <span class=\"n\">v</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"n\">nodes</span><span class=\"p\">:</span>\n",
       "            <span class=\"n\">nodes</span><span class=\"o\">.</span><span class=\"n\">add</span><span class=\"p\">(</span><span class=\"n\">v</span><span class=\"p\">)</span>\n",
       "            <span class=\"k\">for</span> <span class=\"n\">parent</span> <span class=\"ow\">in</span> <span class=\"n\">v</span><span class=\"o\">.</span><span class=\"n\">_parents</span><span class=\"p\">:</span>\n",
       "                <span class=\"n\">edges</span><span class=\"o\">.</span><span class=\"n\">add</span><span class=\"p\">((</span><span class=\"n\">parent</span><span class=\"p\">,</span> <span class=\"n\">v</span><span class=\"p\">))</span>\n",
       "                <span class=\"n\">build</span><span class=\"p\">(</span><span class=\"n\">parent</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"n\">build</span><span class=\"p\">(</span><span class=\"n\">root</span><span class=\"p\">)</span>\n",
       "    <span class=\"k\">return</span> <span class=\"n\">nodes</span><span class=\"p\">,</span> <span class=\"n\">edges</span>\n",
       "\n",
       "\n",
       "<span class=\"k\">def</span> <span class=\"nf\">draw_graph</span><span class=\"p\">(</span><span class=\"n\">root</span><span class=\"p\">):</span>\n",
       "<span class=\"w\">    </span><span class=\"sd\">&quot;&quot;&quot;Build diagram of computational graph.&quot;&quot;&quot;</span>\n",
       "\n",
       "    <span class=\"n\">dot</span> <span class=\"o\">=</span> <span class=\"n\">Digraph</span><span class=\"p\">(</span><span class=\"nb\">format</span><span class=\"o\">=</span><span class=\"s2\">&quot;svg&quot;</span><span class=\"p\">,</span> <span class=\"n\">graph_attr</span><span class=\"o\">=</span><span class=\"p\">{</span><span class=\"s2\">&quot;rankdir&quot;</span><span class=\"p\">:</span> <span class=\"s2\">&quot;LR&quot;</span><span class=\"p\">})</span>  <span class=\"c1\"># LR = left to right</span>\n",
       "    <span class=\"n\">nodes</span><span class=\"p\">,</span> <span class=\"n\">edges</span> <span class=\"o\">=</span> <span class=\"n\">trace</span><span class=\"p\">(</span><span class=\"n\">root</span><span class=\"p\">)</span>\n",
       "    <span class=\"k\">for</span> <span class=\"n\">n</span> <span class=\"ow\">in</span> <span class=\"n\">nodes</span><span class=\"p\">:</span>\n",
       "        <span class=\"c1\"># Add node to graph</span>\n",
       "        <span class=\"n\">uid</span> <span class=\"o\">=</span> <span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"nb\">id</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"p\">))</span>\n",
       "        <span class=\"n\">dot</span><span class=\"o\">.</span><span class=\"n\">node</span><span class=\"p\">(</span><span class=\"n\">name</span><span class=\"o\">=</span><span class=\"n\">uid</span><span class=\"p\">,</span> <span class=\"n\">label</span><span class=\"o\">=</span><span class=\"sa\">f</span><span class=\"s2\">&quot;data=</span><span class=\"si\">{</span><span class=\"n\">n</span><span class=\"o\">.</span><span class=\"n\">data</span><span class=\"si\">:</span><span class=\"s2\">.3f</span><span class=\"si\">}</span><span class=\"s2\"> | grad=</span><span class=\"si\">{</span><span class=\"n\">n</span><span class=\"o\">.</span><span class=\"n\">grad</span><span class=\"si\">:</span><span class=\"s2\">.4f</span><span class=\"si\">}</span><span class=\"s2\">&quot;</span><span class=\"p\">,</span> <span class=\"n\">shape</span><span class=\"o\">=</span><span class=\"s2\">&quot;record&quot;</span><span class=\"p\">)</span>\n",
       "\n",
       "        <span class=\"c1\"># Connect node to op node if operation</span>\n",
       "        <span class=\"c1\"># e.g. if (5) = (2) + (3), then draw (5) as (+) -&gt; (5).</span>\n",
       "        <span class=\"k\">if</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"o\">.</span><span class=\"n\">_parents</span><span class=\"p\">)</span> <span class=\"o\">&gt;</span> <span class=\"mi\">0</span><span class=\"p\">:</span>\n",
       "            <span class=\"n\">dot</span><span class=\"o\">.</span><span class=\"n\">node</span><span class=\"p\">(</span><span class=\"n\">name</span><span class=\"o\">=</span><span class=\"n\">uid</span> <span class=\"o\">+</span> <span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"p\">),</span> <span class=\"n\">label</span><span class=\"o\">=</span><span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"p\">))</span>\n",
       "            <span class=\"n\">dot</span><span class=\"o\">.</span><span class=\"n\">edge</span><span class=\"p\">(</span><span class=\"n\">uid</span> <span class=\"o\">+</span> <span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"n\">n</span><span class=\"p\">),</span> <span class=\"n\">uid</span><span class=\"p\">)</span>\n",
       "\n",
       "    <span class=\"k\">for</span> <span class=\"n\">child</span><span class=\"p\">,</span> <span class=\"n\">v</span> <span class=\"ow\">in</span> <span class=\"n\">edges</span><span class=\"p\">:</span>\n",
       "        <span class=\"c1\"># Connect child to the op node of v</span>\n",
       "        <span class=\"n\">dot</span><span class=\"o\">.</span><span class=\"n\">edge</span><span class=\"p\">(</span><span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"nb\">id</span><span class=\"p\">(</span><span class=\"n\">child</span><span class=\"p\">)),</span> <span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"nb\">id</span><span class=\"p\">(</span><span class=\"n\">v</span><span class=\"p\">))</span> <span class=\"o\">+</span> <span class=\"nb\">str</span><span class=\"p\">(</span><span class=\"n\">v</span><span class=\"p\">))</span>\n",
       "\n",
       "    <span class=\"k\">return</span> <span class=\"n\">dot</span>\n",
       "</pre></div>\n"
      ],
      "text/latex": [
       "\\begin{Verbatim}[commandchars=\\\\\\{\\}]\n",
       "\\PY{k+kn}{from} \\PY{n+nn}{graphviz} \\PY{k+kn}{import} \\PY{n}{Digraph}\n",
       "\n",
       "\n",
       "\\PY{k}{def} \\PY{n+nf}{trace}\\PY{p}{(}\\PY{n}{root}\\PY{p}{)}\\PY{p}{:}\n",
       "\\PY{+w}{    }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}Builds a set of all nodes and edges in a graph.\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
       "    \\PY{c+c1}{\\PYZsh{} https://github.com/karpathy/micrograd/blob/master/trace\\PYZus{}graph.ipynb}\n",
       "\n",
       "    \\PY{n}{nodes} \\PY{o}{=} \\PY{n+nb}{set}\\PY{p}{(}\\PY{p}{)}\n",
       "    \\PY{n}{edges} \\PY{o}{=} \\PY{n+nb}{set}\\PY{p}{(}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{def} \\PY{n+nf}{build}\\PY{p}{(}\\PY{n}{v}\\PY{p}{)}\\PY{p}{:}\n",
       "        \\PY{k}{if} \\PY{n}{v} \\PY{o+ow}{not} \\PY{o+ow}{in} \\PY{n}{nodes}\\PY{p}{:}\n",
       "            \\PY{n}{nodes}\\PY{o}{.}\\PY{n}{add}\\PY{p}{(}\\PY{n}{v}\\PY{p}{)}\n",
       "            \\PY{k}{for} \\PY{n}{parent} \\PY{o+ow}{in} \\PY{n}{v}\\PY{o}{.}\\PY{n}{\\PYZus{}parents}\\PY{p}{:}\n",
       "                \\PY{n}{edges}\\PY{o}{.}\\PY{n}{add}\\PY{p}{(}\\PY{p}{(}\\PY{n}{parent}\\PY{p}{,} \\PY{n}{v}\\PY{p}{)}\\PY{p}{)}\n",
       "                \\PY{n}{build}\\PY{p}{(}\\PY{n}{parent}\\PY{p}{)}\n",
       "\n",
       "    \\PY{n}{build}\\PY{p}{(}\\PY{n}{root}\\PY{p}{)}\n",
       "    \\PY{k}{return} \\PY{n}{nodes}\\PY{p}{,} \\PY{n}{edges}\n",
       "\n",
       "\n",
       "\\PY{k}{def} \\PY{n+nf}{draw\\PYZus{}graph}\\PY{p}{(}\\PY{n}{root}\\PY{p}{)}\\PY{p}{:}\n",
       "\\PY{+w}{    }\\PY{l+s+sd}{\\PYZdq{}\\PYZdq{}\\PYZdq{}Build diagram of computational graph.\\PYZdq{}\\PYZdq{}\\PYZdq{}}\n",
       "\n",
       "    \\PY{n}{dot} \\PY{o}{=} \\PY{n}{Digraph}\\PY{p}{(}\\PY{n+nb}{format}\\PY{o}{=}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{svg}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{,} \\PY{n}{graph\\PYZus{}attr}\\PY{o}{=}\\PY{p}{\\PYZob{}}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{rankdir}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{:} \\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{LR}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{\\PYZcb{}}\\PY{p}{)}  \\PY{c+c1}{\\PYZsh{} LR = left to right}\n",
       "    \\PY{n}{nodes}\\PY{p}{,} \\PY{n}{edges} \\PY{o}{=} \\PY{n}{trace}\\PY{p}{(}\\PY{n}{root}\\PY{p}{)}\n",
       "    \\PY{k}{for} \\PY{n}{n} \\PY{o+ow}{in} \\PY{n}{nodes}\\PY{p}{:}\n",
       "        \\PY{c+c1}{\\PYZsh{} Add node to graph}\n",
       "        \\PY{n}{uid} \\PY{o}{=} \\PY{n+nb}{str}\\PY{p}{(}\\PY{n+nb}{id}\\PY{p}{(}\\PY{n}{n}\\PY{p}{)}\\PY{p}{)}\n",
       "        \\PY{n}{dot}\\PY{o}{.}\\PY{n}{node}\\PY{p}{(}\\PY{n}{name}\\PY{o}{=}\\PY{n}{uid}\\PY{p}{,} \\PY{n}{label}\\PY{o}{=}\\PY{l+s+sa}{f}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{data=}\\PY{l+s+si}{\\PYZob{}}\\PY{n}{n}\\PY{o}{.}\\PY{n}{data}\\PY{l+s+si}{:}\\PY{l+s+s2}{.3f}\\PY{l+s+si}{\\PYZcb{}}\\PY{l+s+s2}{ | grad=}\\PY{l+s+si}{\\PYZob{}}\\PY{n}{n}\\PY{o}{.}\\PY{n}{grad}\\PY{l+s+si}{:}\\PY{l+s+s2}{.4f}\\PY{l+s+si}{\\PYZcb{}}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{,} \\PY{n}{shape}\\PY{o}{=}\\PY{l+s+s2}{\\PYZdq{}}\\PY{l+s+s2}{record}\\PY{l+s+s2}{\\PYZdq{}}\\PY{p}{)}\n",
       "\n",
       "        \\PY{c+c1}{\\PYZsh{} Connect node to op node if operation}\n",
       "        \\PY{c+c1}{\\PYZsh{} e.g. if (5) = (2) + (3), then draw (5) as (+) \\PYZhy{}\\PYZgt{} (5).}\n",
       "        \\PY{k}{if} \\PY{n+nb}{len}\\PY{p}{(}\\PY{n}{n}\\PY{o}{.}\\PY{n}{\\PYZus{}parents}\\PY{p}{)} \\PY{o}{\\PYZgt{}} \\PY{l+m+mi}{0}\\PY{p}{:}\n",
       "            \\PY{n}{dot}\\PY{o}{.}\\PY{n}{node}\\PY{p}{(}\\PY{n}{name}\\PY{o}{=}\\PY{n}{uid} \\PY{o}{+} \\PY{n+nb}{str}\\PY{p}{(}\\PY{n}{n}\\PY{p}{)}\\PY{p}{,} \\PY{n}{label}\\PY{o}{=}\\PY{n+nb}{str}\\PY{p}{(}\\PY{n}{n}\\PY{p}{)}\\PY{p}{)}\n",
       "            \\PY{n}{dot}\\PY{o}{.}\\PY{n}{edge}\\PY{p}{(}\\PY{n}{uid} \\PY{o}{+} \\PY{n+nb}{str}\\PY{p}{(}\\PY{n}{n}\\PY{p}{)}\\PY{p}{,} \\PY{n}{uid}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{for} \\PY{n}{child}\\PY{p}{,} \\PY{n}{v} \\PY{o+ow}{in} \\PY{n}{edges}\\PY{p}{:}\n",
       "        \\PY{c+c1}{\\PYZsh{} Connect child to the op node of v}\n",
       "        \\PY{n}{dot}\\PY{o}{.}\\PY{n}{edge}\\PY{p}{(}\\PY{n+nb}{str}\\PY{p}{(}\\PY{n+nb}{id}\\PY{p}{(}\\PY{n}{child}\\PY{p}{)}\\PY{p}{)}\\PY{p}{,} \\PY{n+nb}{str}\\PY{p}{(}\\PY{n+nb}{id}\\PY{p}{(}\\PY{n}{v}\\PY{p}{)}\\PY{p}{)} \\PY{o}{+} \\PY{n+nb}{str}\\PY{p}{(}\\PY{n}{v}\\PY{p}{)}\\PY{p}{)}\n",
       "\n",
       "    \\PY{k}{return} \\PY{n}{dot}\n",
       "\\end{Verbatim}\n"
      ],
      "text/plain": [
       "\n",
       "from graphviz import Digraph\n",
       "\n",
       "\n",
       "def trace(root):\n",
       "    \"\"\"Builds a set of all nodes and edges in a graph.\"\"\"\n",
       "    # https://github.com/karpathy/micrograd/blob/master/trace_graph.ipynb\n",
       "\n",
       "    nodes = set()\n",
       "    edges = set()\n",
       "\n",
       "    def build(v):\n",
       "        if v not in nodes:\n",
       "            nodes.add(v)\n",
       "            for parent in v._parents:\n",
       "                edges.add((parent, v))\n",
       "                build(parent)\n",
       "\n",
       "    build(root)\n",
       "    return nodes, edges\n",
       "\n",
       "\n",
       "def draw_graph(root):\n",
       "    \"\"\"Build diagram of computational graph.\"\"\"\n",
       "\n",
       "    dot = Digraph(format=\"svg\", graph_attr={\"rankdir\": \"LR\"})  # LR = left to right\n",
       "    nodes, edges = trace(root)\n",
       "    for n in nodes:\n",
       "        # Add node to graph\n",
       "        uid = str(id(n))\n",
       "        dot.node(name=uid, label=f\"data={n.data:.3f} | grad={n.grad:.4f}\", shape=\"record\")\n",
       "\n",
       "        # Connect node to op node if operation\n",
       "        # e.g. if (5) = (2) + (3), then draw (5) as (+) -> (5).\n",
       "        if len(n._parents) > 0:\n",
       "            dot.node(name=uid + str(n), label=str(n))\n",
       "            dot.edge(uid + str(n), uid)\n",
       "\n",
       "    for child, v in edges:\n",
       "        # Connect child to the op node of v\n",
       "        dot.edge(str(id(child)), str(id(v)) + str(v))\n",
       "\n",
       "    return dot"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%save\n",
    "\n",
    "from graphviz import Digraph\n",
    "\n",
    "\n",
    "def trace(root):\n",
    "    \"\"\"Builds a set of all nodes and edges in a graph.\"\"\"\n",
    "    # https://github.com/karpathy/micrograd/blob/master/trace_graph.ipynb\n",
    "\n",
    "    nodes = set()\n",
    "    edges = set()\n",
    "\n",
    "    def build(v):\n",
    "        if v not in nodes:\n",
    "            nodes.add(v)\n",
    "            for parent in v._parents:\n",
    "                edges.add((parent, v))\n",
    "                build(parent)\n",
    "\n",
    "    build(root)\n",
    "    return nodes, edges\n",
    "\n",
    "\n",
    "def draw_graph(root):\n",
    "    \"\"\"Build diagram of computational graph.\"\"\"\n",
    "\n",
    "    dot = Digraph(format=\"svg\", graph_attr={\"rankdir\": \"LR\"})  # LR = left to right\n",
    "    nodes, edges = trace(root)\n",
    "    for n in nodes:\n",
    "        # Add node to graph\n",
    "        uid = str(id(n))\n",
    "        dot.node(name=uid, label=f\"data={n.data:.3f} | grad={n.grad:.4f}\", shape=\"record\")\n",
    "\n",
    "        # Connect node to op node if operation\n",
    "        # e.g. if (5) = (2) + (3), then draw (5) as (+) -> (5).\n",
    "        if len(n._parents) > 0:\n",
    "            dot.node(name=uid + str(n), label=str(n))\n",
    "            dot.edge(uid + str(n), uid)\n",
    "\n",
    "    for child, v in edges:\n",
    "        # Connect child to the op node of v\n",
    "        dot.edge(str(id(child)), str(id(v)) + str(v))\n",
    "\n",
    "    return dot"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79342ba0",
   "metadata": {
    "papermill": {
     "duration": 0.006336,
     "end_time": "2024-09-05T18:30:31.278730",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.272394",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Creating graph for a dense unit. Observe that `x1` has a degree of 2 since it has two children."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "bbdb1782",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:31.293915Z",
     "iopub.status.busy": "2024-09-05T18:30:31.293543Z",
     "iopub.status.idle": "2024-09-05T18:30:31.652324Z",
     "shell.execute_reply": "2024-09-05T18:30:31.651500Z"
    },
    "papermill": {
     "duration": 0.373569,
     "end_time": "2024-09-05T18:30:31.659509",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.285940",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 12.1.0 (20240811.2233)\n",
       " -->\n",
       "<!-- Pages: 1 -->\n",
       "<svg width=\"941pt\" height=\"194pt\"\n",
       " viewBox=\"0.00 0.00 940.75 194.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 190)\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-190 936.75,-190 936.75,4 -4,4\"/>\n",
       "<!-- 4379506720 -->\n",
       "<g id=\"node1\" class=\"node\">\n",
       "<title>4379506720</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"423.5,-136.5 423.5,-185.5 509.25,-185.5 509.25,-136.5 423.5,-136.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-168.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=4.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"423.5,-161 509.25,-161\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-143.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379411648+ -->\n",
       "<g id=\"node6\" class=\"node\">\n",
       "<title>4379411648+</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"572.25\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"572.25\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- 4379506720&#45;&gt;4379411648+ -->\n",
       "<g id=\"edge7\" class=\"edge\">\n",
       "<title>4379506720&#45;&gt;4379411648+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M509.62,-147.17C518.61,-144.23 527.99,-141.16 536.65,-138.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"537.66,-141.68 546.08,-135.24 535.49,-135.02 537.66,-141.68\"/>\n",
       "</g>\n",
       "<!-- 4379506768 -->\n",
       "<g id=\"node2\" class=\"node\">\n",
       "<title>4379506768</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-136.5 0,-185.5 85.75,-185.5 85.75,-136.5 0,-136.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-168.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=&#45;1.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-161 85.75,-161\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-143.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379415200* -->\n",
       "<g id=\"node4\" class=\"node\">\n",
       "<title>4379415200*</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"148.75\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"148.75\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">*</text>\n",
       "</g>\n",
       "<!-- 4379506768&#45;&gt;4379415200* -->\n",
       "<g id=\"edge9\" class=\"edge\">\n",
       "<title>4379506768&#45;&gt;4379415200*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-147.17C95.11,-144.23 104.49,-141.16 113.15,-138.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"114.16,-141.68 122.58,-135.24 111.99,-135.02 114.16,-141.68\"/>\n",
       "</g>\n",
       "<!-- 4379415200 -->\n",
       "<g id=\"node3\" class=\"node\">\n",
       "<title>4379415200</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211.75,-102.5 211.75,-151.5 297.5,-151.5 297.5,-102.5 211.75,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=&#45;2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"211.75,-127 297.5,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379411744+ -->\n",
       "<g id=\"node10\" class=\"node\">\n",
       "<title>4379411744+</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"360.5\" cy=\"-93\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"360.5\" y=\"-87.95\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- 4379415200&#45;&gt;4379411744+ -->\n",
       "<g id=\"edge11\" class=\"edge\">\n",
       "<title>4379415200&#45;&gt;4379411744+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M297.87,-113.17C306.86,-110.23 316.24,-107.16 324.9,-104.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"325.91,-107.68 334.33,-101.24 323.74,-101.02 325.91,-107.68\"/>\n",
       "</g>\n",
       "<!-- 4379415200*&#45;&gt;4379415200 -->\n",
       "<g id=\"edge1\" class=\"edge\">\n",
       "<title>4379415200*&#45;&gt;4379415200</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.23,-127C183.59,-127 191.85,-127 200.16,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"199.94,-130.5 209.94,-127 199.94,-123.5 199.94,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379411648 -->\n",
       "<g id=\"node5\" class=\"node\">\n",
       "<title>4379411648</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"635.25,-102.5 635.25,-151.5 721,-151.5 721,-102.5 635.25,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"678.12\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=6.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"635.25,-127 721,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"678.12\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379412944relu -->\n",
       "<g id=\"node13\" class=\"node\">\n",
       "<title>4379412944relu</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"784\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"784\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">relu</text>\n",
       "</g>\n",
       "<!-- 4379411648&#45;&gt;4379412944relu -->\n",
       "<g id=\"edge13\" class=\"edge\">\n",
       "<title>4379411648&#45;&gt;4379412944relu</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M721.37,-127C729.28,-127 737.5,-127 745.26,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"745.07,-130.5 755.07,-127 745.07,-123.5 745.07,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379411648+&#45;&gt;4379411648 -->\n",
       "<g id=\"edge2\" class=\"edge\">\n",
       "<title>4379411648+&#45;&gt;4379411648</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M599.73,-127C607.09,-127 615.35,-127 623.66,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"623.44,-130.5 633.44,-127 623.44,-123.5 623.44,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379414720 -->\n",
       "<g id=\"node7\" class=\"node\">\n",
       "<title>4379414720</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211.75,-34.5 211.75,-83.5 297.5,-83.5 297.5,-34.5 211.75,-34.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-66.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=4.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"211.75,-59 297.5,-59\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-41.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379414720&#45;&gt;4379411744+ -->\n",
       "<g id=\"edge8\" class=\"edge\">\n",
       "<title>4379414720&#45;&gt;4379411744+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M297.87,-72.83C306.86,-75.77 316.24,-78.84 324.9,-81.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"323.74,-84.98 334.33,-84.76 325.91,-78.32 323.74,-84.98\"/>\n",
       "</g>\n",
       "<!-- 4379414720* -->\n",
       "<g id=\"node8\" class=\"node\">\n",
       "<title>4379414720*</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"148.75\" cy=\"-59\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"148.75\" y=\"-53.95\" font-family=\"Times,serif\" font-size=\"14.00\">*</text>\n",
       "</g>\n",
       "<!-- 4379414720*&#45;&gt;4379414720 -->\n",
       "<g id=\"edge3\" class=\"edge\">\n",
       "<title>4379414720*&#45;&gt;4379414720</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.23,-59C183.59,-59 191.85,-59 200.16,-59\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"199.94,-62.5 209.94,-59 199.94,-55.5 199.94,-62.5\"/>\n",
       "</g>\n",
       "<!-- 4379411744 -->\n",
       "<g id=\"node9\" class=\"node\">\n",
       "<title>4379411744</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"423.5,-68.5 423.5,-117.5 509.25,-117.5 509.25,-68.5 423.5,-68.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-100.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"423.5,-93 509.25,-93\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-75.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379411744&#45;&gt;4379411648+ -->\n",
       "<g id=\"edge14\" class=\"edge\">\n",
       "<title>4379411744&#45;&gt;4379411648+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M509.62,-106.83C518.61,-109.77 527.99,-112.84 536.65,-115.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"535.49,-118.98 546.08,-118.76 537.66,-112.32 535.49,-118.98\"/>\n",
       "</g>\n",
       "<!-- 4379411744+&#45;&gt;4379411744 -->\n",
       "<g id=\"edge4\" class=\"edge\">\n",
       "<title>4379411744+&#45;&gt;4379411744</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M387.98,-93C395.34,-93 403.6,-93 411.91,-93\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"411.69,-96.5 421.69,-93 411.69,-89.5 411.69,-96.5\"/>\n",
       "</g>\n",
       "<!-- 4379424560 -->\n",
       "<g id=\"node11\" class=\"node\">\n",
       "<title>4379424560</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-68.5 0,-117.5 85.75,-117.5 85.75,-68.5 0,-68.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-100.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-93 85.75,-93\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-75.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379424560&#45;&gt;4379415200* -->\n",
       "<g id=\"edge6\" class=\"edge\">\n",
       "<title>4379424560&#45;&gt;4379415200*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-106.83C95.11,-109.77 104.49,-112.84 113.15,-115.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"111.99,-118.98 122.58,-118.76 114.16,-112.32 111.99,-118.98\"/>\n",
       "</g>\n",
       "<!-- 4379424560&#45;&gt;4379414720* -->\n",
       "<g id=\"edge10\" class=\"edge\">\n",
       "<title>4379424560&#45;&gt;4379414720*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-79.17C95.11,-76.23 104.49,-73.16 113.15,-70.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"114.16,-73.68 122.58,-67.24 111.99,-67.02 114.16,-73.68\"/>\n",
       "</g>\n",
       "<!-- 4379412944 -->\n",
       "<g id=\"node12\" class=\"node\">\n",
       "<title>4379412944</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"847,-102.5 847,-151.5 932.75,-151.5 932.75,-102.5 847,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"889.88\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=6.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"847,-127 932.75,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"889.88\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379412944relu&#45;&gt;4379412944 -->\n",
       "<g id=\"edge5\" class=\"edge\">\n",
       "<title>4379412944relu&#45;&gt;4379412944</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M811.48,-127C818.84,-127 827.1,-127 835.41,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"835.19,-130.5 845.19,-127 835.19,-123.5 835.19,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379506672 -->\n",
       "<g id=\"node14\" class=\"node\">\n",
       "<title>4379506672</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-0.5 0,-49.5 85.75,-49.5 85.75,-0.5 0,-0.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-32.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-25 85.75,-25\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-7.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=0.0000</text>\n",
       "</g>\n",
       "<!-- 4379506672&#45;&gt;4379414720* -->\n",
       "<g id=\"edge12\" class=\"edge\">\n",
       "<title>4379506672&#45;&gt;4379414720*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-38.83C95.11,-41.77 104.49,-44.84 113.15,-47.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"111.99,-50.98 122.58,-50.76 114.16,-44.32 111.99,-50.98\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.graphs.Digraph at 0x10508b6a0>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w0 = Node(-1.0)\n",
    "w1 = Node(2.0)\n",
    "b  = Node(4.0)\n",
    "x  = Node(2.0)\n",
    "t  = Node(3.0)\n",
    "\n",
    "z = w0 * x + w1 * x + b\n",
    "u = z.tanh()\n",
    "y = z.relu()\n",
    "draw_graph(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "699d87d8",
   "metadata": {
    "papermill": {
     "duration": 0.005196,
     "end_time": "2024-09-05T18:30:31.671643",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.666447",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Gradients all check out:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7e3ede9e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:31.681248Z",
     "iopub.status.busy": "2024-09-05T18:30:31.680772Z",
     "iopub.status.idle": "2024-09-05T18:30:31.816814Z",
     "shell.execute_reply": "2024-09-05T18:30:31.816224Z"
    },
    "papermill": {
     "duration": 0.143049,
     "end_time": "2024-09-05T18:30:31.818431",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.675382",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 12.1.0 (20240811.2233)\n",
       " -->\n",
       "<!-- Pages: 1 -->\n",
       "<svg width=\"941pt\" height=\"194pt\"\n",
       " viewBox=\"0.00 0.00 940.75 194.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 190)\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-190 936.75,-190 936.75,4 -4,4\"/>\n",
       "<!-- 4379506720 -->\n",
       "<g id=\"node1\" class=\"node\">\n",
       "<title>4379506720</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"423.5,-136.5 423.5,-185.5 509.25,-185.5 509.25,-136.5 423.5,-136.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-168.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=4.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"423.5,-161 509.25,-161\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-143.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379411648+ -->\n",
       "<g id=\"node6\" class=\"node\">\n",
       "<title>4379411648+</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"572.25\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"572.25\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- 4379506720&#45;&gt;4379411648+ -->\n",
       "<g id=\"edge7\" class=\"edge\">\n",
       "<title>4379506720&#45;&gt;4379411648+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M509.62,-147.17C518.61,-144.23 527.99,-141.16 536.65,-138.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"537.66,-141.68 546.08,-135.24 535.49,-135.02 537.66,-141.68\"/>\n",
       "</g>\n",
       "<!-- 4379506768 -->\n",
       "<g id=\"node2\" class=\"node\">\n",
       "<title>4379506768</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-136.5 0,-185.5 85.75,-185.5 85.75,-136.5 0,-136.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-168.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=&#45;1.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-161 85.75,-161\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-143.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=2.0000</text>\n",
       "</g>\n",
       "<!-- 4379415200* -->\n",
       "<g id=\"node4\" class=\"node\">\n",
       "<title>4379415200*</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"148.75\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"148.75\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">*</text>\n",
       "</g>\n",
       "<!-- 4379506768&#45;&gt;4379415200* -->\n",
       "<g id=\"edge9\" class=\"edge\">\n",
       "<title>4379506768&#45;&gt;4379415200*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-147.17C95.11,-144.23 104.49,-141.16 113.15,-138.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"114.16,-141.68 122.58,-135.24 111.99,-135.02 114.16,-141.68\"/>\n",
       "</g>\n",
       "<!-- 4379415200 -->\n",
       "<g id=\"node3\" class=\"node\">\n",
       "<title>4379415200</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211.75,-102.5 211.75,-151.5 297.5,-151.5 297.5,-102.5 211.75,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=&#45;2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"211.75,-127 297.5,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379411744+ -->\n",
       "<g id=\"node10\" class=\"node\">\n",
       "<title>4379411744+</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"360.5\" cy=\"-93\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"360.5\" y=\"-87.95\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- 4379415200&#45;&gt;4379411744+ -->\n",
       "<g id=\"edge11\" class=\"edge\">\n",
       "<title>4379415200&#45;&gt;4379411744+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M297.87,-113.17C306.86,-110.23 316.24,-107.16 324.9,-104.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"325.91,-107.68 334.33,-101.24 323.74,-101.02 325.91,-107.68\"/>\n",
       "</g>\n",
       "<!-- 4379415200*&#45;&gt;4379415200 -->\n",
       "<g id=\"edge1\" class=\"edge\">\n",
       "<title>4379415200*&#45;&gt;4379415200</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.23,-127C183.59,-127 191.85,-127 200.16,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"199.94,-130.5 209.94,-127 199.94,-123.5 199.94,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379411648 -->\n",
       "<g id=\"node5\" class=\"node\">\n",
       "<title>4379411648</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"635.25,-102.5 635.25,-151.5 721,-151.5 721,-102.5 635.25,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"678.12\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=6.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"635.25,-127 721,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"678.12\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379412944relu -->\n",
       "<g id=\"node13\" class=\"node\">\n",
       "<title>4379412944relu</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"784\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"784\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">relu</text>\n",
       "</g>\n",
       "<!-- 4379411648&#45;&gt;4379412944relu -->\n",
       "<g id=\"edge13\" class=\"edge\">\n",
       "<title>4379411648&#45;&gt;4379412944relu</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M721.37,-127C729.28,-127 737.5,-127 745.26,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"745.07,-130.5 755.07,-127 745.07,-123.5 745.07,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379411648+&#45;&gt;4379411648 -->\n",
       "<g id=\"edge2\" class=\"edge\">\n",
       "<title>4379411648+&#45;&gt;4379411648</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M599.73,-127C607.09,-127 615.35,-127 623.66,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"623.44,-130.5 633.44,-127 623.44,-123.5 623.44,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379414720 -->\n",
       "<g id=\"node7\" class=\"node\">\n",
       "<title>4379414720</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211.75,-34.5 211.75,-83.5 297.5,-83.5 297.5,-34.5 211.75,-34.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-66.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=4.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"211.75,-59 297.5,-59\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-41.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379414720&#45;&gt;4379411744+ -->\n",
       "<g id=\"edge8\" class=\"edge\">\n",
       "<title>4379414720&#45;&gt;4379411744+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M297.87,-72.83C306.86,-75.77 316.24,-78.84 324.9,-81.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"323.74,-84.98 334.33,-84.76 325.91,-78.32 323.74,-84.98\"/>\n",
       "</g>\n",
       "<!-- 4379414720* -->\n",
       "<g id=\"node8\" class=\"node\">\n",
       "<title>4379414720*</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"148.75\" cy=\"-59\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"148.75\" y=\"-53.95\" font-family=\"Times,serif\" font-size=\"14.00\">*</text>\n",
       "</g>\n",
       "<!-- 4379414720*&#45;&gt;4379414720 -->\n",
       "<g id=\"edge3\" class=\"edge\">\n",
       "<title>4379414720*&#45;&gt;4379414720</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.23,-59C183.59,-59 191.85,-59 200.16,-59\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"199.94,-62.5 209.94,-59 199.94,-55.5 199.94,-62.5\"/>\n",
       "</g>\n",
       "<!-- 4379411744 -->\n",
       "<g id=\"node9\" class=\"node\">\n",
       "<title>4379411744</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"423.5,-68.5 423.5,-117.5 509.25,-117.5 509.25,-68.5 423.5,-68.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-100.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"423.5,-93 509.25,-93\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-75.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379411744&#45;&gt;4379411648+ -->\n",
       "<g id=\"edge14\" class=\"edge\">\n",
       "<title>4379411744&#45;&gt;4379411648+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M509.62,-106.83C518.61,-109.77 527.99,-112.84 536.65,-115.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"535.49,-118.98 546.08,-118.76 537.66,-112.32 535.49,-118.98\"/>\n",
       "</g>\n",
       "<!-- 4379411744+&#45;&gt;4379411744 -->\n",
       "<g id=\"edge4\" class=\"edge\">\n",
       "<title>4379411744+&#45;&gt;4379411744</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M387.98,-93C395.34,-93 403.6,-93 411.91,-93\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"411.69,-96.5 421.69,-93 411.69,-89.5 411.69,-96.5\"/>\n",
       "</g>\n",
       "<!-- 4379424560 -->\n",
       "<g id=\"node11\" class=\"node\">\n",
       "<title>4379424560</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-68.5 0,-117.5 85.75,-117.5 85.75,-68.5 0,-68.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-100.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-93 85.75,-93\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-75.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379424560&#45;&gt;4379415200* -->\n",
       "<g id=\"edge6\" class=\"edge\">\n",
       "<title>4379424560&#45;&gt;4379415200*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-106.83C95.11,-109.77 104.49,-112.84 113.15,-115.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"111.99,-118.98 122.58,-118.76 114.16,-112.32 111.99,-118.98\"/>\n",
       "</g>\n",
       "<!-- 4379424560&#45;&gt;4379414720* -->\n",
       "<g id=\"edge10\" class=\"edge\">\n",
       "<title>4379424560&#45;&gt;4379414720*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-79.17C95.11,-76.23 104.49,-73.16 113.15,-70.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"114.16,-73.68 122.58,-67.24 111.99,-67.02 114.16,-73.68\"/>\n",
       "</g>\n",
       "<!-- 4379412944 -->\n",
       "<g id=\"node12\" class=\"node\">\n",
       "<title>4379412944</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"847,-102.5 847,-151.5 932.75,-151.5 932.75,-102.5 847,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"889.88\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=6.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"847,-127 932.75,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"889.88\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379412944relu&#45;&gt;4379412944 -->\n",
       "<g id=\"edge5\" class=\"edge\">\n",
       "<title>4379412944relu&#45;&gt;4379412944</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M811.48,-127C818.84,-127 827.1,-127 835.41,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"835.19,-130.5 845.19,-127 835.19,-123.5 835.19,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379506672 -->\n",
       "<g id=\"node14\" class=\"node\">\n",
       "<title>4379506672</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-0.5 0,-49.5 85.75,-49.5 85.75,-0.5 0,-0.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-32.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-25 85.75,-25\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-7.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=2.0000</text>\n",
       "</g>\n",
       "<!-- 4379506672&#45;&gt;4379414720* -->\n",
       "<g id=\"edge12\" class=\"edge\">\n",
       "<title>4379506672&#45;&gt;4379414720*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-38.83C95.11,-41.77 104.49,-44.84 113.15,-47.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"111.99,-50.98 122.58,-50.76 114.16,-44.32 111.99,-50.98\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.graphs.Digraph at 0x10509f4f0>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.backward()\n",
    "draw_graph(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ab79a91",
   "metadata": {
    "papermill": {
     "duration": 0.00413,
     "end_time": "2024-09-05T18:30:31.827553",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.823423",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "Note that `u` is not shown in the graph and `u.grad` is zero since `y` has no dependence on `u`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8f8eac44",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:31.838625Z",
     "iopub.status.busy": "2024-09-05T18:30:31.838394Z",
     "iopub.status.idle": "2024-09-05T18:30:31.842133Z",
     "shell.execute_reply": "2024-09-05T18:30:31.841774Z"
    },
    "papermill": {
     "duration": 0.010971,
     "end_time": "2024-09-05T18:30:31.843340",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.832369",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u.grad"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d2b65dc",
   "metadata": {
    "papermill": {
     "duration": 0.003324,
     "end_time": "2024-09-05T18:30:31.850726",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.847402",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
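    "Moreover, gradients on shared parameters **accumulate** across backward passes on multiple inputs. The gradients from the earlier `y.backward()` call are not reset, so calling `y1.backward()` adds to them: `b.grad` goes from 1 to 2, and `w0.grad` from 2 to 2 + 1.7 = 3.7:"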
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "58aa8949",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-05T18:30:31.859213Z",
     "iopub.status.busy": "2024-09-05T18:30:31.859005Z",
     "iopub.status.idle": "2024-09-05T18:30:31.976780Z",
     "shell.execute_reply": "2024-09-05T18:30:31.975991Z"
    },
    "papermill": {
     "duration": 0.123475,
     "end_time": "2024-09-05T18:30:31.978153",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.854678",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 12.1.0 (20240811.2233)\n",
       " -->\n",
       "<!-- Pages: 1 -->\n",
       "<svg width=\"941pt\" height=\"194pt\"\n",
       " viewBox=\"0.00 0.00 940.75 194.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 190)\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-190 936.75,-190 936.75,4 -4,4\"/>\n",
       "<!-- 4379142672 -->\n",
       "<g id=\"node1\" class=\"node\">\n",
       "<title>4379142672</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"423.5,-68.5 423.5,-117.5 509.25,-117.5 509.25,-68.5 423.5,-68.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-100.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=1.700</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"423.5,-93 509.25,-93\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-75.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379501920+ -->\n",
       "<g id=\"node13\" class=\"node\">\n",
       "<title>4379501920+</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"572.25\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"572.25\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- 4379142672&#45;&gt;4379501920+ -->\n",
       "<g id=\"edge9\" class=\"edge\">\n",
       "<title>4379142672&#45;&gt;4379501920+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M509.62,-106.83C518.61,-109.77 527.99,-112.84 536.65,-115.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"535.49,-118.98 546.08,-118.76 537.66,-112.32 535.49,-118.98\"/>\n",
       "</g>\n",
       "<!-- 4379142672+ -->\n",
       "<g id=\"node2\" class=\"node\">\n",
       "<title>4379142672+</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"360.5\" cy=\"-93\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"360.5\" y=\"-87.95\" font-family=\"Times,serif\" font-size=\"14.00\">+</text>\n",
       "</g>\n",
       "<!-- 4379142672+&#45;&gt;4379142672 -->\n",
       "<g id=\"edge1\" class=\"edge\">\n",
       "<title>4379142672+&#45;&gt;4379142672</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M387.98,-93C395.34,-93 403.6,-93 411.91,-93\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"411.69,-96.5 421.69,-93 411.69,-89.5 411.69,-96.5\"/>\n",
       "</g>\n",
       "<!-- 4379506720 -->\n",
       "<g id=\"node3\" class=\"node\">\n",
       "<title>4379506720</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"423.5,-136.5 423.5,-185.5 509.25,-185.5 509.25,-136.5 423.5,-136.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-168.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=4.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"423.5,-161 509.25,-161\"/>\n",
       "<text text-anchor=\"middle\" x=\"466.38\" y=\"-143.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=2.0000</text>\n",
       "</g>\n",
       "<!-- 4379506720&#45;&gt;4379501920+ -->\n",
       "<g id=\"edge12\" class=\"edge\">\n",
       "<title>4379506720&#45;&gt;4379501920+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M509.62,-147.17C518.61,-144.23 527.99,-141.16 536.65,-138.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"537.66,-141.68 546.08,-135.24 535.49,-135.02 537.66,-141.68\"/>\n",
       "</g>\n",
       "<!-- 4379142192 -->\n",
       "<g id=\"node4\" class=\"node\">\n",
       "<title>4379142192</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-68.5 0,-117.5 85.75,-117.5 85.75,-68.5 0,-68.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-100.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=1.700</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-93 85.75,-93\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-75.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379141856* -->\n",
       "<g id=\"node9\" class=\"node\">\n",
       "<title>4379141856*</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"148.75\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"148.75\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">*</text>\n",
       "</g>\n",
       "<!-- 4379142192&#45;&gt;4379141856* -->\n",
       "<g id=\"edge13\" class=\"edge\">\n",
       "<title>4379142192&#45;&gt;4379141856*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-106.83C95.11,-109.77 104.49,-112.84 113.15,-115.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"111.99,-118.98 122.58,-118.76 114.16,-112.32 111.99,-118.98\"/>\n",
       "</g>\n",
       "<!-- 4379141472* -->\n",
       "<g id=\"node11\" class=\"node\">\n",
       "<title>4379141472*</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"148.75\" cy=\"-59\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"148.75\" y=\"-53.95\" font-family=\"Times,serif\" font-size=\"14.00\">*</text>\n",
       "</g>\n",
       "<!-- 4379142192&#45;&gt;4379141472* -->\n",
       "<g id=\"edge8\" class=\"edge\">\n",
       "<title>4379142192&#45;&gt;4379141472*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-79.17C95.11,-76.23 104.49,-73.16 113.15,-70.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"114.16,-73.68 122.58,-67.24 111.99,-67.02 114.16,-73.68\"/>\n",
       "</g>\n",
       "<!-- 4379506768 -->\n",
       "<g id=\"node5\" class=\"node\">\n",
       "<title>4379506768</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-0.5 0,-49.5 85.75,-49.5 85.75,-0.5 0,-0.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-32.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=&#45;1.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-25 85.75,-25\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-7.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=3.7000</text>\n",
       "</g>\n",
       "<!-- 4379506768&#45;&gt;4379141472* -->\n",
       "<g id=\"edge10\" class=\"edge\">\n",
       "<title>4379506768&#45;&gt;4379141472*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-38.83C95.11,-41.77 104.49,-44.84 113.15,-47.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"111.99,-50.98 122.58,-50.76 114.16,-44.32 111.99,-50.98\"/>\n",
       "</g>\n",
       "<!-- 4379142000 -->\n",
       "<g id=\"node6\" class=\"node\">\n",
       "<title>4379142000</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"847,-102.5 847,-151.5 932.75,-151.5 932.75,-102.5 847,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"889.88\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=5.700</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"847,-127 932.75,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"889.88\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379142000relu -->\n",
       "<g id=\"node7\" class=\"node\">\n",
       "<title>4379142000relu</title>\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"784\" cy=\"-127\" rx=\"27\" ry=\"18\"/>\n",
       "<text text-anchor=\"middle\" x=\"784\" y=\"-121.95\" font-family=\"Times,serif\" font-size=\"14.00\">relu</text>\n",
       "</g>\n",
       "<!-- 4379142000relu&#45;&gt;4379142000 -->\n",
       "<g id=\"edge2\" class=\"edge\">\n",
       "<title>4379142000relu&#45;&gt;4379142000</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M811.48,-127C818.84,-127 827.1,-127 835.41,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"835.19,-130.5 845.19,-127 835.19,-123.5 835.19,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379141856 -->\n",
       "<g id=\"node8\" class=\"node\">\n",
       "<title>4379141856</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211.75,-102.5 211.75,-151.5 297.5,-151.5 297.5,-102.5 211.75,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=3.400</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"211.75,-127 297.5,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379141856&#45;&gt;4379142672+ -->\n",
       "<g id=\"edge14\" class=\"edge\">\n",
       "<title>4379141856&#45;&gt;4379142672+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M297.87,-113.17C306.86,-110.23 316.24,-107.16 324.9,-104.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"325.91,-107.68 334.33,-101.24 323.74,-101.02 325.91,-107.68\"/>\n",
       "</g>\n",
       "<!-- 4379141856*&#45;&gt;4379141856 -->\n",
       "<g id=\"edge3\" class=\"edge\">\n",
       "<title>4379141856*&#45;&gt;4379141856</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.23,-127C183.59,-127 191.85,-127 200.16,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"199.94,-130.5 209.94,-127 199.94,-123.5 199.94,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379141472 -->\n",
       "<g id=\"node10\" class=\"node\">\n",
       "<title>4379141472</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211.75,-34.5 211.75,-83.5 297.5,-83.5 297.5,-34.5 211.75,-34.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-66.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=&#45;1.700</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"211.75,-59 297.5,-59\"/>\n",
       "<text text-anchor=\"middle\" x=\"254.62\" y=\"-41.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379141472&#45;&gt;4379142672+ -->\n",
       "<g id=\"edge11\" class=\"edge\">\n",
       "<title>4379141472&#45;&gt;4379142672+</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M297.87,-72.83C306.86,-75.77 316.24,-78.84 324.9,-81.67\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"323.74,-84.98 334.33,-84.76 325.91,-78.32 323.74,-84.98\"/>\n",
       "</g>\n",
       "<!-- 4379141472*&#45;&gt;4379141472 -->\n",
       "<g id=\"edge4\" class=\"edge\">\n",
       "<title>4379141472*&#45;&gt;4379141472</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M176.23,-59C183.59,-59 191.85,-59 200.16,-59\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"199.94,-62.5 209.94,-59 199.94,-55.5 199.94,-62.5\"/>\n",
       "</g>\n",
       "<!-- 4379501920 -->\n",
       "<g id=\"node12\" class=\"node\">\n",
       "<title>4379501920</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"635.25,-102.5 635.25,-151.5 721,-151.5 721,-102.5 635.25,-102.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"678.12\" y=\"-134.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=5.700</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"635.25,-127 721,-127\"/>\n",
       "<text text-anchor=\"middle\" x=\"678.12\" y=\"-109.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=1.0000</text>\n",
       "</g>\n",
       "<!-- 4379501920&#45;&gt;4379142000relu -->\n",
       "<g id=\"edge7\" class=\"edge\">\n",
       "<title>4379501920&#45;&gt;4379142000relu</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M721.37,-127C729.28,-127 737.5,-127 745.26,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"745.07,-130.5 755.07,-127 745.07,-123.5 745.07,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379501920+&#45;&gt;4379501920 -->\n",
       "<g id=\"edge5\" class=\"edge\">\n",
       "<title>4379501920+&#45;&gt;4379501920</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M599.73,-127C607.09,-127 615.35,-127 623.66,-127\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"623.44,-130.5 633.44,-127 623.44,-123.5 623.44,-130.5\"/>\n",
       "</g>\n",
       "<!-- 4379506672 -->\n",
       "<g id=\"node14\" class=\"node\">\n",
       "<title>4379506672</title>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-136.5 0,-185.5 85.75,-185.5 85.75,-136.5 0,-136.5\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-168.2\" font-family=\"Times,serif\" font-size=\"14.00\">data=2.000</text>\n",
       "<polyline fill=\"none\" stroke=\"black\" points=\"0,-161 85.75,-161\"/>\n",
       "<text text-anchor=\"middle\" x=\"42.88\" y=\"-143.7\" font-family=\"Times,serif\" font-size=\"14.00\">grad=3.7000</text>\n",
       "</g>\n",
       "<!-- 4379506672&#45;&gt;4379141856* -->\n",
       "<g id=\"edge6\" class=\"edge\">\n",
       "<title>4379506672&#45;&gt;4379141856*</title>\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M86.12,-147.17C95.11,-144.23 104.49,-141.16 113.15,-138.33\"/>\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"114.16,-141.68 122.58,-135.24 111.99,-135.02 114.16,-141.68\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<graphviz.graphs.Digraph at 0x10509f940>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = Node(1.7)\n",
    "z1 = w0 * x1 + w1 * x1 + b\n",
    "y1 = z1.relu()\n",
    "y1.backward()\n",
    "draw_graph(y1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "320a04cc",
   "metadata": {
    "papermill": {
     "duration": 0.00499,
     "end_time": "2024-09-05T18:30:31.987557",
     "exception": false,
     "start_time": "2024-09-05T18:30:31.982567",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 2.246177,
   "end_time": "2024-09-05T18:30:32.210832",
   "environment_variables": {},
   "exception": null,
   "input_path": "00a-compute-nodes.ipynb",
   "output_path": "00a-compute-nodes.ipynb",
   "parameters": {},
   "start_time": "2024-09-05T18:30:29.964655",
   "version": "2.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}