Neural network module

Here we construct the neural network module. The Module class is an abstract base class that maintains a list of the parameters used in the forward pass, which subclasses implement in __call__. The @final decorator signals that inheriting classes must not override these methods; Python does not enforce this at runtime, but a static type checker reports such an override as an error.

import random
from abc import ABC, abstractmethod
from typing import final

class Module(ABC):
    def __init__(self):
        self._parameters = []

    @final
    def parameters(self) -> list:
        return self._parameters

    @abstractmethod
    def __call__(self, x: list):
        pass

    @final
    def zero_grad(self):
        for p in self.parameters():
            p.grad = 0
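
For example, a subclass that overrides a @final method still runs at runtime, but a static type checker such as mypy reports the override as an error (a small illustration, not part of the module itself):

class BadModule(Module):
    def __call__(self, x: list):
        return x

    def parameters(self) -> list:  # a type checker flags this: cannot override a method marked @final
        return []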

The _parameters attribute is set once at construction so that the parameter list is not rebuilt on every call to parameters(). Implementing layers from neurons:

class Neuron(Module):
    def __init__(self, n_in, activation=None):
        self.n_in = n_in
        self.act = activation

        self.w = [Node(random.random()) for _ in range(n_in)]
        self.b = Node(0.0)
        self._parameters = self.w + [self.b]

    def __call__(self, x: list):
        assert len(x) == self.n_in
        out = sum((x[j] * self.w[j] for j in range(self.n_in)), start=self.b)  # dot(w, x) + b
        if self.act is not None:
            if self.act == "tanh":
                out = out.tanh()
            elif self.act == "relu":
                out = out.relu()
            else:
                raise NotImplementedError("Activation not supported.")
        return out

    def __repr__(self):
        return f"{self.act if self.act is not None else 'linear'}({len(self.w)})"


class Layer(Module):
    def __init__(self, n_in, n_out, *args):
        self.neurons = [Neuron(n_in, *args) for _ in range(n_out)]
        self._parameters = [p for n in self.neurons for p in n.parameters()]

    def __call__(self, x: list):
        out = [n(x) for n in self.neurons]
        return out[0] if len(out) == 1 else out

    def __repr__(self):
        return f"Layer[{', '.join(str(n) for n in self.neurons)}]"


class MLP(Module):
    def __init__(self, n_in, n_outs, activation=None):
        sizes = [n_in] + n_outs
        self.layers = []
        for i in range(len(n_outs)):
            act = activation if i < len(n_outs) - 1 else None  # final layer stays linear
            layer = Layer(sizes[i], sizes[i + 1], act)
            self.layers.append(layer)

        self._parameters = [p for layer in self.layers for p in layer.parameters()]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def __repr__(self):
        return f"MLP[{', '.join(str(layer) for layer in self.layers)}]"

Testing model initialization and a forward call. Note that the final layer has no activation:

model = MLP(n_in=1, n_outs=[2, 4, 1], activation="relu")
x = Node(1.0)
pred = model([x])
pred.backward()

print(model)
print(pred.data)
MLP[Layer[relu(1), relu(1)], Layer[relu(2), relu(2), relu(2), relu(2)], Layer[linear(4)]]
0.3551933990615996
draw_graph(pred)
[Figure: computation graph of pred]
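
As a usage sketch, here is one gradient-descent step on a squared-error loss. It relies only on operations already used above (Node addition and multiplication, backward(), and the .data and .grad attributes) and assumes .grad holds a plain float after backward():

# One SGD step on a single sample (sketch).
target = 0.5
lr = 0.05

model.zero_grad()                  # reset every parameter's .grad to 0
pred = model([Node(1.0)])
diff = pred + Node(-target)        # pred - target, written with the ops used above
loss = diff * diff                 # squared error
loss.backward()                    # populate .grad on all parameters

for p in model.parameters():
    p.data = p.data - lr * p.grad  # gradient-descent update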