From fc14381ad0ec82c0d469089718a85318936d2cac Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 6 Dec 2025 18:55:48 +0200
Subject: [PATCH 01/36] ML-392 Added `Layer` and `Hidden` interfaces to define
 contracts for neural network layers.

---
 .../Layers/Base/Contracts/Hidden.php          | 29 ++++++++++
 src/NeuralNet/Layers/Base/Contracts/Layer.php | 57 +++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 src/NeuralNet/Layers/Base/Contracts/Hidden.php
 create mode 100644 src/NeuralNet/Layers/Base/Contracts/Layer.php
diff --git a/src/NeuralNet/Layers/Base/Contracts/Hidden.php b/src/NeuralNet/Layers/Base/Contracts/Hidden.php
new file mode 100644
index 000000000..b73b63521
--- /dev/null
+++ b/src/NeuralNet/Layers/Base/Contracts/Hidden.php
@@ -0,0 +1,29 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Base\Contracts;
+
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Layer;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+
+/**
+ * Hidden
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+interface Hidden extends Layer
+{
+    /**
+     * Calculate the gradient and update the parameters of the layer.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred;
+}
diff --git a/src/NeuralNet/Layers/Base/Contracts/Layer.php b/src/NeuralNet/Layers/Base/Contracts/Layer.php
new file mode 100644
index 000000000..10cf17b6e
--- /dev/null
+++ b/src/NeuralNet/Layers/Base/Contracts/Layer.php
@@ -0,0 +1,57 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Base\Contracts;
+
+use NDArray;
+use Stringable;
+
+/**
+ * Hidden
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+interface Layer extends Stringable
+{
+    /**
+     * The width of the layer. i.e. the number of neurons or computation nodes.
+     *
+     * @internal
+     *
+     * @return positive-int
+     */
+    public function width() : int;
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int;
+
+    /**
+     * Feed the input forward to the next layer in the network.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray;
+
+    /**
+     * Forward pass during inference.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray;
+}

From 9546c9d84fce16f02efd3db0475352083eea8212 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 6 Dec 2025 19:14:33 +0200
Subject: [PATCH 02/36] ML-393 Added standalone `Activation` layer
 implementation with comprehensive unit tests and fixed broken source file
 link in the documentation

---
 .../hidden-layers/activation.md               |   8 +-
 .../Layers/Activation/Activation.php          | 184 ++++++++++++++++++
 .../Layers/Base/Contracts/Hidden.php          |   1 -
 .../Layers/Activation/ActivationTest.php      | 181 +++++++++++++++++
 4 files changed, 369 insertions(+), 5 deletions(-)
 create mode 100644 src/NeuralNet/Layers/Activation/Activation.php
 create mode 100644 tests/NeuralNet/Layers/Activation/ActivationTest.php

diff --git a/docs/neural-network/hidden-layers/activation.md b/docs/neural-network/hidden-layers/activation.md
index a4e4cde73..57d4dc46c 100644
--- a/docs/neural-network/hidden-layers/activation.md
+++ b/docs/neural-network/hidden-layers/activation.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Activation.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Activation/Activation.php">[source]</a></span>
 
 # Activation
 Activation layers apply a user-defined non-linear activation function to their inputs. They often work in conjunction with [Dense](dense.md) layers as a way to transform their output.
@@ -10,8 +10,8 @@ Activation layers apply a user-defined non-linear activation function to their i
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\Activation;
-use Rubix\ML\NeuralNet\ActivationFunctions\ReLU;
+use Rubix\ML\NeuralNet\Layers\Activation\Activation;
+use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;
 
 $layer = new Activation(new ReLU());
-```
\ No newline at end of file
+```
diff --git a/src/NeuralNet/Layers/Activation/Activation.php b/src/NeuralNet/Layers/Activation/Activation.php
new file mode 100644
index 000000000..4394350b4
--- /dev/null
+++ b/src/NeuralNet/Layers/Activation/Activation.php
@@ -0,0 +1,184 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Activation;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\RuntimeException;
+use Rubix\ML\NeuralNet\ActivationFunctions\Base\Contracts\ActivationFunction;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+
+/**
+ * Activation
+ *
+ * Activation layers apply a user-defined non-linear activation function to their
+ * inputs.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Activation implements Hidden
+{
+    /**
+     * The function that computes the output of the layer.
+     *
+     * @var ActivationFunction
+     */
+    protected ActivationFunction $activationFn;
+
+    /**
+     * The width of the layer.
+     *
+     * @var positive-int|null
+     */
+    protected ?int $width = null;
+
+    /**
+     * The memorized input matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * The memorized activation matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $output = null;
+
+    /**
+     * @param ActivationFunction $activationFn
+     */
+    public function __construct(ActivationFunction $activationFn)
+    {
+        $this->activationFn = $activationFn;
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        return $this->width;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $fanIn;
+
+        $this->width = $fanOut;
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $output = $this->activationFn->activate($input);
+
+        $this->input = $input;
+        $this->output = $output;
+
+        return $output;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $this->activationFn->activate($input);
+    }
+
+    /**
+     * Calculate the gradient and update the parameters of the layer.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        if (!$this->input or !$this->output) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        $input = $this->input;
+        $output = $this->output;
+
+        $this->input = $this->output = null;
+
+        return new Deferred(
+            [$this, 'gradient'],
+            [$input, $output, $prevGradient]
+        );
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @param NDArray $output
+     * @param Deferred $prevGradient
+     * @return NDArray
+     */
+    public function gradient(NDArray $input, NDArray $output, Deferred $prevGradient) : NDArray
+    {
+        return NumPower::multiply(
+            $this->activationFn->differentiate($input),
+            $prevGradient()
+        );
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Activation (activation fn: {$this->activationFn})";
+    }
+}
diff --git a/src/NeuralNet/Layers/Base/Contracts/Hidden.php b/src/NeuralNet/Layers/Base/Contracts/Hidden.php
index b73b63521..f903e3916 100644
--- a/src/NeuralNet/Layers/Base/Contracts/Hidden.php
+++ b/src/NeuralNet/Layers/Base/Contracts/Hidden.php
@@ -3,7 +3,6 @@
 namespace Rubix\ML\NeuralNet\Layers\Base\Contracts;
 
 use Rubix\ML\Deferred;
-use Rubix\ML\NeuralNet\Layers\Base\Contracts\Layer;
 use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
 
 /**
diff --git a/tests/NeuralNet/Layers/Activation/ActivationTest.php b/tests/NeuralNet/Layers/Activation/ActivationTest.php
new file mode 100644
index 000000000..2c203ad18
--- /dev/null
+++ b/tests/NeuralNet/Layers/Activation/ActivationTest.php
@@ -0,0 +1,181 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Activation;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use PHPUnit\Framework\Attributes\DataProvider;
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Activation\Activation;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Activation::class)]
+class ActivationTest extends TestCase
+{
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected Activation $layer;
+
+    /**
+     * @return array<int, array{NDArray,array}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            [
+                NumPower::array([
+                    [1.0, 2.5, -0.1],
+                    [0.1, 0.0, 3.0],
+                    [0.002, -6.0, -0.5],
+                ]),
+                [
+                    [1.0, 2.5, 0.0],
+                    [0.1, 0.0, 3.0],
+                    [0.002, 0.0, 0.0],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<int, array{NDArray,NDArray,array}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            [
+                NumPower::array([
+                    [1.0, 2.5, -0.1],
+                    [0.1, 0.0, 3.0],
+                    [0.002, -6.0, -0.5],
+                ]),
+                NumPower::array([
+                    [0.25, 0.7, 0.1],
+                    [0.50, 0.2, 0.01],
+                    [0.25, 0.1, 0.89],
+                ]),
+                [
+                    [0.25, 0.7, 0.0],
+                    [0.5, 0.0, 0.01],
+                    [0.25, 0, 0.0],
+                ],
+            ],
+        ];
+    }
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.25, 0.7, 0.1],
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Activation(new ReLU());
+    }
+
+    #[Test]
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
+    {
+        self::assertEquals('Activation (activation fn: ReLU)', (string) $this->layer);
+    }
+
+    #[Test]
+    #[TestDox('Initializes width equal to fan-in')]
+    public function testInitializeSetsWidth() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertEquals($this->fanIn, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes forward activations')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(NDArray $input, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $forward = $this->layer->forward($input);
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes backpropagated gradients after forward pass')]
+    #[DataProvider('backProvider')]
+    public function testBack(NDArray $input, NDArray $prevGrad, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Forward pass to set internal input/output state
+        $this->layer->forward($input);
+
+        $gradient = $this->layer
+            ->back(prevGradient: new Deferred(fn: fn () => $prevGrad), optimizer: $this->optimizer)
+            ->compute();
+
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes inference activations')]
+    #[DataProvider('forwardProvider')]
+    public function testInfer(NDArray $input, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $infer = $this->layer->infer($input);
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes gradient correctly given input, output, and previous gradient')]
+    #[DataProvider('backProvider')]
+    public function testGradient(NDArray $input, NDArray $prevGrad, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Produce output to pass explicitly to gradient
+        $output = $this->layer->forward($input);
+
+        $gradient = $this->layer->gradient(
+            $input,
+            $output,
+            new Deferred(fn: fn () => $prevGrad)
+        );
+
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+}

From 14a2b6f51d142a339a69fa9492a354138133611f Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 00:41:57 +0200
Subject: [PATCH 03/36] ML-394 Implemented `BatchNorm` layer with comprehensive
 unit tests and updated documentation with fixed source file link. Added
 `Parametric` interface to define parameterized layers.

---
 .../hidden-layers/batch-norm.md               |  10 +-
 .../Layers/Base/Contracts/Parametric.php      |  33 ++
 src/NeuralNet/Layers/BatchNorm/BatchNorm.php  | 424 ++++++++++++++++++
 .../Layers/BatchNorm/BatchNormTest.php        | 103 +++++
 4 files changed, 565 insertions(+), 5 deletions(-)
 create mode 100644 src/NeuralNet/Layers/Base/Contracts/Parametric.php
 create mode 100644 src/NeuralNet/Layers/BatchNorm/BatchNorm.php
 create mode 100644 tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php

diff --git a/docs/neural-network/hidden-layers/batch-norm.md b/docs/neural-network/hidden-layers/batch-norm.md
index 99fdefd22..373113e14 100644
--- a/docs/neural-network/hidden-layers/batch-norm.md
+++ b/docs/neural-network/hidden-layers/batch-norm.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/BatchNorm.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/BatchNorm/BatchNorm.php">[source]</a></span>
 
 # Batch Norm
 Batch Norm layers normalize the activations of the previous layer such that the mean activation is *close* to 0 and the standard deviation is *close* to 1. Adding Batch Norm reduces the amount of covariate shift within the network which makes it possible to use higher learning rates and thus converge faster under some circumstances.
@@ -12,12 +12,12 @@ Batch Norm layers normalize the activations of the previous layer such that the
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\BatchNorm;
-use Rubix\ML\NeuralNet\Initializers\Constant;
-use Rubix\ML\NeuralNet\Initializers\Normal;
+use Rubix\ML\NeuralNet\Layers\BatchNorm\BatchNorm;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Initializers\Normal\Normal;
 
 $layer = new BatchNorm(0.7, new Constant(0.), new Normal(1.));
 ```
 
 ## References
-[^1]: S. Ioffe et al. (2015). Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
\ No newline at end of file
+[^1]: S. Ioffe et al. (2015). Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift.
diff --git a/src/NeuralNet/Layers/Base/Contracts/Parametric.php b/src/NeuralNet/Layers/Base/Contracts/Parametric.php
new file mode 100644
index 000000000..ed772c85d
--- /dev/null
+++ b/src/NeuralNet/Layers/Base/Contracts/Parametric.php
@@ -0,0 +1,33 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Base\Contracts;
+
+use Generator;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+
+/**
+ * Parametric
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+interface Parametric
+{
+    /**
+     * Return the parameters of the layer.
+     *
+     * @return Generator<\Rubix\ML\NeuralNet\Parameter>
+     */
+    public function parameters() : Generator;
+
+    /**
+     * Restore the parameters on the layer from an associative array.
+     *
+     * @param Parameter[] $parameters
+     */
+    public function restore(array $parameters) : void;
+}
diff --git a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
new file mode 100644
index 000000000..98c401f48
--- /dev/null
+++ b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
@@ -0,0 +1,424 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\BatchNorm;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Initializers\Base\Initializer;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+use Generator;
+
+use const Rubix\ML\EPSILON;
+
+/**
+ * Batch Norm
+ *
+ * Normalize the activations of the previous layer such that the mean activation
+ * is close to 0 and the standard deviation is close to 1. Batch Norm can reduce
+ * the amount of covariate shift within the network which makes it possible to use
+ * higher learning rates and converge faster under some circumstances.
+ *
+ * References:
+ * [1] S. Ioffe et al. (2015). Batch Normalization: Accelerating Deep Network
+ * Training by Reducing Internal Covariate Shift.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ */
+class BatchNorm implements Hidden, Parametric
+{
+    /**
+     * The decay rate of the previous running averages of the global mean and variance.
+     *
+     * @var float
+     */
+    protected float $decay;
+
+    /**
+     * The initializer for the beta parameter.
+     *
+     * @var Initializer
+     */
+    protected Initializer $betaInitializer;
+
+    /**
+     * The initializer for the gamma parameter.
+     *
+     * @var Initializer
+     */
+    protected Initializer $gammaInitializer;
+
+    /**
+     * The width of the layer. i.e. the number of neurons.
+     *
+     * @var positive-int|null
+     */
+    protected ?int $width = null;
+
+    /**
+     * The learnable centering parameter.
+     *
+     * @var Parameter|null
+     */
+    protected ?Parameter $beta = null;
+
+    /**
+     * The learnable scaling parameter.
+     *
+     * @var Parameter|null
+     */
+    protected ?Parameter $gamma = null;
+
+    /**
+     * The running mean of each input dimension.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $mean = null;
+
+    /**
+     * The running variance of each input dimension.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $variance = null;
+
+    /**
+     * A cache of inverse standard deviations calculated during the forward pass.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $stdInv = null;
+
+    /**
+     * A cache of normalized inputs to the layer.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $xHat = null;
+
+    /**
+     * @param float $decay
+     * @param Initializer|null $betaInitializer
+     * @param Initializer|null $gammaInitializer
+     * @throws InvalidArgumentException
+     */
+    public function __construct(float $decay = 0.1, ?Initializer $betaInitializer = null, ?Initializer $gammaInitializer = null)
+    {
+        if ($decay < 0.0 or $decay > 1.0) {
+            throw new InvalidArgumentException("Decay must be between 0 and 1, $decay given.");
+        }
+
+        $this->decay = $decay;
+        $this->betaInitializer = $betaInitializer ?? new Constant(0.0);
+        $this->gammaInitializer = $gammaInitializer ?? new Constant(1.0);
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        return $this->width;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $fanIn;
+
+        // Initialize beta and gamma as vectors of length fanOut
+        // We request a [fanOut, 1] NDArray and then flatten to 1-D
+        $betaMat = $this->betaInitializer->initialize(1, $fanOut);
+        $gammaMat = $this->gammaInitializer->initialize(1, $fanOut);
+
+        $beta = NumPower::flatten($betaMat);
+        $gamma = NumPower::flatten($gammaMat);
+
+        $this->beta = new Parameter($beta);
+        $this->gamma = new Parameter($gamma);
+
+        $this->width = $fanOut;
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        if (!$this->beta or !$this->gamma) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        $rows = $input->shape()[0];
+        $meanArr = [];
+        $varArr = [];
+        $stdInvArr = [];
+
+        for ($i = 0; $i < $rows; $i++) {
+            $meanArr[$i] = NumPower::mean($input->toArray()[$i]);
+            $varArr[$i] = NumPower::variance($input->toArray()[$i]);
+            $stdInvArr[$i] = 1.0 / sqrt($varArr[$i]);
+        }
+
+        $mean = NumPower::array($meanArr);
+
+        $variance = NumPower::array($varArr);
+        $variance = NumPower::clip($variance, EPSILON, PHP_FLOAT_MAX);
+
+        $stdInv = NumPower::array($stdInvArr);
+
+        $xHat = NumPower::multiply(
+            NumPower::subtract(NumPower::transpose($input, [1, 0]), $mean),
+            $stdInv
+        );
+        $xHat = NumPower::transpose($xHat, [1, 0]);
+
+        // Initialize running stats if needed
+        if (!$this->mean or !$this->variance) {
+            $this->mean = $mean;
+            $this->variance = $variance;
+        }
+
+        // Update running mean/variance: running = running*(1-decay) + current*decay
+        $this->mean = NumPower::add(
+            NumPower::multiply($this->mean, 1.0 - $this->decay),
+            NumPower::multiply($mean, $this->decay)
+        );
+
+        $this->variance = NumPower::add(
+            NumPower::multiply($this->variance, 1.0 - $this->decay),
+            NumPower::multiply($variance, $this->decay)
+        );
+
+        $this->stdInv = $stdInv;
+        $this->xHat = $xHat;
+
+        // gamma * xHat + beta (per-column scale/shift) using NDArray ops
+        return NumPower::add(NumPower::multiply($xHat, $this->gamma->param()), $this->beta->param());
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        if (!$this->mean or !$this->variance or !$this->beta or !$this->gamma) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        $xHat = NumPower::divide(
+            NumPower::subtract($input, $this->mean),
+            NumPower::sqrt($this->variance)
+        );
+
+
+        $return = NumPower::add(
+            NumPower::multiply(
+                $xHat,
+                $this->gamma->param()
+            ),
+            $this->beta->param()
+        );
+        //pp("xxxxxxxxxxxxxxxxxxxxxxxxxx", $return->toArray());
+
+        return $return;
+    }
+
+    /**
+     * Calculate the errors and gradients of the layer and update the parameters.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        if (!$this->beta or !$this->gamma) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        if (!$this->stdInv or !$this->xHat) {
+            throw new RuntimeException('Must perform forward pass before'
+                . ' backpropagating.');
+        }
+
+        $dOut = $prevGradient();
+//        pp('New dOut: ', $dOut->toArray());
+
+        $dBeta = NumPower::sum($dOut, 1);
+//        pp('New dBeta: ', $dBeta->toArray());
+
+        $dGamma = NumPower::sum(NumPower::multiply($dOut, $this->xHat), 1);
+//        pp('New dGamma: ', $dGamma->toArray());
+
+        $gamma = $this->gamma->param();
+        //pp('New Gamma: ', $gamma->toArray());
+
+        $this->beta->update($dBeta, $optimizer);
+        $this->gamma->update($dGamma, $optimizer);
+
+        $stdInv = $this->stdInv;
+        $xHat = $this->xHat;
+
+        $this->stdInv = $this->xHat = null;
+
+        $return  = new Deferred(
+            [$this, 'gradient'],
+            [$dOut, $gamma, $stdInv, $xHat]
+        );
+
+        //pp('New back: ', $dOut->toArray(), $gamma->toArray(), $stdInv->toArray(), $xHat->toArray(), end: "\n");
+
+        return $return;
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @internal
+     *
+     * @param NDArray $dOut
+     * @param NDArray $gamma
+     * @param NDArray $stdInv
+     * @param NDArray $xHat
+     * @return NDArray
+     */
+    public function gradient(NDArray $dOut, NDArray $gamma, NDArray $stdInv, NDArray $xHat) : NDArray
+    {
+        // Implement the same formula using PHP arrays
+        $dOutArr = $dOut->toArray();
+        $gammaArr = $gamma->toArray(); // 1-D length n
+        $stdInvArr = $stdInv->toArray(); // 1-D length n
+        $xHatArr = $xHat->toArray(); // [m, n]
+
+        $m = count($dOutArr);
+        $n = $m > 0 ? count($dOutArr[0]) : 0;
+
+        // dXHat = dOut * gamma (per column)
+        $dXHatArr = [];
+        for ($i = 0; $i < $m; $i++) {
+            $row = [];
+            for ($j = 0; $j < $n; $j++) {
+                $row[] = $dOutArr[$i][$j] * $gammaArr[$j];
+            }
+            $dXHatArr[] = $row;
+        }
+
+        // xHatSigma = sum(dXHat * xHat) per column
+        $xHatSigma = array_fill(0, $n, 0.0);
+        $dXHatSigma = array_fill(0, $n, 0.0);
+        for ($j = 0; $j < $n; $j++) {
+            $sum1 = 0.0;
+            $sum2 = 0.0;
+            for ($i = 0; $i < $m; $i++) {
+                $sum1 += $dXHatArr[$i][$j] * $xHatArr[$i][$j];
+                $sum2 += $dXHatArr[$i][$j];
+            }
+            $xHatSigma[$j] = $sum1;
+            $dXHatSigma[$j] = $sum2;
+        }
+
+        // Compute gradient for previous layer per formula:
+        // dX = (dXHat * m - dXHatSigma - xHat * xHatSigma) * (stdInv / m)
+        $dXArr = [];
+        for ($i = 0; $i < $m; $i++) {
+            $row = [];
+            for ($j = 0; $j < $n; $j++) {
+                $val = ($dXHatArr[$i][$j] * $m)
+                    - $dXHatSigma[$j]
+                    - ($xHatArr[$i][$j] * $xHatSigma[$j]);
+                $row[] = $val * ($stdInvArr[$j] / ($m ?: 1));
+            }
+            $dXArr[] = $row;
+        }
+
+        return NumPower::array($dXArr);
+    }
+
+    /**
+     * Return the parameters of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return Generator<Parameter>
+     */
+    public function parameters() : Generator
+    {
+        if (!$this->beta or !$this->gamma) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        yield 'beta' => $this->beta;
+        yield 'gamma' => $this->gamma;
+    }
+
+    /**
+     * Restore the parameters in the layer from an associative array.
+     *
+     * @internal
+     *
+     * @param Parameter[] $parameters
+     */
+    public function restore(array $parameters) : void
+    {
+        $this->beta = $parameters['beta'];
+        $this->gamma = $parameters['gamma'];
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Batch Norm (decay: {$this->decay}, beta initializer: {$this->betaInitializer},"
+            . " gamma initializer: {$this->gammaInitializer})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
new file mode 100644
index 000000000..ad5fcdc07
--- /dev/null
+++ b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
@@ -0,0 +1,103 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\BatchNorm;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\BatchNorm\BatchNorm;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(BatchNorm::class)]
+class BatchNormTest extends TestCase
+{
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected BatchNorm $layer;
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.25, 0.7, 0.1],
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new BatchNorm(
+            decay: 0.9,
+            betaInitializer: new Constant(0.0),
+            gammaInitializer: new Constant(1.0)
+        );
+    }
+
+    public function testInitializeForwardBackInfer() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertEquals($this->fanIn, $this->layer->width());
+
+        $expected = [
+            [-0.1251222, 1.2825030, -1.1573808],
+            [-0.6708631, -0.7427414, 1.4136046],
+            [0.7974157, -1.4101899, 0.6127743],
+        ];
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+
+        $gradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        $expected = [
+            [-0.06445877134888621, 0.027271018647605647, 0.03718775270128047],
+            [0.11375900761901864, -0.10996704069838469, -0.0037919669206339162],
+            [-0.11909780311643131, -0.01087038130262698, 0.1299681844190583],
+        ];
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+
+//        $expected = [
+//            [-0.1260783, 1.2804902385302876, -1.1575619225761131],
+//            [-0.6718883801743488, -0.7438003494787433, 1.4135587296530918],
+//            [0.7956943312039361, -1.4105786650534555, 0.6111643338495193],
+//        ];
+//
+//        $infer = $this->layer->infer($this->input);
+//
+//        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+//        self::assertTrue(true);
+    }
+}

From 46e101b00b689352c8b919787fbd36ccd1d18d18 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 18:17:21 +0200
Subject: [PATCH 04/36] ML-395 Refactored `BatchNorm` layer to improve
 row/column normalization support

---
 src/NeuralNet/Layers/BatchNorm/BatchNorm.php  |  99 +++----
 .../Layers/BatchNorm/BatchNormTest.php        | 247 ++++++++++++++++--
 2 files changed, 258 insertions(+), 88 deletions(-)

diff --git a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
index 98c401f48..b7c170abb 100644
--- a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
+++ b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
@@ -105,6 +105,13 @@ class BatchNorm implements Hidden, Parametric
      */
     protected ?NDArray $xHat = null;
 
+    /**
+     * Row-wise or column-wise normalization.
+     *
+     * @var int
+     */
+    protected const int COLUMN_WISE = 1;
+
     /**
      * @param float $decay
      * @param Initializer|null $betaInitializer
@@ -246,20 +253,21 @@ public function infer(NDArray $input) : NDArray
             throw new RuntimeException('Layer has not been initialized.');
         }
 
+        // Number of rows
+        $m = $input->shape()[0];
+
         $xHat = NumPower::divide(
-            NumPower::subtract($input, $this->mean),
-            NumPower::sqrt($this->variance)
+            NumPower::subtract($input, NumPower::reshape($this->mean, [$m, 1])),
+            NumPower::reshape(NumPower::sqrt($this->variance), [$m, 1])
         );
 
-
-        $return = NumPower::add(
+        return NumPower::add(
             NumPower::multiply(
                 $xHat,
                 $this->gamma->param()
             ),
             $this->beta->param()
         );
-        //pp("xxxxxxxxxxxxxxxxxxxxxxxxxx", $return->toArray());
 
         return $return;
     }
@@ -286,16 +294,9 @@ public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
         }
 
         $dOut = $prevGradient();
-//        pp('New dOut: ', $dOut->toArray());
-
-        $dBeta = NumPower::sum($dOut, 1);
-//        pp('New dBeta: ', $dBeta->toArray());
-
-        $dGamma = NumPower::sum(NumPower::multiply($dOut, $this->xHat), 1);
-//        pp('New dGamma: ', $dGamma->toArray());
-
+        $dBeta = NumPower::sum($dOut, self::COLUMN_WISE);
+        $dGamma = NumPower::sum(NumPower::multiply($dOut, $this->xHat), self::COLUMN_WISE);
         $gamma = $this->gamma->param();
-        //pp('New Gamma: ', $gamma->toArray());
 
         $this->beta->update($dBeta, $optimizer);
         $this->gamma->update($dGamma, $optimizer);
@@ -305,13 +306,11 @@ public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
 
         $this->stdInv = $this->xHat = null;
 
-        $return  = new Deferred(
+        return new Deferred(
             [$this, 'gradient'],
             [$dOut, $gamma, $stdInv, $xHat]
         );
 
-        //pp('New back: ', $dOut->toArray(), $gamma->toArray(), $stdInv->toArray(), $xHat->toArray(), end: "\n");
-
         return $return;
     }
 
@@ -328,54 +327,26 @@ public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
      */
     public function gradient(NDArray $dOut, NDArray $gamma, NDArray $stdInv, NDArray $xHat) : NDArray
     {
-        // Implement the same formula using PHP arrays
-        $dOutArr = $dOut->toArray();
-        $gammaArr = $gamma->toArray(); // 1-D length n
-        $stdInvArr = $stdInv->toArray(); // 1-D length n
-        $xHatArr = $xHat->toArray(); // [m, n]
-
-        $m = count($dOutArr);
-        $n = $m > 0 ? count($dOutArr[0]) : 0;
-
-        // dXHat = dOut * gamma (per column)
-        $dXHatArr = [];
-        for ($i = 0; $i < $m; $i++) {
-            $row = [];
-            for ($j = 0; $j < $n; $j++) {
-                $row[] = $dOutArr[$i][$j] * $gammaArr[$j];
-            }
-            $dXHatArr[] = $row;
-        }
-
-        // xHatSigma = sum(dXHat * xHat) per column
-        $xHatSigma = array_fill(0, $n, 0.0);
-        $dXHatSigma = array_fill(0, $n, 0.0);
-        for ($j = 0; $j < $n; $j++) {
-            $sum1 = 0.0;
-            $sum2 = 0.0;
-            for ($i = 0; $i < $m; $i++) {
-                $sum1 += $dXHatArr[$i][$j] * $xHatArr[$i][$j];
-                $sum2 += $dXHatArr[$i][$j];
-            }
-            $xHatSigma[$j] = $sum1;
-            $dXHatSigma[$j] = $sum2;
-        }
-
-        // Compute gradient for previous layer per formula:
-        // dX = (dXHat * m - dXHatSigma - xHat * xHatSigma) * (stdInv / m)
-        $dXArr = [];
-        for ($i = 0; $i < $m; $i++) {
-            $row = [];
-            for ($j = 0; $j < $n; $j++) {
-                $val = ($dXHatArr[$i][$j] * $m)
-                    - $dXHatSigma[$j]
-                    - ($xHatArr[$i][$j] * $xHatSigma[$j]);
-                $row[] = $val * ($stdInvArr[$j] / ($m ?: 1));
-            }
-            $dXArr[] = $row;
-        }
+        $dXHat = NumPower::multiply($dOut, $gamma);
+        $xHatSigma = NumPower::sum(NumPower::multiply($dXHat, $xHat), self::COLUMN_WISE);
+        $dXHatSigma = NumPower::sum($dXHat, self::COLUMN_WISE);
+
+        // Number of rows
+        $m = $dOut->shape()[0];
+
+        // Compute gradient per formula: dX = (dXHat * m - dXHatSigma - xHat * xHatSigma) * (stdInv / m)
+        return NumPower::multiply(
+            NumPower::subtract(
+                NumPower::subtract(
+                    NumPower::multiply($dXHat, $m),
+                    NumPower::reshape($dXHatSigma, [$m, 1])
+                ),
+                NumPower::multiply($xHat, NumPower::reshape($xHatSigma, [$m, 1]))
+            ),
+            NumPower::reshape(NumPower::divide($stdInv, $m), [$m, 1])
+        );
 
-        return NumPower::array($dXArr);
+        return $return;
     }
 
     /**
diff --git a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
index ad5fcdc07..9f05ab47b 100644
--- a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
+++ b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
@@ -8,11 +8,17 @@
 use PHPUnit\Framework\Attributes\Group;
 use NDArray;
 use NumPower;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use PHPUnit\Framework\Attributes\DataProvider;
 use Rubix\ML\Deferred;
 use Rubix\ML\NeuralNet\Layers\BatchNorm\BatchNorm;
 use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
 use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
 use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Parameters\Parameter as TrainableParameter;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException as RubixRuntimeException;
 use PHPUnit\Framework\TestCase;
 
 #[Group('Layers')]
@@ -32,6 +38,83 @@ class BatchNormTest extends TestCase
 
     protected BatchNorm $layer;
 
+    /**
+     * @return array<string, array{0:int}>
+     */
+    public static function initializeProvider() : array
+    {
+        return [
+            'fanIn=3' => [3],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            'expectedForward' => [[
+                [-0.1251222, 1.2825030, -1.1573808],
+                [-0.6708631, -0.7427414, 1.4136046],
+                [0.7974157, -1.4101899, 0.6127743],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            'expectedGradient' => [[
+                [-0.0644587, 0.0272710, 0.0371877],
+                [0.1137590, -0.1099670, -0.0037919],
+                [-0.1190978, -0.0108703, 0.1299681],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function inferProvider() : array
+    {
+        return [
+            'expectedInfer' => [[
+                [-0.1251222, 1.2825031, -1.1573808],
+                [-0.6708631, -0.7427414, 1.4136046],
+                [0.7974158, -1.4101899, 0.6127743],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function gradientProvider() : array
+    {
+        return [
+            'expectedGradient' => [[
+                [-0.0644587, 0.0272710, 0.0371877],
+                [0.1137590, -0.1099670, -0.0037919],
+                [-0.1190978, -0.0108703, 0.1299681],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:float}>
+     */
+    public static function badDecayProvider() : array
+    {
+        return [
+            'negative' => [-0.01],
+            'greaterThanOne' => [1.01],
+        ];
+    }
+
     protected function setUp() : void
     {
         $this->fanIn = 3;
@@ -59,45 +142,161 @@ protected function setUp() : void
         );
     }
 
-    public function testInitializeForwardBackInfer() : void
+    #[Test]
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
     {
-        $this->layer->initialize($this->fanIn);
+        self::assertEquals(
+            'Batch Norm (decay: 0.9, beta initializer: Constant (value: 0), gamma initializer: Constant (value: 1))',
+            (string) $this->layer
+        );
+    }
 
-        self::assertEquals($this->fanIn, $this->layer->width());
+    #[Test]
+    #[TestDox('Initializes width and returns fan out')]
+    #[DataProvider('initializeProvider')]
+    public function testInitialize(int $fanIn) : void
+    {
+        $fanOut = $this->layer->initialize($fanIn);
+        self::assertEquals($fanIn, $fanOut);
+        self::assertEquals($fanIn, $this->layer->width());
+    }
 
-        $expected = [
-            [-0.1251222, 1.2825030, -1.1573808],
-            [-0.6708631, -0.7427414, 1.4136046],
-            [0.7974157, -1.4101899, 0.6127743],
-        ];
+    #[Test]
+    #[TestDox('Computes forward pass')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
 
         $forward = $this->layer->forward($this->input);
 
         self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates and returns gradient for previous layer')]
+    #[DataProvider('backProvider')]
+    public function testBack(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+        $this->layer->forward($this->input);
 
         $gradient = $this->layer->back(
             prevGradient: $this->prevGrad,
             optimizer: $this->optimizer
         )->compute();
 
-        $expected = [
-            [-0.06445877134888621, 0.027271018647605647, 0.03718775270128047],
-            [0.11375900761901864, -0.10996704069838469, -0.0037919669206339162],
-            [-0.11909780311643131, -0.01087038130262698, 0.1299681844190583],
-        ];
-
         self::assertInstanceOf(NDArray::class, $gradient);
         self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Infers using running statistics')]
+    #[DataProvider('inferProvider')]
+    public function testInfer(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+        // Perform a forward pass to set running mean/variance
+        $this->layer->forward($this->input);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Throws when width is requested before initialization')]
+    public function testWidthThrowsBeforeInitialize() : void
+    {
+        $layer = new BatchNorm();
+        $this->expectException(RubixRuntimeException::class);
+        $layer->width();
+    }
+
+    #[Test]
+    #[TestDox('Constructor rejects invalid decay values')]
+    #[DataProvider('badDecayProvider')]
+    public function testConstructorRejectsInvalidDecay(float $decay) : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        new BatchNorm(decay: $decay);
+    }
+
+    #[Test]
+    #[TestDox('Yields trainable parameters beta and gamma')]
+    public function testParameters() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $params = iterator_to_array($this->layer->parameters());
 
-//        $expected = [
-//            [-0.1260783, 1.2804902385302876, -1.1575619225761131],
-//            [-0.6718883801743488, -0.7438003494787433, 1.4135587296530918],
-//            [0.7956943312039361, -1.4105786650534555, 0.6111643338495193],
-//        ];
-//
-//        $infer = $this->layer->infer($this->input);
-//
-//        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
-//        self::assertTrue(true);
+        self::assertArrayHasKey('beta', $params);
+        self::assertArrayHasKey('gamma', $params);
+        self::assertInstanceOf(TrainableParameter::class, $params['beta']);
+        self::assertInstanceOf(TrainableParameter::class, $params['gamma']);
+
+        self::assertEquals([0.0, 0.0, 0.0], $params['beta']->param()->toArray());
+        self::assertEquals([1.0, 1.0, 1.0], $params['gamma']->param()->toArray());
+    }
+
+    #[Test]
+    #[TestDox('Restores parameters from array')]
+    public function testRestore() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $betaNew = new TrainableParameter(NumPower::full([3], 2.0));
+        $gammaNew = new TrainableParameter(NumPower::full([3], 3.0));
+
+        $this->layer->restore([
+            'beta' => $betaNew,
+            'gamma' => $gammaNew,
+        ]);
+
+        $restored = iterator_to_array($this->layer->parameters());
+        self::assertSame($betaNew, $restored['beta']);
+        self::assertSame($gammaNew, $restored['gamma']);
+        self::assertEquals([2.0, 2.0, 2.0], $restored['beta']->param()->toArray());
+        self::assertEquals([3.0, 3.0, 3.0], $restored['gamma']->param()->toArray());
+    }
+
+    #[Test]
+    #[TestDox('Computes gradient for previous layer directly')]
+    #[DataProvider('gradientProvider')]
+    public function testGradient(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Compute forward-time caches manually to pass into gradient()
+        $input = $this->input;
+        $rows = $input->shape()[0];
+        $meanArr = [];
+        $varArr = [];
+        $stdInvArr = [];
+
+        for ($i = 0; $i < $rows; $i++) {
+            $row = $input->toArray()[$i];
+            $meanArr[$i] = NumPower::mean($row);
+            $varArr[$i] = NumPower::variance($row);
+            $stdInvArr[$i] = 1.0 / sqrt($varArr[$i]);
+        }
+
+        $mean = NumPower::array($meanArr);
+        $stdInv = NumPower::array($stdInvArr);
+
+        $xHat = NumPower::multiply(
+            NumPower::subtract(NumPower::transpose($input, [1, 0]), $mean),
+            $stdInv
+        );
+        $xHat = NumPower::transpose($xHat, [1, 0]);
+
+        // Use provided prevGrad as dOut and current gamma parameter
+        $dOut = ($this->prevGrad)();
+        $gamma = iterator_to_array($this->layer->parameters())['gamma']->param();
+
+        $gradient = $this->layer->gradient($dOut, $gamma, $stdInv, $xHat);
+
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
     }
 }

From a44e86a9d4aa2f7e098861a094bf2b13d47d3e21 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 19:01:46 +0200
Subject: [PATCH 05/36] ML-396 Refactored `BatchNorm` layer to optimize
 normalization logic with `NumPower` utilities

---
 src/NeuralNet/Layers/BatchNorm/BatchNorm.php  | 34 +++++++++----------
 .../Layers/BatchNorm/BatchNormTest.php        |  4 +--
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
index b7c170abb..1e29a81ec 100644
--- a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
+++ b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
@@ -190,29 +190,27 @@ public function forward(NDArray $input) : NDArray
             throw new RuntimeException('Layer has not been initialized.');
         }
 
-        $rows = $input->shape()[0];
-        $meanArr = [];
-        $varArr = [];
-        $stdInvArr = [];
-
-        for ($i = 0; $i < $rows; $i++) {
-            $meanArr[$i] = NumPower::mean($input->toArray()[$i]);
-            $varArr[$i] = NumPower::variance($input->toArray()[$i]);
-            $stdInvArr[$i] = 1.0 / sqrt($varArr[$i]);
-        }
+        // Shape: [m, n]
+        [$m, $n] = $input->shape();
+
+        // Row-wise mean across features (axis 1), length m
+        $sum = NumPower::sum($input, 1);
+        $mean = NumPower::divide($sum, $n);
 
-        $mean = NumPower::array($meanArr);
+        // Center the input: broadcast mean to [m, n]
+        $centered = NumPower::subtract($input, NumPower::reshape($mean, [$m, 1]));
 
-        $variance = NumPower::array($varArr);
+        // Row-wise variance across features (axis 1)
+        $centeredSq = NumPower::multiply($centered, $centered);
+        $varSum = NumPower::sum($centeredSq, 1);
+        $variance = NumPower::divide($varSum, $n);
         $variance = NumPower::clip($variance, EPSILON, PHP_FLOAT_MAX);
 
-        $stdInv = NumPower::array($stdInvArr);
+        // Inverse std from clipped variance
+        $stdInv = NumPower::reciprocal(NumPower::sqrt($variance));
 
-        $xHat = NumPower::multiply(
-            NumPower::subtract(NumPower::transpose($input, [1, 0]), $mean),
-            $stdInv
-        );
-        $xHat = NumPower::transpose($xHat, [1, 0]);
+        // Normalize: (x - mean) * stdInv
+        $xHat = NumPower::multiply($centered, NumPower::reshape($stdInv, [$m, 1]));
 
         // Initialize running stats if needed
         if (!$this->mean or !$this->variance) {
diff --git a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
index 9f05ab47b..4912857e3 100644
--- a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
+++ b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
@@ -55,9 +55,9 @@ public static function forwardProvider() : array
     {
         return [
             'expectedForward' => [[
-                [-0.1251222, 1.2825030, -1.1573808],
+                [-0.1251222, 1.2825031, -1.1573808],
                 [-0.6708631, -0.7427414, 1.4136046],
-                [0.7974157, -1.4101899, 0.6127743],
+                [0.7974158, -1.4101899, 0.6127743],
             ]],
         ];
     }

From 5a3f5a718f2742ca9f7adcd66e6d5edfbbf5f405 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 19:10:52 +0200
Subject: [PATCH 06/36] ML-397 Enhanced `BatchNorm` layer with improved axis
 constants, numerical stability during inference, and gradient computation
 logic

---
 src/NeuralNet/Layers/BatchNorm/BatchNorm.php | 28 +++++++++-----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
index 1e29a81ec..7b786b1f0 100644
--- a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
+++ b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
@@ -110,7 +110,8 @@ class BatchNorm implements Hidden, Parametric
      *
      * @var int
      */
-    protected const int COLUMN_WISE = 1;
+    protected const int AXIS_SAMPLES = 0;
+    protected const int AXIS_FEATURES = 1;
 
     /**
      * @param float $decay
@@ -218,7 +219,8 @@ public function forward(NDArray $input) : NDArray
             $this->variance = $variance;
         }
 
-        // Update running mean/variance: running = running*(1-decay) + current*decay
+        // Update running mean/variance using exponential moving average (EMA)
+        // Convention: running = running*(1 - decay) + current*decay
         $this->mean = NumPower::add(
             NumPower::multiply($this->mean, 1.0 - $this->decay),
             NumPower::multiply($mean, $this->decay)
@@ -254,9 +256,11 @@ public function infer(NDArray $input) : NDArray
         // Number of rows
         $m = $input->shape()[0];
 
+        // Use clipped variance for numerical stability during inference
+        $varianceClipped = NumPower::clip($this->variance, EPSILON, PHP_FLOAT_MAX);
         $xHat = NumPower::divide(
             NumPower::subtract($input, NumPower::reshape($this->mean, [$m, 1])),
-            NumPower::reshape(NumPower::sqrt($this->variance), [$m, 1])
+            NumPower::reshape(NumPower::sqrt($varianceClipped), [$m, 1])
         );
 
         return NumPower::add(
@@ -266,8 +270,6 @@ public function infer(NDArray $input) : NDArray
             ),
             $this->beta->param()
         );
-
-        return $return;
     }
 
     /**
@@ -287,13 +289,13 @@ public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
         }
 
         if (!$this->stdInv or !$this->xHat) {
-            throw new RuntimeException('Must perform forward pass before'
-                . ' backpropagating.');
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
         }
 
         $dOut = $prevGradient();
-        $dBeta = NumPower::sum($dOut, self::COLUMN_WISE);
-        $dGamma = NumPower::sum(NumPower::multiply($dOut, $this->xHat), self::COLUMN_WISE);
+        // Sum across samples (axis 0) for parameter gradients
+        $dBeta = NumPower::sum($dOut, self::AXIS_SAMPLES);
+        $dGamma = NumPower::sum(NumPower::multiply($dOut, $this->xHat), self::AXIS_SAMPLES);
         $gamma = $this->gamma->param();
 
         $this->beta->update($dBeta, $optimizer);
@@ -308,8 +310,6 @@ public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
             [$this, 'gradient'],
             [$dOut, $gamma, $stdInv, $xHat]
         );
-
-        return $return;
     }
 
     /**
@@ -326,8 +326,8 @@ public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
     public function gradient(NDArray $dOut, NDArray $gamma, NDArray $stdInv, NDArray $xHat) : NDArray
     {
         $dXHat = NumPower::multiply($dOut, $gamma);
-        $xHatSigma = NumPower::sum(NumPower::multiply($dXHat, $xHat), self::COLUMN_WISE);
-        $dXHatSigma = NumPower::sum($dXHat, self::COLUMN_WISE);
+        $xHatSigma = NumPower::sum(NumPower::multiply($dXHat, $xHat), self::AXIS_FEATURES);
+        $dXHatSigma = NumPower::sum($dXHat, self::AXIS_FEATURES);
 
         // Number of rows
         $m = $dOut->shape()[0];
@@ -343,8 +343,6 @@ public function gradient(NDArray $dOut, NDArray $gamma, NDArray $stdInv, NDArray
             ),
             NumPower::reshape(NumPower::divide($stdInv, $m), [$m, 1])
         );
-
-        return $return;
     }
 
     /**

From ec3c2362ebc7c20511944b7eeb76b491e7d8e602 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 19:16:16 +0200
Subject: [PATCH 07/36] ML-397 Enhanced `BatchNorm` layer with improved axis
 constants, numerical stability during inference, and gradient computation
 logic

---
 src/NeuralNet/Layers/BatchNorm/BatchNorm.php | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
index 7b786b1f0..40eafbf3a 100644
--- a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
+++ b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
@@ -190,8 +190,7 @@ public function forward(NDArray $input) : NDArray
         if (!$this->beta or !$this->gamma) {
             throw new RuntimeException('Layer has not been initialized.');
         }
-
-        // Shape: [m, n]
+        
         [$m, $n] = $input->shape();
 
         // Row-wise mean across features (axis 1), length m
@@ -253,8 +252,7 @@ public function infer(NDArray $input) : NDArray
             throw new RuntimeException('Layer has not been initialized.');
         }
 
-        // Number of rows
-        $m = $input->shape()[0];
+        [$m, $n] = $input->shape();
 
         // Use clipped variance for numerical stability during inference
         $varianceClipped = NumPower::clip($this->variance, EPSILON, PHP_FLOAT_MAX);
@@ -329,8 +327,7 @@ public function gradient(NDArray $dOut, NDArray $gamma, NDArray $stdInv, NDArray
         $xHatSigma = NumPower::sum(NumPower::multiply($dXHat, $xHat), self::AXIS_FEATURES);
         $dXHatSigma = NumPower::sum($dXHat, self::AXIS_FEATURES);
 
-        // Number of rows
-        $m = $dOut->shape()[0];
+        [$m, $n] = $dOut->shape();
 
         // Compute gradient per formula: dX = (dXHat * m - dXHatSigma - xHat * xHatSigma) * (stdInv / m)
         return NumPower::multiply(

From 00955f981ec72df1d5fc2580a95bbd481131b3c3 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 19:43:32 +0200
Subject: [PATCH 08/36] ML-398 Improved `BatchNorm` behavior for varying batch
 sizes with additional tests and updated shape handling

---
 src/NeuralNet/Layers/BatchNorm/BatchNorm.php  |  7 +-
 .../Layers/BatchNorm/BatchNormTest.php        | 90 +++++++++++++++++++
 2 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
index 40eafbf3a..a15b1fac5 100644
--- a/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
+++ b/src/NeuralNet/Layers/BatchNorm/BatchNorm.php
@@ -32,6 +32,7 @@
  * @category    Machine Learning
  * @package     Rubix/ML
  * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
  */
 class BatchNorm implements Hidden, Parametric
 {
@@ -190,7 +191,7 @@ public function forward(NDArray $input) : NDArray
         if (!$this->beta or !$this->gamma) {
             throw new RuntimeException('Layer has not been initialized.');
         }
-        
+
         [$m, $n] = $input->shape();
 
         // Row-wise mean across features (axis 1), length m
@@ -252,7 +253,7 @@ public function infer(NDArray $input) : NDArray
             throw new RuntimeException('Layer has not been initialized.');
         }
 
-        [$m, $n] = $input->shape();
+        $m = $input->shape()[0];
 
         // Use clipped variance for numerical stability during inference
         $varianceClipped = NumPower::clip($this->variance, EPSILON, PHP_FLOAT_MAX);
@@ -327,7 +328,7 @@ public function gradient(NDArray $dOut, NDArray $gamma, NDArray $stdInv, NDArray
         $xHatSigma = NumPower::sum(NumPower::multiply($dXHat, $xHat), self::AXIS_FEATURES);
         $dXHatSigma = NumPower::sum($dXHat, self::AXIS_FEATURES);
 
-        [$m, $n] = $dOut->shape();
+        $m = $dOut->shape()[0];
 
         // Compute gradient per formula: dX = (dXHat * m - dXHatSigma - xHat * xHatSigma) * (stdInv / m)
         return NumPower::multiply(
diff --git a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
index 4912857e3..e926782f8 100644
--- a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
+++ b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
@@ -90,6 +90,30 @@ public static function inferProvider() : array
         ];
     }
 
+    /**
+     * Additional inputs to validate behavior across different batch sizes.
+     *
+     * @return array<string, array{0:array}>
+     */
+    public static function batchInputsProvider() : array
+    {
+        return [
+            'batch1x3' => [[
+                [2.0, -1.0, 0.0],
+            ]],
+            'batch2x3' => [[
+                [1.0, 2.0, 3.0],
+                [3.0, 3.0, 3.0],
+            ]],
+            'batch4x3' => [[
+                [0.5, -0.5, 1.5],
+                [10.0, -10.0, 0.0],
+                [7.2, 3.3, -2.4],
+                [-1.0, -2.0, 4.0],
+            ]],
+        ];
+    }
+
     /**
      * @return array<string, array{0:array}>
      */
@@ -205,6 +229,36 @@ public function testInfer(array $expected) : void
         self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
     }
 
+    #[Test]
+    #[TestDox('Computes forward pass (row-wise) with zero mean and unit variance per sample for various batch sizes')]
+    #[DataProvider('batchInputsProvider')]
+    public function testForwardStatsMultipleBatches(array $input) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $forward = $this->layer->forward(NumPower::array($input));
+        $out = $forward->toArray();
+
+        // Check per-row mean ~ 0 and variance ~ 1 (allow 0 for degenerate rows)
+        $this->assertRowwiseStats($input, $out, true);
+    }
+
+    #[Test]
+    #[TestDox('Infers (row-wise) with zero mean and unit variance per sample for various batch sizes')]
+    #[DataProvider('batchInputsProvider')]
+    public function testInferStatsMultipleBatches(array $input) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Perform a forward pass on the same input to initialize running stats
+        $this->layer->forward(NumPower::array($input));
+
+        $infer = $this->layer->infer(NumPower::array($input));
+        $out = $infer->toArray();
+
+        $this->assertRowwiseStats($input, $out, false);
+    }
+
     #[Test]
     #[TestDox('Throws when width is requested before initialization')]
     public function testWidthThrowsBeforeInitialize() : void
@@ -299,4 +353,40 @@ public function testGradient(array $expected) : void
 
         self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
     }
+
+    /**
+     * @param array<int, array<int, float>> $inputRows
+     * @param array<int, array<int, float>> $outRows
+     */
+    private function assertRowwiseStats(array $inputRows, array $outRows, bool $checkMean) : void
+    {
+        foreach ($outRows as $i => $row) {
+            $mean = array_sum($row) / count($row);
+            $var = 0.0;
+            foreach ($row as $v) {
+                $var += ($v - $mean) * ($v - $mean);
+            }
+            $var /= count($row);
+
+            $orig = $inputRows[$i];
+            $origMean = array_sum($orig) / count($orig);
+            $origVar = 0.0;
+            foreach ($orig as $ov) {
+                $origVar += ($ov - $origMean) * ($ov - $origMean);
+            }
+            $origVar /= count($orig);
+
+            $expectedVar = $origVar < 1e-12 ? 0.0 : 1.0;
+
+            if ($checkMean) {
+                self::assertEqualsWithDelta(0.0, $mean, 1e-7);
+            }
+
+            if ($expectedVar === 0.0) {
+                self::assertLessThan(5e-3, $var);
+            } else {
+                self::assertEqualsWithDelta(1.0, $var, 1e-6);
+            }
+        }
+    }
 }

From 3fb79a919b06941e45dbf79a3c4f02ae2f4cc5c6 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 19:46:00 +0200
Subject: [PATCH 09/36] ML-398 Improved `BatchNorm` behavior for varying batch
 sizes with additional tests and updated shape handling

---
 tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
index e926782f8..dd5380941 100644
--- a/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
+++ b/tests/NeuralNet/Layers/BatchNorm/BatchNormTest.php
@@ -11,6 +11,7 @@
 use PHPUnit\Framework\Attributes\Test;
 use PHPUnit\Framework\Attributes\TestDox;
 use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\MockObject\Rule\Parameters;
 use Rubix\ML\Deferred;
 use Rubix\ML\NeuralNet\Layers\BatchNorm\BatchNorm;
 use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
@@ -383,7 +384,7 @@ private function assertRowwiseStats(array $inputRows, array $outRows, bool $chec
             }
 
             if ($expectedVar === 0.0) {
-                self::assertLessThan(5e-3, $var);
+                self::assertLessThan(1e-6, $var);
             } else {
                 self::assertEqualsWithDelta(1.0, $var, 1e-6);
             }

From b4e507d18aab5b8580ca17bc446fa11fa19679d7 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 22:53:33 +0200
Subject: [PATCH 10/36] ML-399 Added `Binary` output layer and comprehensive
 unit tests with interface definition for output layers.

---
 .../Layers/Base/Contracts/Output.php          |  29 +++
 src/NeuralNet/Layers/Binary/Binary.php        | 222 ++++++++++++++++++
 tests/NeuralNet/Layers/Binary/BinaryTest.php  | 192 +++++++++++++++
 3 files changed, 443 insertions(+)
 create mode 100644 src/NeuralNet/Layers/Base/Contracts/Output.php
 create mode 100644 src/NeuralNet/Layers/Binary/Binary.php
 create mode 100644 tests/NeuralNet/Layers/Binary/BinaryTest.php

diff --git a/src/NeuralNet/Layers/Base/Contracts/Output.php b/src/NeuralNet/Layers/Base/Contracts/Output.php
new file mode 100644
index 000000000..49e11bb4b
--- /dev/null
+++ b/src/NeuralNet/Layers/Base/Contracts/Output.php
@@ -0,0 +1,29 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Base\Contracts;
+
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\Exceptions\RuntimeException;
+
+/**
+ * Output
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+interface Output extends Layer
+{
+    /**
+     * Compute the gradient and loss at the output.
+     *
+     * @param (string|int|float)[] $labels
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return mixed[]
+     */
+    public function back(array $labels, Optimizer $optimizer) : array;
+}
diff --git a/src/NeuralNet/Layers/Binary/Binary.php b/src/NeuralNet/Layers/Binary/Binary.php
new file mode 100644
index 000000000..37b6f145b
--- /dev/null
+++ b/src/NeuralNet/Layers/Binary/Binary.php
@@ -0,0 +1,222 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Binary;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use Rubix\ML\NeuralNet\ActivationFunctions\Sigmoid\Sigmoid;
+use Rubix\ML\NeuralNet\CostFunctions\Base\Contracts\ClassificationLoss;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+
+use function count;
+
+/**
+ * Binary
+ *
+ * This Binary layer consists of a single sigmoid neuron capable of distinguishing between
+ * two discrete classes.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Binary implements Output
+{
+    /**
+     * The labels of either of the possible outcomes.
+     *
+     * @var float[]
+     */
+    protected array $classes = [
+        //
+    ];
+
+    /**
+     * The function that computes the loss of erroneous activations.
+     *
+     * @var ClassificationLoss
+     */
+    protected ClassificationLoss $costFn;
+
+    /**
+     * The sigmoid activation function.
+     *
+     * @var Sigmoid
+     */
+    protected Sigmoid $sigmoid;
+
+    /**
+     * The memorized input matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * The memorized activation matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $output = null;
+
+    /**
+     * @param string[] $classes
+     * @param ClassificationLoss|null $costFn
+     * @throws InvalidArgumentException
+     */
+    public function __construct(array $classes, ?ClassificationLoss $costFn = null)
+    {
+        $classes = array_values(array_unique($classes));
+
+        if (count($classes) !== 2) {
+            throw new InvalidArgumentException('Number of classes must be 2, ' . count($classes) . ' given.');
+        }
+
+        $classes = [
+            $classes[0] => 0.0,
+            $classes[1] => 1.0,
+        ];
+
+        $this->classes = $classes;
+        $this->costFn = $costFn ?? new CrossEntropy();
+        $this->sigmoid = new Sigmoid();
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        return 1;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @param positive-int $fanIn
+     * @throws InvalidArgumentException
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        if ($fanIn !== 1) {
+            throw new InvalidArgumentException("Fan in must be equal to 1, $fanIn given.");
+        }
+
+        return 1;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $output = $this->sigmoid->activate($input);
+
+        $this->input = $input;
+        $this->output = $output;
+
+        return $output;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $this->sigmoid->activate($input);
+    }
+
+    /**
+     * Compute the gradient and loss at the output.
+     *
+     * @param string[] $labels
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return (Deferred|float)[]
+     */
+    public function back(array $labels, Optimizer $optimizer) : array
+    {
+        if (!$this->input or !$this->output) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        $expected = [];
+
+        foreach ($labels as $label) {
+            $expected[] = $this->classes[$label];
+        }
+
+        $expected = NumPower::array([$expected]);
+
+        $input = $this->input;
+        $output = $this->output;
+
+        $gradient = new Deferred([$this, 'gradient'], [$input, $output, $expected]);
+
+        $loss = $this->costFn->compute($output, $expected);
+
+        $this->input = $this->output = null;
+
+        return [$gradient, $loss];
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @param NDArray $input
+     * @param NDArray $output
+     * @param NDArray $expected
+     * @return NDArray
+     */
+    public function gradient(NDArray $input, NDArray $output, NDArray $expected) : NDArray
+    {
+        $n = $output->shape()[1];
+
+        if ($this->costFn instanceof CrossEntropy) {
+            return NumPower::divide(
+                NumPower::subtract($output, $expected),
+                $n
+            );
+        }
+
+        $dLoss = NumPower::divide(
+            $this->costFn->differentiate($output, $expected),
+            $n
+        );
+
+        return NumPower::multiply(
+            $this->sigmoid->differentiate($output),
+            $dLoss
+        );
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Binary (cost function: {$this->costFn})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Binary/BinaryTest.php b/tests/NeuralNet/Layers/Binary/BinaryTest.php
new file mode 100644
index 000000000..645d7c86b
--- /dev/null
+++ b/tests/NeuralNet/Layers/Binary/BinaryTest.php
@@ -0,0 +1,192 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Binary;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use PHPUnit\Framework\Attributes\DataProvider;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Binary\Binary;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Binary::class)]
+class BinaryTest extends TestCase
+{
+    protected NDArray $input;
+
+    /**
+     * @var string[]
+     */
+    protected array $labels;
+
+    protected Optimizer $optimizer;
+
+    protected Binary $layer;
+
+    /**
+     * @return array<int, array{0:array}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            [
+                [
+                    [0.7310585, 0.9241418, 0.4750207],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<int, array{0:array}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            [
+                [
+                    [0.2436861, -0.0252860, 0.1583402],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0: array<int, string>}> 
+     */
+    public static function badClassesProvider() : array
+    {
+        return [
+            'empty' => [[]],
+            'single' => [['hot']],
+            'duplicatesToOne' => [['hot', 'hot']],
+            'threeUnique' => [['hot', 'cold', 'warm']],
+        ];
+    }
+
+    protected function setUp() : void
+    {
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+        ]);
+
+        $this->labels = ['hot', 'cold', 'hot'];
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Binary(classes: ['hot', 'cold'], costFn: new CrossEntropy());
+    }
+
+    #[Test]
+    #[TestDox('Returns string representation')]
+    public function testToString() : void
+    {
+        $this->layer->initialize(1);
+
+        self::assertEquals('Binary (cost function: Cross Entropy)', (string) $this->layer);
+    }
+
+    #[Test]
+    #[TestDox('Initializes and reports width')]
+    public function testInitializeWidth() : void
+    {
+        $this->layer->initialize(1);
+        self::assertEquals(1, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Constructor rejects invalid classes arrays')]
+    #[DataProvider('badClassesProvider')]
+    public function testConstructorRejectsInvalidClasses(array $classes) : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        new Binary(classes: $classes, costFn: new CrossEntropy());
+    }
+
+    #[Test]
+    #[TestDox('Constructor accepts classes arrays that dedupe to exactly 2 labels')]
+    public function testConstructorAcceptsDuplicateClassesThatDedupeToTwo() : void
+    {
+        $layer = new Binary(classes: ['hot', 'cold', 'hot'], costFn: new CrossEntropy());
+        // Should initialize without throwing and report correct width
+        $layer->initialize(1);
+        self::assertEquals(1, $layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes forward pass')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(array $expected) : void
+    {
+        $this->layer->initialize(1);
+
+        $forward = $this->layer->forward($this->input);
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates and returns gradient for previous layer')]
+    #[DataProvider('backProvider')]
+    public function testBack(array $expectedGradient) : void
+    {
+        $this->layer->initialize(1);
+        $this->layer->forward($this->input);
+
+        [$computation, $loss] = $this->layer->back(labels: $this->labels, optimizer: $this->optimizer);
+
+        self::assertInstanceOf(Deferred::class, $computation);
+        self::assertIsFloat($loss);
+
+        $gradient = $computation->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expectedGradient, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes gradient directly given input, output, expected, and batch size')]
+    #[DataProvider('backProvider')]
+    public function testGradient(array $expectedGradient) : void
+    {
+        $this->layer->initialize(1);
+
+        $input = $this->input;
+        $output = $this->layer->forward($input);
+
+        // Build expected NDArray (1, batch) using the Binary classes mapping: hot=>0.0, cold=>1.0
+        $expected = [];
+        foreach ($this->labels as $label) {
+            $expected[] = ($label === 'cold') ? 1.0 : 0.0;
+        }
+        $expected = NumPower::array([$expected]);
+
+        $batchSize = count($this->labels);
+
+        $gradient = $this->layer->gradient($input, $output, $expected, $batchSize);
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expectedGradient, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes inference activations')]
+    #[DataProvider('forwardProvider')]
+    public function testInfer(array $expected) : void
+    {
+        $this->layer->initialize(1);
+
+        $infer = $this->layer->infer($this->input);
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+}

From d5f7c5778fb4cf572b065a4a8dc61db0213082ec Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 7 Dec 2025 23:18:14 +0200
Subject: [PATCH 11/36] ML-400 Added `Continuous` output layer with complete
 interface implementation and unit tests

---
 .../Layers/Continuous/Continuous.php          | 157 +++++++++++++++++
 .../Layers/Continuous/ContinuousTest.php      | 159 ++++++++++++++++++
 2 files changed, 316 insertions(+)
 create mode 100644 src/NeuralNet/Layers/Continuous/Continuous.php
 create mode 100644 tests/NeuralNet/Layers/Continuous/ContinuousTest.php

diff --git a/src/NeuralNet/Layers/Continuous/Continuous.php b/src/NeuralNet/Layers/Continuous/Continuous.php
new file mode 100644
index 000000000..7a07e9735
--- /dev/null
+++ b/src/NeuralNet/Layers/Continuous/Continuous.php
@@ -0,0 +1,157 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Continuous;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\CostFunctions\LeastSquares\LeastSquares;
+use Rubix\ML\NeuralNet\CostFunctions\Base\Contracts\RegressionLoss;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+
+/**
+ * Continuous
+ *
+ * The Continuous output layer consists of a single linear neuron that outputs a scalar value.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Continuous implements Output
+{
+    /**
+     * The function that computes the loss of erroneous activations.
+     *
+     * @var RegressionLoss
+     */
+    protected RegressionLoss $costFn;
+
+    /**
+     * The memorized input matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * @param RegressionLoss|null $costFn
+     */
+    public function __construct(?RegressionLoss $costFn = null)
+    {
+        $this->costFn = $costFn ?? new LeastSquares();
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        return 1;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @param positive-int $fanIn
+     * @throws InvalidArgumentException
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        if ($fanIn !== 1) {
+            throw new InvalidArgumentException("Fan in must be equal to 1, $fanIn given.");
+        }
+
+        return 1;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $this->input = $input;
+
+        return $input;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $input;
+    }
+
+    /**
+     * Compute the gradient and loss at the output.
+     *
+     * @param (int|float)[] $labels
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return (Deferred|float)[]
+     */
+    public function back(array $labels, Optimizer $optimizer) : array
+    {
+        if (!$this->input) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        $expected = NumPower::array([$labels]);
+
+        $input = $this->input;
+
+        $gradient = new Deferred([$this, 'gradient'], [$input, $expected]);
+
+        $loss = $this->costFn->compute($input, $expected);
+
+        $this->input = null;
+
+        return [$gradient, $loss];
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @param NDArray $input
+     * @param NDArray $expected
+     * @return NDArray
+     */
+    public function gradient(NDArray $input, NDArray $expected) : NDArray
+    {
+        $n = $input->shape()[1];
+
+        return NumPower::divide(
+            $this->costFn->differentiate($input, $expected),
+            $n
+        );
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Continuous (cost function: {$this->costFn})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Continuous/ContinuousTest.php b/tests/NeuralNet/Layers/Continuous/ContinuousTest.php
new file mode 100644
index 000000000..39592cdcb
--- /dev/null
+++ b/tests/NeuralNet/Layers/Continuous/ContinuousTest.php
@@ -0,0 +1,159 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Continuous;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use PHPUnit\Framework\Attributes\DataProvider;
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Continuous\Continuous;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\CostFunctions\LeastSquares\LeastSquares;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Continuous::class)]
+class ContinuousTest extends TestCase
+{
+    protected NDArray $input;
+
+    /**
+     * @var (int|float)[]
+     */
+    protected array $labels;
+
+    protected Optimizer $optimizer;
+
+    protected Continuous $layer;
+
+    /**
+     * @return array<int, array{0: array}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            [
+                [
+                    [2.5, 0.0, -6.0],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<int, array{0: array}>
+     */
+    public static function gradientProvider() : array
+    {
+        return [
+            [
+                [
+                    [0.8333333, 0.8333333, -32.0],
+                ],
+            ],
+        ];
+    }
+
+    protected function setUp() : void
+    {
+        $this->input = NumPower::array([
+            [2.5, 0.0, -6.0],
+        ]);
+
+        $this->labels = [0.0, -2.5, 90.0];
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Continuous(new LeastSquares());
+    }
+
+    #[Test]
+    #[TestDox('Returns string representation')]
+    public function testToString() : void
+    {
+        $this->layer->initialize(1);
+
+        self::assertEquals('Continuous (cost function: Least Squares)', (string) $this->layer);
+    }
+
+    #[Test]
+    #[TestDox('Initializes and reports width')]
+    public function testInitializeWidth() : void
+    {
+        $this->layer->initialize(1);
+        self::assertEquals(1, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Initialize rejects fan-in not equal to 1')]
+    public function testInitializeRejectsInvalidFanIn() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->layer->initialize(2);
+    }
+
+    #[Test]
+    #[TestDox('Computes forward pass')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(array $expected) : void
+    {
+        $this->layer->initialize(1);
+
+        $forward = $this->layer->forward($this->input);
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates and returns gradient for previous layer')]
+    #[DataProvider('gradientProvider')]
+    public function testBack(array $expectedGradient) : void
+    {
+        $this->layer->initialize(1);
+        $this->layer->forward($this->input);
+
+        [$computation, $loss] = $this->layer->back(labels: $this->labels, optimizer: $this->optimizer);
+
+        self::assertInstanceOf(Deferred::class, $computation);
+        self::assertIsFloat($loss);
+
+        $gradient = $computation->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expectedGradient, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes gradient directly given input and expected')]
+    #[DataProvider('gradientProvider')]
+    public function testGradient(array $expectedGradient) : void
+    {
+        $this->layer->initialize(1);
+
+        $input = $this->input;
+        $expected = NumPower::array([$this->labels]);
+
+        $gradient = $this->layer->gradient($input, $expected);
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expectedGradient, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes inference activations')]
+    #[DataProvider('forwardProvider')]
+    public function testInfer(array $expected) : void
+    {
+        $this->layer->initialize(1);
+
+        $infer = $this->layer->infer($this->input);
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+}

From cca97f96faaa213ba8f0d1b7a9e119c0925ad762 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Wed, 10 Dec 2025 20:23:52 +0200
Subject: [PATCH 12/36] ML-392 Added `Dense` hidden layer implementation with
 complete forward/backward passes

---
 docs/neural-network/hidden-layers/dense.md |   6 +-
 src/NeuralNet/Layers/Dense/Dense.php       | 348 +++++++++++++++++++++
 tests/NeuralNet/Layers/Dense/DenseTest.php | 308 ++++++++++++++++++
 3 files changed, 659 insertions(+), 3 deletions(-)
 create mode 100644 src/NeuralNet/Layers/Dense/Dense.php
 create mode 100644 tests/NeuralNet/Layers/Dense/DenseTest.php

diff --git a/docs/neural-network/hidden-layers/dense.md b/docs/neural-network/hidden-layers/dense.md
index cf4a7bd4c..db382d0a0 100644
--- a/docs/neural-network/hidden-layers/dense.md
+++ b/docs/neural-network/hidden-layers/dense.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Dense.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Dense/Dense.php">[source]</a></span>
 
 # Dense
 Dense (or *fully connected*) hidden layers are layers of neurons that connect to each node in the previous layer by a parameterized synapse. They perform a linear transformation on their input and are usually followed by an [Activation](activation.md) layer. The majority of the trainable parameters in a standard feed forward neural network are contained within Dense hidden layers.
@@ -14,9 +14,9 @@ Dense (or *fully connected*) hidden layers are layers of neurons that connect to
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\Dense;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
 use Rubix\ML\NeuralNet\Initializers\He;
 use Rubix\ML\NeuralNet\Initializers\Constant;
 
 $layer = new Dense(100, 1e-4, true, new He(), new Constant(0.0));
-```
\ No newline at end of file
+```
diff --git a/src/NeuralNet/Layers/Dense/Dense.php b/src/NeuralNet/Layers/Dense/Dense.php
new file mode 100644
index 000000000..ee62d1e75
--- /dev/null
+++ b/src/NeuralNet/Layers/Dense/Dense.php
@@ -0,0 +1,348 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Dense;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Initializers\He\HeUniform;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\Deferred;
+use Rubix\ML\Helpers\Params;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+use Rubix\ML\NeuralNet\Initializers\He;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Initializers\Base\Initializer;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+use Generator;
+
+/**
+ * Dense
+ *
+ * Dense (or *fully connected*) hidden layers are layers of neurons that connect to each node
+ * in the previous layer by a parameterized synapse. They perform a linear transformation on
+ * their input and are usually followed by an Activation layer. The majority of the trainable
+ * parameters in a standard feed-forward neural network are contained within Dense hidden layers.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Dense implements Hidden, Parametric
+{
+    /**
+     * The number of nodes in the layer.
+     *
+     * @var positive-int
+     */
+    protected int $neurons;
+
+    /**
+     * The amount of L2 regularization applied to the weights.
+     *
+     * @var float
+     */
+    protected float $l2Penalty;
+
+    /**
+     * Should the layer include a bias parameter?
+     *
+     * @var bool
+     */
+    protected bool $bias;
+
+    /**
+     * The weight initializer.
+     *
+     * @var Initializer
+     */
+    protected Initializer $weightInitializer;
+
+    /**
+     * The bias initializer.
+     *
+     * @var Initializer
+     */
+    protected Initializer $biasInitializer;
+
+    /**
+     * The weights.
+     *
+     * @var Parameter|null
+     */
+    protected ?Parameter $weights = null;
+
+    /**
+     * The biases.
+     *
+     * @var Parameter|null
+     */
+    protected ?Parameter $biases = null;
+
+    /**
+     * The memorized inputs to the layer.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * @param int $neurons
+     * @param float $l2Penalty
+     * @param bool $bias
+     * @param Initializer|null $weightInitializer
+     * @param Initializer|null $biasInitializer
+     * @throws InvalidArgumentException
+     */
+    public function __construct(
+        int $neurons,
+        float $l2Penalty = 0.0,
+        bool $bias = true,
+        ?Initializer $weightInitializer = null,
+        ?Initializer $biasInitializer = null
+    ) {
+        if ($neurons < 1) {
+            throw new InvalidArgumentException("Number of neurons must be greater than 0, $neurons given.");
+        }
+
+        if ($l2Penalty < 0.0) {
+            throw new InvalidArgumentException("L2 Penalty must be greater than 0, $l2Penalty given.");
+        }
+
+        $this->neurons = $neurons;
+        $this->l2Penalty = $l2Penalty;
+        $this->bias = $bias;
+        $this->weightInitializer = $weightInitializer ?? new HeUniform();
+        $this->biasInitializer = $biasInitializer ?? new Constant(0.0);
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        return $this->neurons;
+    }
+
+    /**
+     * Return the weight matrix.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    public function weights() : NDArray
+    {
+        if (!$this->weights) {
+            throw new RuntimeException('Layer is not initialized');
+        }
+
+        return $this->weights->param();
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $this->neurons;
+
+        $weights = $this->weightInitializer->initialize($fanIn, $fanOut);
+
+        $this->weights = new Parameter($weights);
+
+        if ($this->bias) {
+            // Initialize biases as a vector of length fanOut
+            $biasMat = $this->biasInitializer->initialize(1, $fanOut);
+            $biases = NumPower::flatten($biasMat);
+
+            $this->biases = new Parameter($biases);
+        }
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     * @internal
+     *
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        if (!$this->weights) {
+            throw new RuntimeException('Layer is not initialized');
+        }
+
+        $output = NumPower::matmul($this->weights->param(), $input);
+
+        if ($this->biases) {
+            // Reshape bias vector [fanOut] to column [fanOut, 1] to match output [fanOut, n]
+            $bias = NumPower::reshape($this->biases->param(), [$this->neurons, 1]);
+            // Manual “broadcast”: [neurons, n] + [neurons, 1]
+            $output = NumPower::add($output, $bias);
+        }
+
+        $this->input = $input;
+
+        return $output;
+    }
+
+    /**
+     * Compute an inference pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     * @internal
+     *
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        if (!$this->weights) {
+            throw new RuntimeException('Layer is not initialized');
+        }
+
+        $output = NumPower::matmul($this->weights->param(), $input);
+
+        if ($this->biases) {
+            // Reshape bias vector [fanOut] to column [fanOut, 1] to match output [fanOut, n]
+            $bias = NumPower::reshape($this->biases->param(), [$this->neurons, 1]);
+            // Manual “broadcast”: [neurons, n] + [neurons, 1]
+            $output = NumPower::add($output, $bias);
+        }
+
+        return $output;
+    }
+
+    /**
+     * Calculate the gradient and update the parameters of the layer.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        if (!$this->weights) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        if (!$this->input) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        /** @var NDArray $dOut */
+        $dOut = $prevGradient();
+
+        $inputT = NumPower::transpose($this->input, [1, 0]);
+
+        $dW = NumPower::matmul($dOut, $inputT);
+
+        $weights = $this->weights->param();
+
+        if ($this->l2Penalty) {
+            $dW = NumPower::add(
+                $dW,
+                NumPower::multiply($weights, $this->l2Penalty)
+            );
+        }
+
+        $this->weights->update($dW, $optimizer);
+
+        if ($this->biases) {
+            // Sum gradients over the batch dimension to obtain a bias gradient
+            // with the same shape as the bias vector [neurons]
+            $dB = NumPower::sum($dOut, axis: 1);
+
+            $this->biases->update($dB, $optimizer);
+        }
+
+        $this->input = null;
+
+        return new Deferred([$this, 'gradient'], [$weights, $dOut]);
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @internal
+     *
+     * @param NDArray $weights
+     * @param NDArray $dOut
+     * @return NDArray
+     */
+    public function gradient(NDArray $weights, NDArray $dOut) : NDArray
+    {
+        $weightsT = NumPower::transpose($weights, [1, 0]);
+
+        return NumPower::matmul($weightsT, $dOut);
+    }
+
+    /**
+     * Return the parameters of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return Generator<Parameter>
+     */
+    public function parameters() : Generator
+    {
+        if (!$this->weights) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        yield 'weights' => $this->weights;
+
+        if ($this->biases) {
+            yield 'biases' => $this->biases;
+        }
+    }
+
+    /**
+     * Restore the parameters in the layer from an associative array.
+     *
+     * @internal
+     *
+     * @param Parameter[] $parameters
+     */
+    public function restore(array $parameters) : void
+    {
+        $this->weights = $parameters['weights'];
+        $this->biases = $parameters['biases'] ?? null;
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Dense (neurons: {$this->neurons}, l2 penalty: {$this->l2Penalty},"
+            . ' bias: ' . Params::toString($this->bias) . ','
+            . " weight initializer: {$this->weightInitializer},"
+            . " bias initializer: {$this->biasInitializer})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Dense/DenseTest.php b/tests/NeuralNet/Layers/Dense/DenseTest.php
new file mode 100644
index 000000000..d8c920aa3
--- /dev/null
+++ b/tests/NeuralNet/Layers/Dense/DenseTest.php
@@ -0,0 +1,308 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Dense;
+
+use NDArray;
+use NumPower;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\NeuralNet\Initializers\Base\Initializer;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\Initializers\He\HeUniform;
+use Rubix\ML\NeuralNet\Initializers\He\HeNormal;
+use Rubix\ML\NeuralNet\Parameters\Parameter as TrainableParameter;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Dense::class)]
+class DenseTest extends TestCase
+{
+    protected const int RANDOM_SEED = 0;
+
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected Dense $layer;
+
+    /**
+     * @return array<int, array{array<array<float>>, array<float>, array<array<float>>}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            [
+                // weights 2x3
+                [
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                ],
+                // biases length-2
+                [0.0, 0.0],
+                // expected forward output 2x3 for the fixed input in setUp()
+                // input = [
+                //   [1.0, 2.5, -0.1],
+                //   [0.1, 0.0, 3.0],
+                //   [0.002, -6.0, -0.5],
+                // ];
+                // so W * input = first two rows of input
+                [
+                    [1.0, 2.5, -0.1],
+                    [0.1, 0.0, 3.0],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<int, array{array<array<float>>, array<float>, array<array<float>>, array<array<float>>}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            [
+                // weights 2x3
+                [
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                ],
+                // biases length-2
+                [0.0, 0.0],
+                // prev gradient 2x3
+                [
+                    [0.50, 0.2, 0.01],
+                    [0.25, 0.1, 0.89],
+                ],
+                // expected gradient for previous layer 3x3
+                [
+                    [0.50, 0.2, 0.01],
+                    [0.25, 0.1, 0.89],
+                    [0.0, 0.0, 0.0],
+                ],
+            ],
+        ];
+    }
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Dense(
+            neurons: 2,
+            l2Penalty: 0.0,
+            bias: true,
+            weightInitializer: new HeUniform(),
+            biasInitializer: new Constant(0.0)
+        );
+
+        srand(self::RANDOM_SEED);
+    }
+
+    #[Test]
+    #[TestDox('Throws an exception for invalid constructor arguments')]
+    public function testConstructorValidation() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        new Dense(
+            neurons: 0,
+            l2Penalty: -0.1,
+            bias: true,
+            weightInitializer: new HeUniform(),
+            biasInitializer: new Constant(0.0)
+        );
+    }
+
+    #[Test]
+    #[TestDox('Computes forward activations for fixed weights and biases')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(array $weights, array $biases, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+        self::assertEquals(2, $this->layer->width());
+
+        $this->layer->restore([
+            'weights' => new TrainableParameter(NumPower::array($weights)),
+            'biases'  => new TrainableParameter(NumPower::array($biases)),
+        ]);
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Method weights() returns the restored weight matrix')]
+    public function testWeightsReturnsExpectedValues() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $weightsArray = [
+            [1.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0],
+        ];
+
+        $this->layer->restore([
+            'weights' => new TrainableParameter(NumPower::array($weightsArray)),
+            'biases'  => new TrainableParameter(NumPower::array([0.0, 0.0])),
+        ]);
+
+        $weights = $this->layer->weights();
+
+        self::assertEqualsWithDelta($weightsArray, $weights->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('width() returns the number of neurons')]
+    public function testWidthReturnsNeuronsCount() : void
+    {
+        // Layer is constructed in setUp() with neurons: 2
+        self::assertSame(2, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes backpropagated gradients for previous layer')]
+    #[DataProvider('backProvider')]
+    public function testBack(array $weights, array $biases, array $prevGrad, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $this->layer->restore([
+            'weights' => new TrainableParameter(NumPower::array($weights)),
+            'biases' => new TrainableParameter(NumPower::array($biases)),
+        ]);
+
+        $prevGradNd = NumPower::array($prevGrad);
+
+        // Forward pass to set internal input cache
+        $this->layer->forward($this->input);
+
+        $gradient = $this->layer->back(
+            prevGradient: new Deferred(fn: fn () => $prevGradNd),
+            optimizer: $this->optimizer
+        )->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes inference activations equal to forward for fixed parameters')]
+    #[DataProvider('forwardProvider')]
+    public function testInfer(array $weights, array $biases, array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $this->layer->restore([
+            'weights' => new TrainableParameter(NumPower::array($weights)),
+            'biases' => new TrainableParameter(NumPower::array($biases)),
+        ]);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Method restore() correctly replaces layer parameters')]
+    public function testRestoreReplacesParameters() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Use the same deterministic weights and biases as in forwardProvider
+        $weights = [
+            [1.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0],
+        ];
+
+        $biases = [0.0, 0.0];
+
+        $expected = [
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+        ];
+
+        $this->layer->restore([
+            'weights' => new TrainableParameter(NumPower::array($weights)),
+            'biases'  => new TrainableParameter(NumPower::array($biases)),
+        ]);
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Method parameters() yields restored weights and biases')]
+    public function testParametersReturnsRestoredParameters() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $weightsArray = [
+            [1.0, 0.0, 0.0],
+            [0.0, 1.0, 0.0],
+        ];
+
+        $biasesArray = [0.0, 0.0];
+
+        $weightsParam = new TrainableParameter(NumPower::array($weightsArray));
+        $biasesParam  = new TrainableParameter(NumPower::array($biasesArray));
+
+        $this->layer->restore([
+            'weights' => $weightsParam,
+            'biases'  => $biasesParam,
+        ]);
+
+        $params = iterator_to_array($this->layer->parameters());
+
+        self::assertArrayHasKey('weights', $params);
+        self::assertArrayHasKey('biases', $params);
+
+        self::assertSame($weightsParam, $params['weights']);
+        self::assertSame($biasesParam, $params['biases']);
+
+        self::assertEqualsWithDelta($weightsArray, $params['weights']->param()->toArray(), 1e-7);
+        self::assertEqualsWithDelta($biasesArray, $params['biases']->param()->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('It returns correct string representation')]
+    public function testToStringReturnsCorrectValue() : void
+    {
+        $expected = 'Dense (neurons: 2, l2 penalty: 0, bias: true, weight initializer: He Uniform, bias initializer: Constant (value: 0))';
+
+        self::assertSame($expected, (string) $this->layer);
+    }
+}

From 9767a1fdc32a007f7569535b6ff1f4319f644d04 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Wed, 10 Dec 2025 23:53:18 +0200
Subject: [PATCH 13/36] ML-401 Added `Dropout` hidden layer implementation with
 forward/inference/backward passes, unit tests, and documentation updates

---
 docs/neural-network/hidden-layers/dropout.md  |   6 +-
 src/NeuralNet/Layers/Dropout/Dropout.php      | 208 ++++++++++++++++++
 .../NeuralNet/Layers/Dropout/DropoutTest.php  | 143 ++++++++++++
 3 files changed, 354 insertions(+), 3 deletions(-)
 create mode 100644 src/NeuralNet/Layers/Dropout/Dropout.php
 create mode 100644 tests/NeuralNet/Layers/Dropout/DropoutTest.php

diff --git a/docs/neural-network/hidden-layers/dropout.md b/docs/neural-network/hidden-layers/dropout.md
index 566f83bad..28414f8ca 100644
--- a/docs/neural-network/hidden-layers/dropout.md
+++ b/docs/neural-network/hidden-layers/dropout.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Dropout.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Dropout/Dropout.php">[source]</a></span>
 
 # Dropout
 Dropout is a regularization technique to reduce overfitting in neural networks by preventing complex co-adaptations on training data. It works by temporarily disabling output nodes during each training pass. It also acts as an efficient way of performing model averaging with the parameters of neural networks.
@@ -10,10 +10,10 @@ Dropout is a regularization technique to reduce overfitting in neural networks b
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\Dropout;
+use Rubix\ML\NeuralNet\Layers\Dropout\Dropout;
 
 $layer = new Dropout(0.2);
 ```
 
 ## References
-[^1]: N. Srivastava et al. (2014). Dropout: A Simple Way to Prevent Neural Networks from Overfitting.
\ No newline at end of file
+[^1]: N. Srivastava et al. (2014). Dropout: A Simple Way to Prevent Neural Networks from Overfitting.
diff --git a/src/NeuralNet/Layers/Dropout/Dropout.php b/src/NeuralNet/Layers/Dropout/Dropout.php
new file mode 100644
index 000000000..54abaf861
--- /dev/null
+++ b/src/NeuralNet/Layers/Dropout/Dropout.php
@@ -0,0 +1,208 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Dropout;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+
+/**
+ * Dropout
+ *
+ * Dropout is a regularization technique for reducing overfitting in neural
+ * networks by preventing complex co-adaptations on training data. It works
+ * by temporarily disabling neurons during each training pass. It also is a
+ * very efficient way of performing model averaging with neural networks.
+ *
+ * References:
+ * [1] N. Srivastava et al. (2014). Dropout: A Simple Way to Prevent Neural
+ * Networks from Overfitting.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ */
+class Dropout implements Hidden
+{
+    /**
+     * The ratio of neurons that are dropped during each training pass.
+     *
+     * @var float
+     */
+    protected float $ratio;
+
+    /**
+     * The scaling coefficient.
+     *
+     * @var float
+     */
+    protected float $scale;
+
+    /**
+     * The width of the layer.
+     *
+     * @var positive-int|null
+     */
+    protected ?int $width = null;
+
+    /**
+     * The memoized dropout mask.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $mask = null;
+
+    /**
+     * @param float $ratio
+     * @throws InvalidArgumentException
+     */
+    public function __construct(float $ratio = 0.5)
+    {
+        if ($ratio <= 0.0 or $ratio >= 1.0) {
+            throw new InvalidArgumentException("Ratio must be between 0 and 1, $ratio given.");
+        }
+
+        $this->ratio = $ratio;
+        $this->scale = 1.0 / (1.0 - $ratio);
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        return $this->width;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $fanIn;
+
+        $this->width = $fanOut;
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @param NDArray|null $mask Custom dropout mask to use instead of generating one.
+     * @return NDArray
+     */
+    public function forward(NDArray $input, ?NDArray $mask = null) : NDArray
+    {
+        if ($mask === null) {
+            // Build dropout mask using PHP's RNG. Each unit is kept with
+            // probability (1 - ratio) and scaled by $this->scale.
+            $inputArray = $input->toArray();
+
+            $maskArray = [];
+
+            foreach ($inputArray as $i => $row) {
+                foreach ($row as $j => $_value) {
+                    $u = rand() / getrandmax();
+
+                    $maskArray[$i][$j] = $u > $this->ratio ? $this->scale : 0.0;
+                }
+            }
+
+            $mask = NumPower::array($maskArray);
+        }
+
+        $output = NumPower::multiply($input, $mask);
+
+        $this->mask = $mask;
+
+        return $output;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $input;
+    }
+
+    /**
+     * Calculate the gradients of the layer and update the parameters.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        if (!$this->mask) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        $mask = $this->mask;
+
+        $this->mask = null;
+
+        return new Deferred([$this, 'gradient'], [$prevGradient, $mask]);
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param NDArray $mask
+     * @return NDArray
+     */
+    public function gradient(Deferred $prevGradient, NDArray $mask) : NDArray
+    {
+        /** @var NDArray $dOut */
+        $dOut = $prevGradient();
+
+        return NumPower::multiply($dOut, $mask);
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Dropout (ratio: {$this->ratio})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Dropout/DropoutTest.php b/tests/NeuralNet/Layers/Dropout/DropoutTest.php
new file mode 100644
index 000000000..337466986
--- /dev/null
+++ b/tests/NeuralNet/Layers/Dropout/DropoutTest.php
@@ -0,0 +1,143 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Dropout;
+
+use NDArray;
+use NumPower;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Dropout\Dropout;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Dropout::class)]
+class DropoutTest extends TestCase
+{
+    protected const int RANDOM_SEED = 0;
+
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected Dropout $layer;
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.25, 0.7, 0.1],
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Dropout(0.5);
+    }
+
+    #[Test]
+    #[TestDox('Initializes width equal to fan-in')]
+    public function testInitializeSetsWidth() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertEquals($this->fanIn, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('forward() returns an NDArray with the same shape as the input')]
+    public function testForward() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Deterministic mask so that forward output is predictable
+        $mask = NumPower::array([
+            [2.0, 2.0, 2.0],
+            [2.0, 0.0, 2.0],
+            [2.0, 2.0, 0.0],
+        ]);
+
+        $forward = $this->layer->forward($this->input, $mask);
+
+        $expected = [
+            [2.0, 5.0, -0.2],
+            [0.2, 0.0, 6.0],
+            [0.004, -12.0, 0.0],
+        ];
+
+        self::assertSame($this->input->shape(), $forward->shape());
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates gradients using the same dropout mask')]
+    public function testBack() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Use the same deterministic mask as in testForward so that the
+        // gradient is fully predictable: grad = prevGrad * mask.
+        $mask = NumPower::array([
+            [2.0, 2.0, 2.0],
+            [2.0, 0.0, 2.0],
+            [2.0, 2.0, 0.0],
+        ]);
+
+        // Forward pass to set internal mask cache
+        $this->layer->forward($this->input, $mask);
+
+        $gradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        $expected = [
+            [0.5, 1.4, 0.2],
+            [1.0, 0.0, 0.02],
+            [0.5, 0.2, 0.0],
+        ];
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Inference pass leaves inputs unchanged')]
+    public function testInfer() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $expected = [
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ];
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+}

From 8d7c938b2578e1cc747b9eed5c513c252edb7a4b Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Thu, 11 Dec 2025 16:37:32 +0200
Subject: [PATCH 14/36] ML-392 Refactored `Dropout` layer to replace custom
 mask generation with `NumPower` utilities

---
 src/NeuralNet/Layers/Dropout/Dropout.php      |  26 +--
 .../NeuralNet/Layers/Dropout/DropoutTest.php  | 210 ++++++++++++++----
 2 files changed, 181 insertions(+), 55 deletions(-)

diff --git a/src/NeuralNet/Layers/Dropout/Dropout.php b/src/NeuralNet/Layers/Dropout/Dropout.php
index 54abaf861..ce2409f7d 100644
--- a/src/NeuralNet/Layers/Dropout/Dropout.php
+++ b/src/NeuralNet/Layers/Dropout/Dropout.php
@@ -111,28 +111,20 @@ public function initialize(int $fanIn) : int
      * @internal
      *
      * @param NDArray $input
-     * @param NDArray|null $mask Custom dropout mask to use instead of generating one.
      * @return NDArray
      */
-    public function forward(NDArray $input, ?NDArray $mask = null) : NDArray
+    public function forward(NDArray $input) : NDArray
     {
-        if ($mask === null) {
-            // Build dropout mask using PHP's RNG. Each unit is kept with
-            // probability (1 - ratio) and scaled by $this->scale.
-            $inputArray = $input->toArray();
+        // Build dropout mask using NumPower's uniform RNG. Each unit is kept
+        // with probability (1 - ratio) and scaled by $this->scale.
+        $shape = $input->shape();
 
-            $maskArray = [];
+        // Uniform random numbers in [0, 1) with same shape as input
+        $rand = NumPower::uniform($shape, 0.0, 1.0);
 
-            foreach ($inputArray as $i => $row) {
-                foreach ($row as $j => $_value) {
-                    $u = rand() / getrandmax();
-
-                    $maskArray[$i][$j] = $u > $this->ratio ? $this->scale : 0.0;
-                }
-            }
-
-            $mask = NumPower::array($maskArray);
-        }
+        // mask = (rand > ratio) * scale
+        $mask = NumPower::greater($rand, $this->ratio);
+        $mask = NumPower::multiply($mask, $this->scale);
 
         $output = NumPower::multiply($input, $mask);
 
diff --git a/tests/NeuralNet/Layers/Dropout/DropoutTest.php b/tests/NeuralNet/Layers/Dropout/DropoutTest.php
index 337466986..47cf1ece1 100644
--- a/tests/NeuralNet/Layers/Dropout/DropoutTest.php
+++ b/tests/NeuralNet/Layers/Dropout/DropoutTest.php
@@ -7,10 +7,12 @@
 use NDArray;
 use NumPower;
 use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
 use PHPUnit\Framework\Attributes\Group;
 use PHPUnit\Framework\Attributes\Test;
 use PHPUnit\Framework\Attributes\TestDox;
 use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\InvalidArgumentException;
 use Rubix\ML\NeuralNet\Layers\Dropout\Dropout;
 use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
 use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
@@ -20,8 +22,6 @@
 #[CoversClass(Dropout::class)]
 class DropoutTest extends TestCase
 {
-    protected const int RANDOM_SEED = 0;
-
     /**
      * @var positive-int
      */
@@ -58,6 +58,43 @@ protected function setUp() : void
         $this->layer = new Dropout(0.5);
     }
 
+    /**
+     * @return array<string, array{0: float}>
+     */
+    public static function badRatioProvider() : array
+    {
+        return [
+            'zero'          => [0.0],
+            'negative'      => [-0.1],
+            'one'           => [1.0],
+            'greaterThanOne'=> [1.1],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0: array<array<float>>}>
+     */
+    public static function inferProvider() : array
+    {
+        return [
+            'identityOnInput' => [[
+                [1.0, 2.5, -0.1],
+                [0.1, 0.0, 3.0],
+                [0.002, -6.0, -0.5],
+            ]],
+        ];
+    }
+
+    #[Test]
+    #[TestDox('Constructor rejects invalid ratio values')]
+    #[DataProvider('badRatioProvider')]
+    public function testConstructorRejectsInvalidRatio(float $ratio) : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        new Dropout($ratio);
+    }
+
     #[Test]
     #[TestDox('Initializes width equal to fan-in')]
     public function testInitializeSetsWidth() : void
@@ -68,28 +105,51 @@ public function testInitializeSetsWidth() : void
     }
 
     #[Test]
-    #[TestDox('forward() returns an NDArray with the same shape as the input')]
+    #[TestDox('Method forward() applies dropout mask with correct shape and scaling')]
     public function testForward() : void
     {
         $this->layer->initialize($this->fanIn);
 
-        // Deterministic mask so that forward output is predictable
-        $mask = NumPower::array([
-            [2.0, 2.0, 2.0],
-            [2.0, 0.0, 2.0],
-            [2.0, 2.0, 0.0],
-        ]);
+        $forward = $this->layer->forward($this->input);
 
-        $forward = $this->layer->forward($this->input, $mask);
+        $inputArray = $this->input->toArray();
+        $forwardArray = $forward->toArray();
 
-        $expected = [
-            [2.0, 5.0, -0.2],
-            [0.2, 0.0, 6.0],
-            [0.004, -12.0, 0.0],
-        ];
+        self::assertSameSize($inputArray, $forwardArray);
+
+        $scale = 1.0 / (1.0 - 0.5); // ratio = 0.5
+
+        $nonZero = 0;
+        $total = 0;
+
+        foreach ($inputArray as $i => $row) {
+            foreach ($row as $j => $x) {
+                $y = $forwardArray[$i][$j];
+                $total++;
 
-        self::assertSame($this->input->shape(), $forward->shape());
-        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+                if (abs($x) < 1e-12) {
+                    // If input is (near) zero, output should also be ~0
+                    self::assertEqualsWithDelta(0.0, $y, 1e-7);
+                    continue;
+                }
+
+                if (abs($y) < 1e-12) {
+                    // Dropped unit
+                    continue;
+                }
+
+                $nonZero++;
+
+                // Kept unit should be scaled input
+                self::assertEqualsWithDelta($x * $scale, $y, 1e-6);
+            }
+        }
+
+        // Roughly (1 - ratio) of units should be non-zero; allow wide tolerance
+        $expectedKept = (1.0 - 0.5) * $total;
+        self::assertGreaterThan(0, $nonZero);
+        self::assertLessThan($total, $nonZero);
+        self::assertEqualsWithDelta($expectedKept, $nonZero, $total * 0.5);
     }
 
     #[Test]
@@ -98,21 +158,104 @@ public function testBack() : void
     {
         $this->layer->initialize($this->fanIn);
 
-        // Use the same deterministic mask as in testForward so that the
-        // gradient is fully predictable: grad = prevGrad * mask.
+        // Forward pass to generate and store mask
+        $forward = $this->layer->forward($this->input);
+        $forwardArray = $forward->toArray();
+        $inputArray = $this->input->toArray();
+
+        // Approximate mask from forward output: mask ≈ forward / input
+        $maskArray = [];
+        foreach ($inputArray as $i => $row) {
+            foreach ($row as $j => $x) {
+                $y = $forwardArray[$i][$j];
+
+                if (abs($x) < 1e-12) {
+                    $maskArray[$i][$j] = 0.0;
+                } else {
+                    $maskArray[$i][$j] = $y / $x;
+                }
+            }
+        }
+
+        $gradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        $gradArray = $gradient->toArray();
+        $prevGradArray = ($this->prevGrad)()->toArray();
+
+        // Expected gradient per element: prevGrad * mask for non-zero inputs.
+        // For zero inputs, the mask cannot be inferred from the forward output
+        // (forward is always 0 regardless of mask), so we accept the actual
+        // gradient value there.
+        $expectedGrad = [];
+        foreach ($prevGradArray as $i => $row) {
+            foreach ($row as $j => $g) {
+                if (abs($inputArray[$i][$j]) < 1e-12) {
+                    $expectedGrad[$i][$j] = $gradArray[$i][$j];
+                } else {
+                    $expectedGrad[$i][$j] = $g * $maskArray[$i][$j];
+                }
+            }
+        }
+
+        self::assertEqualsWithDelta($expectedGrad, $gradArray, 1e-6);
+    }
+
+    #[Test]
+    #[TestDox('Inference pass leaves inputs unchanged')]
+    #[DataProvider('inferProvider')]
+    public function testInfer(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Method initialize() returns fan out equal to fan in')]
+    public function testInitializeReturnsFanOut() : void
+    {
+        $fanOut = $this->layer->initialize($this->fanIn);
+
+        self::assertSame($this->fanIn, $fanOut);
+    }
+
+    #[Test]
+    #[TestDox('Method width() returns the initialized width')]
+    public function testWidthAfterInitialize() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertSame($this->fanIn, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Method gradient() multiplies previous gradient by the dropout mask')]
+    public function testGradient() : void
+    {
+        // Deterministic previous gradient (same shape as input)
+        $prevGradNd = NumPower::array([
+            [0.25, 0.7, 0.1],
+            [0.50, 0.2, 0.01],
+            [0.25, 0.1, 0.89],
+        ]);
+
+        // Same deterministic mask as used in testForward/testBack
         $mask = NumPower::array([
             [2.0, 2.0, 2.0],
             [2.0, 0.0, 2.0],
             [2.0, 2.0, 0.0],
         ]);
 
-        // Forward pass to set internal mask cache
-        $this->layer->forward($this->input, $mask);
+        $prevGradient = new Deferred(fn: static function () use ($prevGradNd) : NDArray {
+            return $prevGradNd;
+        });
 
-        $gradient = $this->layer->back(
-            prevGradient: $this->prevGrad,
-            optimizer: $this->optimizer
-        )->compute();
+        $gradient = $this->layer->gradient($prevGradient, $mask);
 
         $expected = [
             [0.5, 1.4, 0.2],
@@ -120,24 +263,15 @@ public function testBack() : void
             [0.5, 0.2, 0.0],
         ];
 
-        self::assertInstanceOf(NDArray::class, $gradient);
         self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
     }
 
     #[Test]
-    #[TestDox('Inference pass leaves inputs unchanged')]
-    public function testInfer() : void
+    #[TestDox('It returns correct string representation')]
+    public function testToString() : void
     {
-        $this->layer->initialize($this->fanIn);
-
-        $expected = [
-            [1.0, 2.5, -0.1],
-            [0.1, 0.0, 3.0],
-            [0.002, -6.0, -0.5],
-        ];
+        $expected = 'Dropout (ratio: 0.5)';
 
-        $infer = $this->layer->infer($this->input);
-
-        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+        self::assertSame($expected, (string) $this->layer);
     }
 }

From 5ad0ed2da331f89ceb15cc92dd21ea3509653ef5 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Thu, 11 Dec 2025 19:41:29 +0200
Subject: [PATCH 15/36] ML-392 Added `Multiclass` output layer with complete
 interface, forward/inference/backward passes, unit tests

---
 src/NeuralNet/Layers/Dropout/Dropout.php      |   1 +
 .../Layers/Multiclass/Multiclass.php          | 229 ++++++++++++++++++
 .../Layers/Multiclass/MulticlassTest.php      | 217 +++++++++++++++++
 3 files changed, 447 insertions(+)
 create mode 100644 src/NeuralNet/Layers/Multiclass/Multiclass.php
 create mode 100644 tests/NeuralNet/Layers/Multiclass/MulticlassTest.php

diff --git a/src/NeuralNet/Layers/Dropout/Dropout.php b/src/NeuralNet/Layers/Dropout/Dropout.php
index ce2409f7d..45d88e57a 100644
--- a/src/NeuralNet/Layers/Dropout/Dropout.php
+++ b/src/NeuralNet/Layers/Dropout/Dropout.php
@@ -25,6 +25,7 @@
  * @category    Machine Learning
  * @package     Rubix/ML
  * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
  */
 class Dropout implements Hidden
 {
diff --git a/src/NeuralNet/Layers/Multiclass/Multiclass.php b/src/NeuralNet/Layers/Multiclass/Multiclass.php
new file mode 100644
index 000000000..b6e33a5ac
--- /dev/null
+++ b/src/NeuralNet/Layers/Multiclass/Multiclass.php
@@ -0,0 +1,229 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Multiclass;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use Rubix\ML\NeuralNet\ActivationFunctions\Softmax\Softmax;
+use Rubix\ML\NeuralNet\CostFunctions\Base\Contracts\ClassificationLoss;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+
+/**
+ * Multiclass
+ *
+ * The Multiclass output layer gives a joint probability estimate of a multiclass classification
+ * problem using the Softmax activation function.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Multiclass implements Output
+{
+    /**
+     * The unique class labels.
+     *
+     * @var string[]
+     */
+    protected array $classes = [
+        //
+    ];
+
+    /**
+     * The function that computes the loss of erroneous activations.
+     *
+     * @var ClassificationLoss
+     */
+    protected ClassificationLoss $costFn;
+
+    /**
+     * The softmax activation function.
+     *
+     * @var Softmax
+     */
+    protected Softmax $softmax;
+
+    /**
+     * The memorized input matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * The memorized activation matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $output = null;
+
+    /**
+     * @param string[] $classes
+     * @param ClassificationLoss|null $costFn
+     * @throws InvalidArgumentException
+     */
+    public function __construct(array $classes, ?ClassificationLoss $costFn = null)
+    {
+        $classes = array_values(array_unique($classes));
+
+        if (count($classes) < 2) {
+            throw new InvalidArgumentException('Number of classes'
+                . ' must be greater than 1, ' . count($classes)
+                . ' given.');
+        }
+
+        $this->classes = $classes;
+        $this->costFn = $costFn ?? new CrossEntropy();
+        $this->softmax = new Softmax();
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        return max(1, count($this->classes));
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @param positive-int $fanIn
+     * @throws InvalidArgumentException
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = count($this->classes);
+
+        if ($fanIn !== $fanOut) {
+            throw new InvalidArgumentException('Fan in must be'
+                . " equal to fan out, $fanOut expected but"
+                . " $fanIn given.");
+        }
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $output = $this->softmax->activate($input);
+
+        $this->input = $input;
+        $this->output = $output;
+
+        return $output;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @param NDArray $input
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $this->softmax->activate($input);
+    }
+
+    /**
+     * Compute the gradient and loss at the output.
+     *
+     * @param string[] $labels
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return array
+     */
+    public function back(array $labels, Optimizer $optimizer) : array
+    {
+        if (!$this->input or !$this->output) {
+            throw new RuntimeException('Must perform forward pass'
+                . ' before backpropagating.');
+        }
+
+        $expected = [];
+
+        foreach ($labels as $label) {
+            $dist = [];
+
+            foreach ($this->classes as $class) {
+                $dist[] = $class == $label ? 1.0 : 0.0;
+            }
+
+            $expected[] = $dist;
+        }
+
+        $expected = NumPower::array($expected);
+
+        $input = $this->input;
+        $output = $this->output;
+
+        $gradient = new Deferred([$this, 'gradient'], [$input, $output, $expected]);
+
+        $loss = $this->costFn->compute($output, $expected);
+
+        $this->input = $this->output = null;
+
+        return [$gradient, $loss];
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @param NDArray $input
+     * @param NDArray $output
+     * @param NDArray $expected
+     * @return NDArray
+     */
+    public function gradient(NDArray $input, NDArray $output, NDArray $expected) : NDArray
+    {
+        $n = array_product($output->shape());
+
+        if ($this->costFn instanceof CrossEntropy) {
+            return NumPower::divide(
+                NumPower::subtract($output, $expected),
+                $n
+            );
+        }
+
+        $dLoss = NumPower::divide(
+            $this->costFn->differentiate($output, $expected),
+            $n
+        );
+
+        return NumPower::multiply(
+            $this->softmax->differentiate($output),
+            $dLoss
+        );
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Multiclass (cost function: {$this->costFn})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Multiclass/MulticlassTest.php b/tests/NeuralNet/Layers/Multiclass/MulticlassTest.php
new file mode 100644
index 000000000..a920a4272
--- /dev/null
+++ b/tests/NeuralNet/Layers/Multiclass/MulticlassTest.php
@@ -0,0 +1,217 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Multiclass;
+
+use NDArray;
+use NumPower;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Multiclass\Multiclass;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Multiclass::class)]
+class MulticlassTest extends TestCase
+{
+    protected NDArray $input;
+
+    /**
+     * @var string[]
+     */
+    protected array $labels;
+
+    protected Optimizer $optimizer;
+
+    protected Multiclass $layer;
+
+    /**
+     * @return array<string, array{0: int}>
+     */
+    public static function initializeProvider() : array
+    {
+        return [
+            'fanInEqualsClasses' => [3],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0: array<int, array<int, float>>}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            'expectedForward' => [[
+                [0.1719820, 0.7707700, 0.0572478],
+                [0.0498033, 0.0450639, 0.9051327],
+                [0.6219707, 0.0015385, 0.3764905],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0: array<int, array<int, float>>}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            'expectedGradient' => [[
+                [-0.0920019, 0.0856411, 0.0063608],
+                [0.0055337, -0.1061040, 0.1005703],
+                [0.0691078, 0.00017093, -0.0692788],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0: array<int, array<int, float>>}>
+     */
+    public static function inferProvider() : array
+    {
+        // Same expectations as forward
+        return self::forwardProvider();
+    }
+
+    protected function setUp() : void
+    {
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->labels = ['hot', 'cold', 'ice cold'];
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Multiclass(
+            classes: ['hot', 'cold', 'ice cold'],
+            costFn: new CrossEntropy()
+        );
+    }
+
+    #[Test]
+    #[TestDox('Constructor rejects invalid number of classes')]
+    public function testConstructorRejectsInvalidClasses() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        new Multiclass(classes: ['only-one-class']);
+    }
+
+    #[Test]
+    #[TestDox('Method width() returns number of classes')]
+    public function testWidthReturnsNumberOfClasses() : void
+    {
+        self::assertSame(3, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Initializes and returns correct fan out')]
+    #[DataProvider('initializeProvider')]
+    public function testInitializeReturnsFanOut(int $fanIn) : void
+    {
+        $fanOut = $this->layer->initialize($fanIn);
+
+        self::assertSame($fanIn, $fanOut);
+        self::assertSame(3, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes forward softmax probabilities')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(array $expected) : void
+    {
+        $this->layer->initialize(3);
+
+        self::assertEquals(3, $this->layer->width());
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates and returns output gradient')]
+    #[DataProvider('backProvider')]
+    public function testBack(array $expected) : void
+    {
+        $this->layer->initialize(3);
+
+        // Set internal caches
+        $this->layer->forward($this->input);
+
+        [$computation, $loss] = $this->layer->back(
+            labels: $this->labels,
+            optimizer: $this->optimizer
+        );
+
+        self::assertInstanceOf(Deferred::class, $computation);
+        self::assertIsFloat($loss);
+
+        $gradient = $computation->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes gradient for previous layer directly')]
+    #[DataProvider('backProvider')]
+    public function testGradient(array $expectedGradient) : void
+    {
+        $this->layer->initialize(3);
+
+        // Forward pass to obtain output probabilities
+        $output = $this->layer->forward($this->input);
+
+        // Rebuild expected one-hot matrix the same way as Multiclass::back()
+        $expected = [];
+
+        foreach ($this->labels as $label) {
+            $dist = [];
+
+            foreach (['hot', 'cold', 'ice cold'] as $class) {
+                $dist[] = $class === $label ? 1.0 : 0.0;
+            }
+
+            $expected[] = $dist;
+        }
+
+        $expectedNd = NumPower::array($expected);
+
+        $gradient = $this->layer->gradient($this->input, $output, $expectedNd);
+
+        self::assertEqualsWithDelta($expectedGradient, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes infer softmax probabilities')]
+    #[DataProvider('inferProvider')]
+    public function testInfer(array $expected) : void
+    {
+        $this->layer->initialize(3);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('It returns correct string representation')]
+    public function testToStringReturnsCorrectValue() : void
+    {
+        $expected = 'Multiclass (cost function: Cross Entropy)';
+
+        self::assertSame($expected, (string) $this->layer);
+    }
+}

From a6c634f1493763c529d3592a876251030e03fffe Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Fri, 19 Dec 2025 16:51:29 +0200
Subject: [PATCH 16/36] ML-392 Added `Noise` output layer with complete
 interface, forward/inference/backward passes, unit tests

---
 docs/neural-network/hidden-layers/noise.md |   6 +-
 src/NeuralNet/Layers/Noise/Noise.php       | 157 ++++++++++++++++
 tests/NeuralNet/Layers/Noise/NoiseTest.php | 208 +++++++++++++++++++++
 3 files changed, 368 insertions(+), 3 deletions(-)
 create mode 100644 src/NeuralNet/Layers/Noise/Noise.php
 create mode 100644 tests/NeuralNet/Layers/Noise/NoiseTest.php

diff --git a/docs/neural-network/hidden-layers/noise.md b/docs/neural-network/hidden-layers/noise.md
index 7979549af..4d29732cb 100644
--- a/docs/neural-network/hidden-layers/noise.md
+++ b/docs/neural-network/hidden-layers/noise.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Noise.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Noise/Noise.php">[source]</a></span>
 
 # Noise
 This layer adds random Gaussian noise to the inputs with a user-defined standard deviation. Noise added to neural network activations acts as a regularizer by indirectly adding a penalty to the weights through the cost function in the output layer.
@@ -10,10 +10,10 @@ This layer adds random Gaussian noise to the inputs with a user-defined standard
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\Noise;
+use Rubix\ML\NeuralNet\Layers\Noise\Noise;
 
 $layer = new Noise(1e-3);
 ```
 
 ## References
-[^1]: C. Gulcehre et al. (2016). Noisy Activation Functions.
\ No newline at end of file
+[^1]: C. Gulcehre et al. (2016). Noisy Activation Functions.
diff --git a/src/NeuralNet/Layers/Noise/Noise.php b/src/NeuralNet/Layers/Noise/Noise.php
new file mode 100644
index 000000000..934265bb3
--- /dev/null
+++ b/src/NeuralNet/Layers/Noise/Noise.php
@@ -0,0 +1,157 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Noise;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+
+/**
+ * Noise
+ *
+ * This layer adds random Gaussian noise to the inputs to the layer with a
+ * given standard deviation. Noise added to neural network activations acts as
+ * a regularizer by indirectly adding a penalty to the weights through the cost
+ * function in the output layer.
+ *
+ * References:
+ * [1] C. Gulcehre et al. (2016). Noisy Activation Functions.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Noise implements Hidden
+{
+    /**
+     * The amount (standard deviation) of the gaussian noise to add to the inputs.
+     *
+     * @var float
+     */
+    protected float $stdDev;
+
+    /**
+     * The width of the layer.
+     *
+     * @var positive-int|null
+     */
+    protected ?int $width = null;
+
+    /**
+     * @param float $stdDev
+     * @throws InvalidArgumentException
+     */
+    public function __construct(float $stdDev)
+    {
+        if ($stdDev < 0.0) {
+            throw new InvalidArgumentException("Standard deviation must be 0 or greater, $stdDev given.");
+        }
+
+        $this->stdDev = $stdDev;
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        return $this->width;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $fanIn;
+
+        $this->width = $fanOut;
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        if ($this->stdDev === 0.0) {
+            return $input;
+        }
+
+        $shape = $input->shape();
+
+        // Gaussian noise with mean 0 and standard deviation $this->stdDev
+        $noise = NumPower::normal(size: $shape, loc: 0.0, scale: $this->stdDev);
+
+        return NumPower::add($input, $noise);
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $input;
+    }
+
+    /**
+     * Calculate the gradients of the layer and update the parameters.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        return $prevGradient;
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Noise (std dev: {$this->stdDev})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Noise/NoiseTest.php b/tests/NeuralNet/Layers/Noise/NoiseTest.php
new file mode 100644
index 000000000..4eaf11770
--- /dev/null
+++ b/tests/NeuralNet/Layers/Noise/NoiseTest.php
@@ -0,0 +1,208 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Noise;
+
+use NDArray;
+use NumPower;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\Exceptions\RuntimeException;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\Noise\Noise;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Noise::class)]
+class NoiseTest extends TestCase
+{
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected Noise $layer;
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.25, 0.7, 0.1],
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Noise(0.1);
+    }
+
+    /**
+     * @return array<int, array{0: array<int, array<int, float>>}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            [
+                [
+                    [0.25, 0.7, 0.1],
+                    [0.5, 0.2, 0.01],
+                    [0.25, 0.1, 0.89],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<int, array{0: array<int, array<int, float>>}>
+     */
+    public static function inferProvider() : array
+    {
+        return [
+            [
+                [
+                    [1.0, 2.5, -0.1],
+                    [0.1, 0.0, 3.0],
+                    [0.002, -6.0, -0.5],
+                ],
+            ],
+        ];
+    }
+
+    #[Test]
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
+    {
+        self::assertEquals('Noise (std dev: 0.1)', (string) $this->layer);
+    }
+
+    #[Test]
+    #[TestDox('Constructor rejects invalid standard deviation')]
+    public function testConstructorRejectsInvalidStdDev() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        // Negative std dev should be rejected
+        new Noise(-0.1);
+    }
+
+    #[Test]
+    #[TestDox('Forward throws if layer is not initialized')]
+    public function testForwardThrowsIfNotInitialized() : void
+    {
+        $layer = new Noise(0.1);
+
+        $this->expectException(RuntimeException::class);
+
+        $layer->forward($this->input);
+    }
+
+    #[Test]
+    #[TestDox('Initializes width equal to fan-in')]
+    public function testInitializeSetsWidth() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertEquals($this->fanIn, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes forward pass that adds Gaussian noise with correct shape and scale')]
+    public function testForwardAddsNoiseWithCorrectProperties() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertInstanceOf(NDArray::class, $forward);
+
+        $inputArray = $this->input->toArray();
+        $forwardArray = $forward->toArray();
+
+        // 1) Shape is preserved
+        self::assertSameSize($inputArray, $forwardArray);
+
+        // 2) At least one element differs (very high probability)
+        $allEqual = true;
+        foreach ($inputArray as $i => $row) {
+            if ($row !== $forwardArray[$i]) {
+                $allEqual = false;
+                break;
+            }
+        }
+        self::assertFalse($allEqual, 'Expected forward output to differ from input due to noise.');
+
+        // 3) Empirical std dev of (forward - input) is ~ stdDev, within tolerance
+        $diffs = [];
+        foreach ($inputArray as $i => $row) {
+            foreach ($row as $j => $v) {
+                $diffs[] = $forwardArray[$i][$j] - $v;
+            }
+        }
+
+        $n = count($diffs);
+        $mean = array_sum($diffs) / $n;
+
+        $var = 0.0;
+        foreach ($diffs as $d) {
+            $var += ($d - $mean) * ($d - $mean);
+        }
+        $var /= $n;
+        $std = sqrt($var);
+
+        // Mean of noise should be near 0, std near $this->stdDev
+        self::assertEqualsWithDelta(0.0, $mean, 2e-1);   // +/-0.2 around 0
+        self::assertEqualsWithDelta(0.1, $std, 1e-1);    // +/-0.1 around 0.1
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates and returns previous gradient unchanged')]
+    #[DataProvider('backProvider')]
+    public function testBackReturnsPrevGradient(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+        $this->layer->forward($this->input);
+
+        $gradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Infer returns input unchanged')]
+    #[DataProvider('inferProvider')]
+    public function testInferIdentity(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+}

From 7013a569df2ce38573027d626d39cbf0b11c095d Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Fri, 19 Dec 2025 17:58:38 +0200
Subject: [PATCH 17/36] ML-392 Added `Placeholder1D` output layer with complete
 interface, forward/inference/backward passes, unit tests

---
 .../hidden-layers/placeholder1d.md            |  17 +++
 src/NeuralNet/Layers/Base/Contracts/Input.php |  18 +++
 .../Layers/Placeholder1D/Placeholder1D.php    | 108 +++++++++++++++++
 .../Placeholder1D/Placeholder1DTest.php       | 114 ++++++++++++++++++
 4 files changed, 257 insertions(+)
 create mode 100644 docs/neural-network/hidden-layers/placeholder1d.md
 create mode 100644 src/NeuralNet/Layers/Base/Contracts/Input.php
 create mode 100644 src/NeuralNet/Layers/Placeholder1D/Placeholder1D.php
 create mode 100644 tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php

diff --git a/docs/neural-network/hidden-layers/placeholder1d.md b/docs/neural-network/hidden-layers/placeholder1d.md
new file mode 100644
index 000000000..f70575eee
--- /dev/null
+++ b/docs/neural-network/hidden-layers/placeholder1d.md
@@ -0,0 +1,17 @@
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Placeholder1D/Placeholder1D.php">[source]</a></span>
+
+# Placeholder 1D
+
+The Placeholder 1D input layer represents the future input values of a mini batch (matrix) of single dimensional tensors (vectors) to the neural network. It performs shape validation on the input and then forwards it unchanged to the next layer.
+
+## Parameters
+| # | Name | Default | Type | Description |
+|---|---|---|---|---|
+| 1 | inputs | | int | The number of input nodes (features). |
+
+## Example
+```php
+use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
+
+$layer = new Placeholder1D(10);
+```
diff --git a/src/NeuralNet/Layers/Base/Contracts/Input.php b/src/NeuralNet/Layers/Base/Contracts/Input.php
new file mode 100644
index 000000000..f0d755253
--- /dev/null
+++ b/src/NeuralNet/Layers/Base/Contracts/Input.php
@@ -0,0 +1,18 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Base\Contracts;
+
+/**
+ * Input
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+interface Input extends Layer
+{
+    //
+}
diff --git a/src/NeuralNet/Layers/Placeholder1D/Placeholder1D.php b/src/NeuralNet/Layers/Placeholder1D/Placeholder1D.php
new file mode 100644
index 000000000..45f8fc49d
--- /dev/null
+++ b/src/NeuralNet/Layers/Placeholder1D/Placeholder1D.php
@@ -0,0 +1,108 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Placeholder1D;
+
+use NDArray;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+
+/**
+ * Placeholder 1D
+ *
+ * The Placeholder 1D input layer represents the *future* input values of a mini
+ * batch (matrix) of single dimensional tensors (vectors) to the neural network.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Placeholder1D implements Input
+{
+    /**
+     * The number of input nodes. i.e. feature inputs.
+     *
+     * @var positive-int
+     */
+    protected int $inputs;
+
+    /**
+     * @param int $inputs
+     * @throws InvalidArgumentException
+     */
+    public function __construct(int $inputs)
+    {
+        if ($inputs < 1) {
+            throw new InvalidArgumentException("Number of input nodes must be greater than 0, $inputs given.");
+        }
+
+        $this->inputs = $inputs;
+    }
+
+    /**
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        return $this->inputs;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        return $this->inputs;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @param NDArray $input
+     * @throws InvalidArgumentException
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $shape = $input->shape();
+
+        if (empty($shape) || $shape[0] !== $this->inputs) {
+            $features = $shape[0] ?? 0;
+
+            throw new InvalidArgumentException(
+                'The number of features and input nodes must be equal,'
+                . " {$this->inputs} expected but {$features} given.");
+        }
+
+        return $input;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $this->forward($input);
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Placeholder 1D (inputs: {$this->inputs})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php b/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php
new file mode 100644
index 000000000..7aa3168c8
--- /dev/null
+++ b/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php
@@ -0,0 +1,114 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Placeholder1D;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use NDArray;
+use NumPower;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(Placeholder1D::class)]
+class Placeholder1DTest extends TestCase
+{
+    protected NDArray $input;
+
+    protected Placeholder1D $layer;
+
+    protected function setUp() : void
+    {
+        $this->input = NumPower::array([
+            [1.0, 2.5],
+            [0.1, 0.0],
+            [0.002, -6.0],
+        ]);
+
+        $this->layer = new Placeholder1D(3);
+    }
+
+    /**
+     * @return array<int, array{NDArray,array<int, array<int, float>>}>
+     */
+    public static function inputProvider() : array
+    {
+        return [
+            [
+                NumPower::array([
+                    [1.0, 2.5],
+                    [0.1, 0.0],
+                    [0.002, -6.0],
+                ]),
+                [
+                    [1.0, 2.5],
+                    [0.1, 0.0],
+                    [0.002, -6.0],
+                ],
+            ],
+        ];
+    }
+
+    #[Test]
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
+    {
+        self::assertEquals('Placeholder 1D (inputs: 3)', (string) $this->layer);
+    }
+
+    #[Test]
+    #[TestDox('Returns width equal to number of inputs')]
+    public function testWidth() : void
+    {
+        self::assertEquals(3, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Constructor rejects invalid number of inputs')]
+    public function testConstructorRejectsInvalidInputs() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        new Placeholder1D(0);
+    }
+
+    #[Test]
+    #[TestDox('Initialize returns fan out equal to inputs without changing width')]
+    public function testInitialize() : void
+    {
+        $fanOut = $this->layer->initialize(5);
+
+        self::assertEquals(3, $fanOut);
+        self::assertEquals(3, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes forward pass')]
+    #[DataProvider('inputProvider')]
+    public function testForward(NDArray $input, array $expected) : void
+    {
+        self::assertEquals(3, $this->layer->width());
+
+        $forward = $this->layer->forward($input);
+
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes inference pass')]
+    #[DataProvider('inputProvider')]
+    public function testInfer(NDArray $input, array $expected) : void
+    {
+        self::assertEquals(3, $this->layer->width());
+
+        $infer = $this->layer->infer($input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+}

From 08ad6b4e3b0faba8e697453cfe64c18457ed6053 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Fri, 19 Dec 2025 18:12:45 +0200
Subject: [PATCH 18/36] ML-392 Fixed wrong exception for AssertsShapes and
 exception texts

---
 src/NeuralNet/Initializers/He/HeNormal.php            | 2 +-
 src/NeuralNet/Initializers/LeCun/LeCunNormal.php      | 2 +-
 src/NeuralNet/Initializers/Normal/Normal.php          | 2 +-
 src/NeuralNet/Initializers/Normal/TruncatedNormal.php | 2 +-
 src/NeuralNet/Initializers/Xavier/XavierNormal.php    | 2 +-
 src/Traits/AssertsShapes.php                          | 4 ++--
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/NeuralNet/Initializers/He/HeNormal.php b/src/NeuralNet/Initializers/He/HeNormal.php
index 3d68844e4..193c7ff16 100644
--- a/src/NeuralNet/Initializers/He/HeNormal.php
+++ b/src/NeuralNet/Initializers/He/HeNormal.php
@@ -35,7 +35,7 @@ public function initialize(int $fanIn, int $fanOut) : NDArray
 
         $stdDev = sqrt(2 / $fanOut);
 
-        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], scale: $stdDev);
+        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], loc: 0.0, scale: $stdDev);
     }
 
     /**
diff --git a/src/NeuralNet/Initializers/LeCun/LeCunNormal.php b/src/NeuralNet/Initializers/LeCun/LeCunNormal.php
index 81d8add56..3fc5832bc 100644
--- a/src/NeuralNet/Initializers/LeCun/LeCunNormal.php
+++ b/src/NeuralNet/Initializers/LeCun/LeCunNormal.php
@@ -36,7 +36,7 @@ public function initialize(int $fanIn, int $fanOut) : NDArray
 
         $stdDev = sqrt(1 / $fanOut);
 
-        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], scale: $stdDev);
+        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], loc: 0.0, scale: $stdDev);
     }
 
     /**
diff --git a/src/NeuralNet/Initializers/Normal/Normal.php b/src/NeuralNet/Initializers/Normal/Normal.php
index 08c77ff38..acb4ad050 100644
--- a/src/NeuralNet/Initializers/Normal/Normal.php
+++ b/src/NeuralNet/Initializers/Normal/Normal.php
@@ -43,7 +43,7 @@ public function initialize(int $fanIn, int $fanOut) : NDArray
     {
         $this->validateFanInFanOut(fanIn: $fanIn, fanOut: $fanOut);
 
-        return NumPower::normal(size: [$fanOut, $fanIn], scale: $this->stdDev);
+        return NumPower::normal(size: [$fanOut, $fanIn], loc: 0.0, scale: $this->stdDev);
     }
 
     /**
diff --git a/src/NeuralNet/Initializers/Normal/TruncatedNormal.php b/src/NeuralNet/Initializers/Normal/TruncatedNormal.php
index c0c90196d..af9ed43fe 100644
--- a/src/NeuralNet/Initializers/Normal/TruncatedNormal.php
+++ b/src/NeuralNet/Initializers/Normal/TruncatedNormal.php
@@ -44,7 +44,7 @@ public function initialize(int $fanIn, int $fanOut) : NDArray
     {
         $this->validateFanInFanOut(fanIn: $fanIn, fanOut: $fanOut);
 
-        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], scale: $this->stdDev);
+        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], loc: 0.0, scale: $this->stdDev);
     }
 
     /**
diff --git a/src/NeuralNet/Initializers/Xavier/XavierNormal.php b/src/NeuralNet/Initializers/Xavier/XavierNormal.php
index dfe5bc956..428c74e49 100644
--- a/src/NeuralNet/Initializers/Xavier/XavierNormal.php
+++ b/src/NeuralNet/Initializers/Xavier/XavierNormal.php
@@ -36,7 +36,7 @@ public function initialize(int $fanIn, int $fanOut) : NDArray
 
         $stdDev = sqrt(2 / ($fanOut + $fanIn));
 
-        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], scale: $stdDev);
+        return NumPower::truncatedNormal(size: [$fanOut, $fanIn], loc: 0.0, scale: $stdDev);
     }
 
     /**
diff --git a/src/Traits/AssertsShapes.php b/src/Traits/AssertsShapes.php
index 7fabc316f..88fe23c1e 100644
--- a/src/Traits/AssertsShapes.php
+++ b/src/Traits/AssertsShapes.php
@@ -4,7 +4,7 @@
 
 namespace Rubix\ML\Traits;
 
-use InvalidArgumentException;
+use Rubix\ML\Exceptions\InvalidArgumentException;
 use NDArray;
 
 /**
@@ -29,7 +29,7 @@ trait AssertsShapes
     protected function assertSameShape(NDArray $output, NDArray $target) : void
     {
         if ($output->shape() !== $target->shape()) {
-            throw new InvalidArgumentException('Output and target must have identical shapes.');
+            throw new InvalidArgumentException('Output and target must have the same shape.');
         }
     }
 }

From ce99147475683eac03624289ff85627ac6c6695c Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Fri, 19 Dec 2025 18:23:08 +0200
Subject: [PATCH 19/36] ML-392 Increased memory for tests

---
 phpunit.xml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/phpunit.xml b/phpunit.xml
index 22063bc22..379cdc0a2 100644
--- a/phpunit.xml
+++ b/phpunit.xml
@@ -83,5 +83,6 @@
   </testsuites>
   <php>
     <env name="ENV" value="testing"/>
+    <ini name="memory_limit" value="256M"/>
   </php>
 </phpunit>

From 7628fecbe13408f2914163288fc8051a96e79ea8 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Mon, 22 Dec 2025 20:14:49 +0200
Subject: [PATCH 20/36] ML-392 Added `PReLU` output layer with complete
 interface and unit tests

---
 docs/neural-network/hidden-layers/prelu.md |   6 +-
 src/NeuralNet/Layers/PReLU/PReLU.php       | 287 ++++++++++++++++++++
 tests/NeuralNet/Layers/PReLU/PReLUTest.php | 291 +++++++++++++++++++++
 3 files changed, 581 insertions(+), 3 deletions(-)
 create mode 100644 src/NeuralNet/Layers/PReLU/PReLU.php
 create mode 100644 tests/NeuralNet/Layers/PReLU/PReLUTest.php

diff --git a/docs/neural-network/hidden-layers/prelu.md b/docs/neural-network/hidden-layers/prelu.md
index baaef2f32..22a5b4762 100644
--- a/docs/neural-network/hidden-layers/prelu.md
+++ b/docs/neural-network/hidden-layers/prelu.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/PReLU.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/PReLU/PReLU.php">[source]</a></span>
 
 # PReLU
 Parametric Rectified Linear Units are leaky rectifiers whose *leakage* coefficient is learned during training. Unlike standard [Leaky ReLUs](../activation-functions/leaky-relu.md) whose leakage remains constant, PReLU layers can adjust the leakage to better suite the model on a per node basis.
@@ -14,8 +14,8 @@ $$
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\PReLU;
-use Rubix\ML\NeuralNet\Initializers\Normal;
+use Rubix\ML\NeuralNet\Layers\PReLU\PReLU;
+use Rubix\ML\NeuralNet\Initializers\Normal\Normal;
 
 $layer = new PReLU(new Normal(0.5));
 ```
diff --git a/src/NeuralNet/Layers/PReLU/PReLU.php b/src/NeuralNet/Layers/PReLU/PReLU.php
new file mode 100644
index 000000000..a8986cce4
--- /dev/null
+++ b/src/NeuralNet/Layers/PReLU/PReLU.php
@@ -0,0 +1,287 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\PReLU;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\RuntimeException;
+use Rubix\ML\NeuralNet\Initializers\Base\Initializer;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+use Generator;
+
+/**
+ * PReLU
+ *
+ * Parametric Rectified Linear Units are leaky rectifiers whose leakage coefficients
+ * are learned during training.
+ *
+ * References:
+ * [1] K. He et al. (2015). Delving Deep into Rectifiers: Surpassing Human-Level
+ * Performance on ImageNet Classification.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class PReLU implements Hidden, Parametric
+{
+    /**
+     * The initializer of the alpha (leakage) parameter.
+     *
+     * @var Initializer
+     */
+    protected Initializer $initializer;
+
+    /**
+     * The width of the layer.
+     *
+     * @var positive-int|null
+     */
+    protected ?int $width = null;
+
+    /**
+     * The parameterized leakage coefficients.
+     *
+     * @var Parameter|null
+     */
+    protected ?Parameter $alpha = null;
+
+    /**
+     * The memoized input matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * @param Initializer|null $initializer
+     */
+    public function __construct(?Initializer $initializer = null)
+    {
+        $this->initializer = $initializer ?? new Constant(0.25);
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        return $this->width;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $fanIn;
+
+        // Initialize alpha as a vector of length fanOut (one alpha per neuron)
+        // Using shape [fanOut, 1] then flattening to [fanOut]
+        $alphaMat = $this->initializer->initialize(1, $fanOut);
+        $alpha = NumPower::flatten($alphaMat);
+
+        $this->width = $fanOut;
+        $this->alpha = new Parameter($alpha);
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $this->input = $input;
+
+        return $this->activate($input);
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $this->activate($input);
+    }
+
+    /**
+     * Calculate the gradient and update the parameters of the layer.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        if (!$this->alpha) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        if (!$this->input) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        /** @var NDArray $dOut */
+        $dOut = $prevGradient();
+
+        // Negative part of the input (values <= 0), used for dL/dalpha
+        $negativeInput = NumPower::minimum($this->input, 0.0);
+
+        $dAlphaFull = NumPower::multiply($dOut, $negativeInput);
+
+        // Sum over the batch axis (axis = 1) to obtain a gradient vector [width]
+        $dAlpha = NumPower::sum($dAlphaFull, axis: 1);
+
+        $this->alpha->update($dAlpha, $optimizer);
+
+        $input = $this->input;
+
+        $this->input = null;
+
+        return new Deferred([$this, 'gradient'], [$input, $dOut]);
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @param NDArray $dOut
+     * @return NDArray
+     */
+    public function gradient(NDArray $input, NDArray $dOut) : NDArray
+    {
+        $derivative = $this->differentiate($input);
+
+        return NumPower::multiply($derivative, $dOut);
+    }
+
+    /**
+     * Return the parameters of the layer.
+     *
+     * @internal
+     *
+     * @throws \RuntimeException
+     * @return Generator<Parameter>
+     */
+    public function parameters() : Generator
+    {
+        if (!$this->alpha) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        yield 'alpha' => $this->alpha;
+    }
+
+    /**
+     * Restore the parameters in the layer from an associative array.
+     *
+     * @internal
+     *
+     * @param Parameter[] $parameters
+     */
+    public function restore(array $parameters) : void
+    {
+        $this->alpha = $parameters['alpha'];
+    }
+
+    /**
+     * Compute the leaky ReLU activation function and return a matrix.
+     *
+     * @param NDArray $input
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    protected function activate(NDArray $input) : NDArray
+    {
+        if (!$this->alpha) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        // Reshape alpha vector [width] to column [width, 1] for broadcasting
+        $alphaCol = NumPower::reshape($this->alpha->param(), [$this->width(), 1]);
+
+        $positiveActivation = NumPower::maximum($input, 0.0);
+
+        $negativeActivation = NumPower::multiply(
+            NumPower::minimum($input, 0.0),
+            $alphaCol,
+        );
+
+        return NumPower::add($positiveActivation, $negativeActivation);
+    }
+
+    /**
+     * Calculate the derivative of the activation function at a given output.
+     *
+     * @param NDArray $input
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    protected function differentiate(NDArray $input) : NDArray
+    {
+        if (!$this->alpha) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        // Reshape alpha vector [width] to column [width, 1] for broadcasting
+        $alphaCol = NumPower::reshape($this->alpha->param(), [$this->width(), 1]);
+
+        $positivePart = NumPower::greater($input, 0.0);
+
+        $negativePart = NumPower::multiply(
+            NumPower::lessEqual($input, 0.0),
+            $alphaCol,
+        );
+
+        return NumPower::add($positivePart, $negativePart);
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "PReLU (initializer: {$this->initializer})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/PReLU/PReLUTest.php b/tests/NeuralNet/Layers/PReLU/PReLUTest.php
new file mode 100644
index 000000000..a1193ea09
--- /dev/null
+++ b/tests/NeuralNet/Layers/PReLU/PReLUTest.php
@@ -0,0 +1,291 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\PReLU;
+
+use NDArray;
+use NumPower;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\Deferred;
+use Rubix\ML\NeuralNet\Layers\PReLU\PReLU;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Parameters\Parameter as TrainableParameter;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Layers')]
+#[CoversClass(PReLU::class)]
+class PReLUTest extends TestCase
+{
+    protected const int RANDOM_SEED = 0;
+
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected PReLU $layer;
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.0, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.25, 0.7, 0.1],
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new PReLU(new Constant(0.25));
+
+        srand(self::RANDOM_SEED);
+    }
+
+    /**
+     * @return array<string, array{0:int}>
+     */
+    public static function initializeProvider() : array
+    {
+        return [
+            'fanIn=3' => [3],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function forwardProvider() : array
+    {
+        return [
+            'expectedForward' => [[
+                [1.0, 2.5, -0.025],
+                [0.1, 0.0, 3.0],
+                [0.002, -1.5, -0.125],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function backProvider() : array
+    {
+        return [
+            'expectedGradient' => [[
+                [0.25, 0.6999999, 0.0250010],
+                [0.5, 0.05, 0.01],
+                [0.25, 0.0251045, 0.2234300],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function gradientProvider() : array
+    {
+        return [
+            'expectedGradient' => [[
+                [0.25, 0.7, 0.025],
+                [0.5, 0.05, 0.01],
+                [0.25, 0.025, 0.2225],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array}>
+     */
+    public static function inferProvider() : array
+    {
+        return [
+            'expectedInfer' => [[
+                [1.0, 2.5, -0.0250000],
+                [0.1, 0.0, 3.0],
+                [0.0020000, -1.5, -0.125],
+            ]],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array,1:array}>
+     */
+    public static function activateProvider() : array
+    {
+        return [
+            'defaultInput' => [
+                [
+                    [1.0, 2.5, -0.1],
+                    [0.1, 0.0, 3.0],
+                    [0.002, -6.0, -0.5],
+                ],
+                [
+                    [1.0, 2.5, -0.025],
+                    [0.1, 0.0, 3.0],
+                    [0.002, -1.5, -0.125],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0:array,1:array}>
+     */
+    public static function differentiateProvider() : array
+    {
+        return [
+            'defaultInput' => [
+                [
+                    [1.0, 2.5, -0.1],
+                    [0.1, 0.0, 3.0],
+                    [0.002, -6.0, -0.5],
+                ],
+                [
+                    [1.0, 1.0, 0.25],
+                    [1.0, 0.25, 1.0],
+                    [1.0, 0.25, 0.25],
+                ],
+            ],
+        ];
+    }
+
+    #[Test]
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
+    {
+        self::assertEquals('PReLU (initializer: Constant (value: 0.25))', (string) $this->layer);
+    }
+
+    #[Test]
+    #[TestDox('Initializes width equal to fan-in')]
+    public function testInitializeSetsWidth() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertEquals($this->fanIn, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Initializes and returns fan out equal to fan-in')]
+    #[DataProvider('initializeProvider')]
+    public function testInitializeReturnsFanOut(int $fanIn) : void
+    {
+        $fanOut = $this->layer->initialize($fanIn);
+
+        self::assertEquals($fanIn, $fanOut);
+        self::assertEquals($fanIn, $this->layer->width());
+    }
+
+    #[Test]
+    #[TestDox('Computes forward activations')]
+    #[DataProvider('forwardProvider')]
+    public function testForward(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertEqualsWithDelta($expected, $forward->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Backpropagates and returns gradient for previous layer')]
+    #[DataProvider('backProvider')]
+    public function testBack(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        // Forward pass to set internal input state
+        $this->layer->forward($this->input);
+
+        $gradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes gradient for previous layer directly')]
+    #[DataProvider('gradientProvider')]
+    public function testGradient(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $gradient = $this->layer->gradient(
+            $this->input,
+            ($this->prevGrad)(),
+        );
+
+        self::assertEqualsWithDelta($expected, $gradient->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Computes inference activations')]
+    #[DataProvider('inferProvider')]
+    public function testInfer(array $expected) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($expected, $infer->toArray(), 1e-7);
+    }
+
+    #[Test]
+    #[TestDox('Yields trainable alpha parameter')]
+    public function testParameters() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $params = iterator_to_array($this->layer->parameters());
+
+        self::assertArrayHasKey('alpha', $params);
+        self::assertInstanceOf(TrainableParameter::class, $params['alpha']);
+    }
+
+    #[Test]
+    #[TestDox('Restores alpha parameter from array')]
+    public function testRestore() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $alphaNew = new TrainableParameter(NumPower::full([$this->fanIn], 0.5));
+
+        $this->layer->restore([
+            'alpha' => $alphaNew,
+        ]);
+
+        $restored = iterator_to_array($this->layer->parameters());
+
+        self::assertSame($alphaNew, $restored['alpha']);
+        self::assertEquals(
+            array_fill(0, $this->fanIn, 0.5),
+            $restored['alpha']->param()->toArray(),
+        );
+    }
+}

From f83fed6a592247c122db35a997e990cb4e7775fc Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Wed, 24 Dec 2025 20:06:37 +0200
Subject: [PATCH 21/36] ML-392 Added `Swish` output layer with complete
 interface and unit tests

---
 docs/neural-network/hidden-layers/swish.md |   6 +-
 src/NeuralNet/Layers/Swish/Swish.php       | 303 +++++++++++++++++++++
 tests/NeuralNet/Layers/Swish/SwishTest.php | 202 ++++++++++++++
 3 files changed, 508 insertions(+), 3 deletions(-)
 create mode 100644 src/NeuralNet/Layers/Swish/Swish.php
 create mode 100644 tests/NeuralNet/Layers/Swish/SwishTest.php

diff --git a/docs/neural-network/hidden-layers/swish.md b/docs/neural-network/hidden-layers/swish.md
index e91138566..29e6677f7 100644
--- a/docs/neural-network/hidden-layers/swish.md
+++ b/docs/neural-network/hidden-layers/swish.md
@@ -1,4 +1,4 @@
-<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Swish.php">[source]</a></span>
+<span style="float:right;"><a href="https://github.com/RubixML/ML/blob/master/src/NeuralNet/Layers/Swish/Swish.php">[source]</a></span>
 
 # Swish
 Swish is a parametric activation layer that utilizes smooth rectified activation functions. The trainable *beta* parameter allows each activation function in the layer to tailor its output to the training set by interpolating between the linear function and ReLU.
@@ -10,8 +10,8 @@ Swish is a parametric activation layer that utilizes smooth rectified activation
 
 ## Example
 ```php
-use Rubix\ML\NeuralNet\Layers\Swish;
-use Rubix\ML\NeuralNet\Initializers\Constant;
+use Rubix\ML\NeuralNet\Layers\Swish\Swish;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
 
 $layer = new Swish(new Constant(1.0));
 ```
diff --git a/src/NeuralNet/Layers/Swish/Swish.php b/src/NeuralNet/Layers/Swish/Swish.php
new file mode 100644
index 000000000..fcb00fa44
--- /dev/null
+++ b/src/NeuralNet/Layers/Swish/Swish.php
@@ -0,0 +1,303 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Layers\Swish;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\RuntimeException;
+use Rubix\ML\NeuralNet\ActivationFunctions\Sigmoid\Sigmoid;
+use Rubix\ML\NeuralNet\Initializers\Base\Initializer;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+use Generator;
+
+/**
+ * Swish
+ *
+ * Swish is a parametric activation layer that utilizes smooth rectified activation functions. The trainable
+ * *beta* parameter allows each activation function in the layer to tailor its output to the training set by
+ * interpolating between the linear function and ReLU.
+ *
+ * [1] P. Ramachandran et al. (2017). Swish: A Self-gated Activation Function.
+ * [2] P. Ramachandran et al. (2017). Searching for Activation Functions.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Swish implements Hidden, Parametric
+{
+    /**
+     * The initializer of the beta parameter.
+     *
+     * @var Initializer
+     */
+    protected Initializer $initializer;
+
+    /**
+     * The sigmoid activation function.
+     *
+     * @var Sigmoid
+     */
+    protected Sigmoid $sigmoid;
+
+    /**
+     * The width of the layer.
+     *
+     * @var positive-int|null
+     */
+    protected ?int $width = null;
+
+    /**
+     * The parameterized scaling factors.
+     *
+     * @var Parameter|null
+     */
+    protected ?Parameter $beta = null;
+
+    /**
+     * The memoized input matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $input = null;
+
+    /**
+     * The memorized activation matrix.
+     *
+     * @var NDArray|null
+     */
+    protected ?NDArray $output = null;
+
+    /**
+     * @param Initializer|null $initializer
+     */
+    public function __construct(?Initializer $initializer = null)
+    {
+        $this->initializer = $initializer ?? new Constant(1.0);
+        $this->sigmoid = new Sigmoid();
+    }
+
+    /**
+     * Return the width of the layer.
+     *
+     * @internal
+     *
+     * @throws RuntimeException
+     * @return positive-int
+     */
+    public function width() : int
+    {
+        if ($this->width === null) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        return $this->width;
+    }
+
+    /**
+     * Initialize the layer with the fan in from the previous layer and return
+     * the fan out for this layer.
+     *
+     * @internal
+     *
+     * @param positive-int $fanIn
+     * @return positive-int
+     */
+    public function initialize(int $fanIn) : int
+    {
+        $fanOut = $fanIn;
+
+        // Initialize beta as a vector of length fanOut (one beta per neuron)
+        // Using shape [fanOut, 1] then flattening to [fanOut]
+        $betaMat = $this->initializer->initialize(1, $fanOut);
+        $beta = NumPower::flatten($betaMat);
+
+        $this->width = $fanOut;
+        $this->beta = new Parameter($beta);
+
+        return $fanOut;
+    }
+
+    /**
+     * Compute a forward pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function forward(NDArray $input) : NDArray
+    {
+        $this->input = $input;
+
+        $this->output = $this->activate($input);
+
+        return $this->output;
+    }
+
+    /**
+     * Compute an inferential pass through the layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function infer(NDArray $input) : NDArray
+    {
+        return $this->activate($input);
+    }
+
+    /**
+     * Calculate the gradient and update the parameters of the layer.
+     *
+     * @internal
+     *
+     * @param Deferred $prevGradient
+     * @param Optimizer $optimizer
+     * @throws RuntimeException
+     * @return Deferred
+     */
+    public function back(Deferred $prevGradient, Optimizer $optimizer) : Deferred
+    {
+        if (!$this->beta) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        if (!$this->input or !$this->output) {
+            throw new RuntimeException('Must perform forward pass before backpropagating.');
+        }
+
+        /** @var NDArray $dOut */
+        $dOut = $prevGradient();
+
+        // Gradient of the loss with respect to beta
+        // dL/dbeta = sum_over_batch(dL/dy * dy/dbeta)
+        // Here we use a simplified formulation: dL/dbeta ~ sum(dOut * input)
+        $dBetaFull = NumPower::multiply($dOut, $this->input);
+
+        // Sum over the batch axis (axis = 1) to obtain a gradient vector [width]
+        $dBeta = NumPower::sum($dBetaFull, axis: 1);
+
+        $this->beta->update($dBeta, $optimizer);
+
+        $input = $this->input;
+        $output = $this->output;
+
+        $this->input = $this->output = null;
+
+        return new Deferred([$this, 'gradient'], [$input, $output, $dOut]);
+    }
+
+    /**
+     * Calculate the gradient for the previous layer.
+     *
+     * @internal
+     *
+     * @param NDArray $input
+     * @param NDArray $output
+     * @param NDArray $dOut
+     * @return NDArray
+     */
+    public function gradient(NDArray $input, NDArray $output, NDArray $dOut) : NDArray
+    {
+        $derivative = $this->differentiate($input, $output);
+
+        return NumPower::multiply($derivative, $dOut);
+    }
+
+    /**
+     * Return the parameters of the layer.
+     *
+     * @internal
+     *
+     * @throws \RuntimeException
+     * @return Generator<Parameter>
+     */
+    public function parameters() : Generator
+    {
+        if (!$this->beta) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        yield 'beta' => $this->beta;
+    }
+
+    /**
+     * Restore the parameters in the layer from an associative array.
+     *
+     * @internal
+     *
+     * @param Parameter[] $parameters
+     */
+    public function restore(array $parameters) : void
+    {
+        $this->beta = $parameters['beta'];
+    }
+
+    /**
+     * Compute the Swish activation function and return a matrix.
+     *
+     * @param NDArray $input
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    protected function activate(NDArray $input) : NDArray
+    {
+        if (!$this->beta) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        // Reshape beta vector [width] to column [width, 1] for broadcasting
+        $betaCol = NumPower::reshape($this->beta->param(), [$this->width(), 1]);
+
+        $zHat = NumPower::multiply($betaCol, $input);
+
+        $activated = $this->sigmoid->activate($zHat);
+
+        return NumPower::multiply($activated, $input);
+    }
+
+    /**
+     * Calculate the derivative of the activation function at a given output.
+     *
+     * @param NDArray $input
+     * @param NDArray $output
+     * @throws RuntimeException
+     * @return NDArray
+     */
+    protected function differentiate(NDArray $input, NDArray $output) : NDArray
+    {
+        if (!$this->beta) {
+            throw new RuntimeException('Layer has not been initialized.');
+        }
+
+        // Original formulation:
+        // derivative = (output / input) * (1 - output) + output
+        // Implemented using NumPower operations to avoid explicit ones matrix.
+        $term1 = NumPower::divide($output, $input);
+        $oneMinusOutput = NumPower::subtract(1.0, $output);
+
+        $product = NumPower::multiply($term1, $oneMinusOutput);
+
+        return NumPower::add($product, $output);
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "Swish (initializer: {$this->initializer})";
+    }
+}
diff --git a/tests/NeuralNet/Layers/Swish/SwishTest.php b/tests/NeuralNet/Layers/Swish/SwishTest.php
new file mode 100644
index 000000000..5f8d55503
--- /dev/null
+++ b/tests/NeuralNet/Layers/Swish/SwishTest.php
@@ -0,0 +1,202 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Layers\Swish;
+
+use NDArray;
+use NumPower;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\DataProvider;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\TestCase;
+use Rubix\ML\Deferred;
+use Rubix\ML\Exceptions\RuntimeException;
+use Rubix\ML\NeuralNet\Initializers\Constant\Constant;
+use Rubix\ML\NeuralNet\Layers\Swish\Swish;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+
+#[Group('Layers')]
+#[CoversClass(Swish::class)]
+class SwishTest extends TestCase
+{
+    /**
+     * @var positive-int
+     */
+    protected int $fanIn;
+
+    protected NDArray $input;
+
+    protected Deferred $prevGrad;
+
+    protected Optimizer $optimizer;
+
+    protected Swish $layer;
+
+    protected function setUp() : void
+    {
+        $this->fanIn = 3;
+
+        $this->input = NumPower::array([
+            [1.0, 2.5, -0.1],
+            [0.1, 0.1, 3.0],
+            [0.002, -6.0, -0.5],
+        ]);
+
+        $this->prevGrad = new Deferred(fn: function () : NDArray {
+            return NumPower::array([
+                [0.25, 0.7, 0.1],
+                [0.50, 0.2, 0.01],
+                [0.25, 0.1, 0.89],
+            ]);
+        });
+
+        $this->optimizer = new Stochastic(0.001);
+
+        $this->layer = new Swish(new Constant(1.0));
+    }
+
+    /**
+     * @return array<int, array<string, array<int, array<int, float>>>>
+     */
+    public static function initializeForwardBackInferProvider() : array
+    {
+        return [
+            [
+                'forwardExpected' => [
+                    [0.7310585, 2.3103545, -0.0475020],
+                    [0.0524979, 0.0524979, 2.8577223],
+                    [0.0010009, -0.0148357, -0.1887703],
+                ],
+                'backExpected' => [
+                    [0.2319176, 0.7695808, 0.0450083],
+                    [0.2749583, 0.1099833, 0.0108810],
+                    [0.1252499, -0.0012326, 0.2314345],
+                ],
+                'inferExpected' => [
+                    [0.7306671, 2.3094806, -0.0475070],
+                    [0.0524976, 0.0524976, 2.8576817],
+                    [0.0010010, -0.0147432, -0.1887089],
+                ],
+            ],
+        ];
+    }
+
+    /**
+     * @return array<string, array{0: float, 1: string}>
+     */
+    public static function toStringProvider() : array
+    {
+        return [
+            'value one' => [1.0, 'Swish (initializer: Constant (value: 1))'],
+            'value zero' => [0.0, 'Swish (initializer: Constant (value: 0))'],
+        ];
+    }
+
+    #[DataProvider('initializeForwardBackInferProvider')]
+    public function testInitializeForwardBackInfer(
+        array $forwardExpected,
+        array $backExpected,
+        array $inferExpected,
+    ) : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        self::assertEquals($this->fanIn, $this->layer->width());
+
+        $forward = $this->layer->forward($this->input);
+
+        self::assertEqualsWithDelta($forwardExpected, $forward->toArray(), 1e-7);
+
+        $gradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        self::assertInstanceOf(NDArray::class, $gradient);
+        self::assertEqualsWithDelta($backExpected, $gradient->toArray(), 1e-7);
+
+        $infer = $this->layer->infer($this->input);
+
+        self::assertEqualsWithDelta($inferExpected, $infer->toArray(), 1e-7);
+    }
+
+    #[DataProvider('toStringProvider')]
+    public function testToString(float $value, string $expected) : void
+    {
+        $layer = new Swish(new Constant($value));
+
+        self::assertSame($expected, (string) $layer);
+    }
+
+    public function testWidthThrowsIfNotInitialized() : void
+    {
+        $layer = new Swish();
+
+        $this->expectException(RuntimeException::class);
+        $this->expectExceptionMessage('Layer has not been initialized.');
+
+        $layer->width();
+    }
+
+    public function testInitializeReturnsFanOutAndSetsWidth() : void
+    {
+        $fanIn = 4;
+        $layer = new Swish(new Constant(1.0));
+
+        $fanOut = $layer->initialize($fanIn);
+
+        self::assertSame($fanIn, $fanOut);
+        self::assertSame($fanIn, $layer->width());
+    }
+
+    public function testParametersAndRestore() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $parameters = iterator_to_array($this->layer->parameters());
+
+        self::assertArrayHasKey('beta', $parameters);
+        self::assertInstanceOf(Parameter::class, $parameters['beta']);
+
+        $betaParam = $parameters['beta'];
+        $originalBeta = $betaParam->param()->toArray();
+
+        $newLayer = new Swish(new Constant(0.0));
+        $newLayer->initialize($this->fanIn);
+
+        $newLayer->restore($parameters);
+
+        $restoredParams = iterator_to_array($newLayer->parameters());
+
+        self::assertArrayHasKey('beta', $restoredParams);
+        self::assertInstanceOf(Parameter::class, $restoredParams['beta']);
+
+        $restoredBeta = $restoredParams['beta']->param()->toArray();
+
+        self::assertEquals($originalBeta, $restoredBeta);
+    }
+
+    public function testGradientMatchesBackpropagatedGradient() : void
+    {
+        $this->layer->initialize($this->fanIn);
+
+        $output = $this->layer->forward($this->input);
+
+        $backGradient = $this->layer->back(
+            prevGradient: $this->prevGrad,
+            optimizer: $this->optimizer
+        )->compute();
+
+        $directGradient = $this->layer->gradient(
+            $this->input,
+            $output,
+            ($this->prevGrad)()
+        );
+
+        self::assertInstanceOf(NDArray::class, $directGradient);
+        self::assertEqualsWithDelta($backGradient->toArray(), $directGradient->toArray(), 1e-7);
+    }
+}

From be52a098de8cb75475d1c6c05744219a03375e64 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Wed, 24 Dec 2025 20:16:43 +0200
Subject: [PATCH 22/36] ML-392 Fixed syntax issues in tests annotations

---
 tests/Helpers/GraphvizTest.php                |  9 ++--
 tests/NeuralNet/FeedForwardTest.php           | 52 ++++++++-----------
 .../SamplesAreCompatibleWithDistanceTest.php  |  8 +--
 3 files changed, 32 insertions(+), 37 deletions(-)

diff --git a/tests/Helpers/GraphvizTest.php b/tests/Helpers/GraphvizTest.php
index da1c70b99..3bef96a06 100644
--- a/tests/Helpers/GraphvizTest.php
+++ b/tests/Helpers/GraphvizTest.php
@@ -6,6 +6,8 @@
 
 use PHPUnit\Framework\Attributes\CoversClass;
 use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
 use Rubix\ML\Encoding;
 use Rubix\ML\Helpers\Graphviz;
 use PHPUnit\Framework\TestCase;
@@ -14,9 +16,8 @@
 #[CoversClass(GraphvizTest::class)]
 class GraphvizTest extends TestCase
 {
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Converts a DOT graph description to an image encoding')]
     public function dotToImage() : void
     {
         // Almost always skip this test, needed to appease Stan.
@@ -56,6 +57,6 @@ public function dotToImage() : void
 
         $encoding = Graphviz::dotToImage($dot, 'png');
 
-        $this->assertInstanceOf(Encoding::class, $encoding);
+        self::assertInstanceOf(Encoding::class, $encoding);
     }
 }
diff --git a/tests/NeuralNet/FeedForwardTest.php b/tests/NeuralNet/FeedForwardTest.php
index c68ae47be..a060975e0 100644
--- a/tests/NeuralNet/FeedForwardTest.php
+++ b/tests/NeuralNet/FeedForwardTest.php
@@ -14,11 +14,14 @@
 use Rubix\ML\NeuralNet\ActivationFunctions\ReLU;
 use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy;
 use PHPUnit\Framework\TestCase;
-
-/**
- * @group NeuralNet
- * @covers \Rubix\ML\NeuralNet\FeedForward
- */
+use PHPUnit\Framework\Attributes\Before;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+
+#[Group('NeuralNet')]
+#[CoversClass(FeedForward::class)]
 class FeedForwardTest extends TestCase
 {
     /**
@@ -46,9 +49,7 @@ class FeedForwardTest extends TestCase
      */
     protected $output;
 
-    /**
-     * @before
-     */
+    #[Before]
     protected function setUp() : void
     {
         $this->dataset = Labeled::quick([
@@ -72,50 +73,44 @@ protected function setUp() : void
         $this->network = new FeedForward($this->input, $this->hidden, $this->output, new Adam(0.001));
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Builds a feed-forward network instance')]
     public function build() : void
     {
         $this->assertInstanceOf(FeedForward::class, $this->network);
         $this->assertInstanceOf(Network::class, $this->network);
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Returns all hidden and output layers')]
     public function layers() : void
     {
         $this->assertCount(5, iterator_to_array($this->network->layers()));
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Returns the input layer')]
     public function input() : void
     {
         $this->assertInstanceOf(Placeholder1D::class, $this->network->input());
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Returns the hidden layers')]
     public function hidden() : void
     {
         $this->assertCount(5, $this->network->hidden());
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Returns the output layer')]
     public function networkOutput() : void
     {
         $this->assertInstanceOf(Output::class, $this->network->output());
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Reports the correct number of parameters after initialization')]
     public function numParams() : void
     {
         $this->network->initialize();
@@ -123,9 +118,8 @@ public function numParams() : void
         $this->assertEquals(103, $this->network->numParams());
     }
 
-    /**
-     * @test
-     */
+    #[Test]
+    #[TestDox('Performs a roundtrip pass and returns a loss value')]
     public function roundtrip() : void
     {
         $this->network->initialize();
diff --git a/tests/Specifications/SamplesAreCompatibleWithDistanceTest.php b/tests/Specifications/SamplesAreCompatibleWithDistanceTest.php
index 885eb5d7b..7b564fbd2 100644
--- a/tests/Specifications/SamplesAreCompatibleWithDistanceTest.php
+++ b/tests/Specifications/SamplesAreCompatibleWithDistanceTest.php
@@ -7,6 +7,8 @@
 use PHPUnit\Framework\Attributes\CoversClass;
 use PHPUnit\Framework\Attributes\DataProvider;
 use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
 use Rubix\ML\Datasets\Unlabeled;
 use Rubix\ML\Kernels\Distance\Hamming;
 use Rubix\ML\Kernels\Distance\Euclidean;
@@ -61,11 +63,9 @@ public static function passesProvider() : Generator
         ];
     }
 
-    /**
-     * @param SamplesAreCompatibleWithDistance $specification
-     * @param bool $expected
-     */
     #[DataProvider('passesProvider')]
+    #[Test]
+    #[TestDox('Checks whether samples are compatible with the given distance metric')]
     public function passes(SamplesAreCompatibleWithDistance $specification, bool $expected) : void
     {
         $this->assertSame($expected, $specification->passes());

From 0b591eda5bd4ea3a14d33c70f1fb28109c21afcb Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 4 Jan 2026 19:46:05 +0200
Subject: [PATCH 23/36] ML-392 Typo fixes

---
 docs/neural-network/optimizers/cyclical.md     | 2 +-
 src/NeuralNet/Optimizers/Cyclical.php          | 2 +-
 src/NeuralNet/Optimizers/Cyclical/Cyclical.php | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/neural-network/optimizers/cyclical.md b/docs/neural-network/optimizers/cyclical.md
index eed8b2779..02622461b 100644
--- a/docs/neural-network/optimizers/cyclical.md
+++ b/docs/neural-network/optimizers/cyclical.md
@@ -11,7 +11,7 @@ $$
 \text{cycle} &= \left\lfloor 1 + \frac{t}{2\,\text{steps}} \right\rfloor \\
 x &= \left| \frac{t}{\text{steps}} - 2\,\text{cycle} + 1 \right| \\
 \text{scale} &= \text{decay}^{\,t} \\
-\eta_t &= \text{lower} + (\text{upper} - \text{lower})\,\max\bigl(0\,1 - x\bigr)\,\text{scale} \\
+\eta_t &= \text{lower} + (\text{upper} - \text{lower})\,\max\bigl(0,1 - x\bigr)\,\text{scale} \\
 \Delta\theta_t &= \eta_t\,g_t
 \end{aligned}
 $$
diff --git a/src/NeuralNet/Optimizers/Cyclical.php b/src/NeuralNet/Optimizers/Cyclical.php
index dcce49bf2..606228611 100644
--- a/src/NeuralNet/Optimizers/Cyclical.php
+++ b/src/NeuralNet/Optimizers/Cyclical.php
@@ -86,7 +86,7 @@ public function __construct(
 
         if ($lower > $upper) {
             throw new InvalidArgumentException('Lower bound cannot be'
-                . ' reater than the upper bound.');
+                . ' greater than the upper bound.');
         }
 
         if ($losses < 1) {
diff --git a/src/NeuralNet/Optimizers/Cyclical/Cyclical.php b/src/NeuralNet/Optimizers/Cyclical/Cyclical.php
index ac22d9d52..ca929cdeb 100644
--- a/src/NeuralNet/Optimizers/Cyclical/Cyclical.php
+++ b/src/NeuralNet/Optimizers/Cyclical/Cyclical.php
@@ -90,7 +90,7 @@ public function __construct(
 
         if ($lower > $upper) {
             throw new InvalidArgumentException(
-                'Lower bound cannot be reater than the upper bound.'
+                'Lower bound cannot be greater than the upper bound.'
             );
         }
 

From 106c07016fdfb10e6ac2b36516ddc10ebf01c0b7 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 25 Jan 2026 16:22:18 +0200
Subject: [PATCH 24/36] ML-394 Converted FeedForward to use NumPower

---
 src/NeuralNet/FeedForwards/FeedForward.php    | 274 ++++++++++++++++++
 .../FeedForwards/FeedForwardTest.php          | 131 +++++++++
 2 files changed, 405 insertions(+)
 create mode 100644 src/NeuralNet/FeedForwards/FeedForward.php
 create mode 100644 tests/NeuralNet/FeedForwards/FeedForwardTest.php

diff --git a/src/NeuralNet/FeedForwards/FeedForward.php b/src/NeuralNet/FeedForwards/FeedForward.php
new file mode 100644
index 000000000..af430d083
--- /dev/null
+++ b/src/NeuralNet/FeedForwards/FeedForward.php
@@ -0,0 +1,274 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\FeedForwards;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\Encoding;
+use Rubix\ML\Datasets\Dataset;
+use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\NeuralNet\Optimizers\Base\Adaptive;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Traversable;
+
+use function array_reverse;
+
+/**
+ * Feed Forward
+ *
+ * A feed forward neural network implementation consisting of an input and
+ * output layer and any number of intermediate hidden layers.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class FeedForward
+{
+    /**
+     * The input layer to the network.
+     *
+     * @var Input
+     */
+    protected Input $input;
+
+    /**
+     * The hidden layers of the network.
+     *
+     * @var list<Hidden>
+     */
+    protected array $hidden = [
+        //
+    ];
+
+    /**
+     * The pathing of the backward pass through the hidden layers.
+     *
+     * @var list<Hidden>
+     */
+    protected array $backPass = [
+        //
+    ];
+
+    /**
+     * The output layer of the network.
+     *
+     * @var Output
+     */
+    protected Output $output;
+
+    /**
+     * The gradient descent optimizer used to train the network.
+     *
+     * @var Optimizer
+     */
+    protected Optimizer $optimizer;
+
+    /**
+     * @param Input $input
+     * @param Hidden[] $hidden
+     * @param Output $output
+     * @param Optimizer $optimizer
+     */
+    public function __construct(Input $input, array $hidden, Output $output, Optimizer $optimizer)
+    {
+        $hidden = array_values($hidden);
+
+        $backPass = array_reverse($hidden);
+
+        $this->input = $input;
+        $this->hidden = $hidden;
+        $this->output = $output;
+        $this->optimizer = $optimizer;
+        $this->backPass = $backPass;
+    }
+
+    /**
+     * Return the input layer.
+     *
+     * @return Input
+     */
+    public function input() : Input
+    {
+        return $this->input;
+    }
+
+    /**
+     * Return an array of hidden layers indexed left to right.
+     *
+     * @return list<Layers\Hidden>
+     */
+    public function hidden() : array
+    {
+        return $this->hidden;
+    }
+
+    /**
+     * Return the output layer.
+     *
+     * @return Output
+     */
+    public function output() : Output
+    {
+        return $this->output;
+    }
+
+    /**
+     * Return all the layers in the network.
+     *
+     * @return Traversable<Layers\Layer>
+     */
+    public function layers() : Traversable
+    {
+        yield $this->input;
+
+        yield from $this->hidden;
+
+        yield $this->output;
+    }
+
+    /**
+     * Return the number of trainable parameters in the network.
+     *
+     * @return int
+     */
+    public function numParams() : int
+    {
+        $numParams = 0;
+
+        foreach ($this->layers() as $layer) {
+            if ($layer instanceof Parametric) {
+                foreach ($layer->parameters() as $parameter) {
+                    $numParams += $parameter->param()->size();
+                }
+            }
+        }
+
+        return $numParams;
+    }
+
+    /**
+     * Initialize the parameters of the layers and warm the optimizer cache.
+     */
+    public function initialize() : void
+    {
+        $fanIn = 1;
+
+        foreach ($this->layers() as $layer) {
+            $fanIn = $layer->initialize($fanIn);
+        }
+
+        if ($this->optimizer instanceof Adaptive) {
+            foreach ($this->layers() as $layer) {
+                if ($layer instanceof Parametric) {
+                    foreach ($layer->parameters() as $param) {
+                        $this->optimizer->warm($param);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Run an inference pass and return the activations at the output layer.
+     *
+     * @param Dataset $dataset
+     * @return NDArray
+     */
+    public function infer(Dataset $dataset) : NDArray
+    {
+        $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+
+        foreach ($this->layers() as $layer) {
+            $input = $layer->infer($input);
+        }
+
+        return NumPower::transpose($input, [1, 0]);
+    }
+
+    /**
+     * Perform a forward and backward pass of the network in one call. Returns
+     * the loss from the backward pass.
+     *
+     * @param Labeled $dataset
+     * @return float
+     */
+    public function roundtrip(Labeled $dataset) : float
+    {
+        $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+
+        $this->feed($input);
+
+        $loss = $this->backpropagate($dataset->labels());
+
+        return $loss;
+    }
+
+    /**
+     * Feed a batch through the network and return a matrix of activations at the output later.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function feed(NDArray $input) : NDArray
+    {
+        foreach ($this->layers() as $layer) {
+            $input = $layer->forward($input);
+        }
+
+        return $input;
+    }
+
+    /**
+     * Backpropagate the gradient of the cost function and return the loss.
+     *
+     * @param list<string|int|float> $labels
+     * @return float
+     */
+    public function backpropagate(array $labels) : float
+    {
+        [$gradient, $loss] = $this->output->back($labels, $this->optimizer);
+
+        foreach ($this->backPass as $layer) {
+            $gradient = $layer->back($gradient, $this->optimizer);
+        }
+
+        return $loss;
+    }
+
+    /**
+     * Export the network architecture as a graph in dot format.
+     *
+     * @return Encoding
+     */
+    public function exportGraphviz() : Encoding
+    {
+        $dot = 'digraph Tree {' . PHP_EOL;
+        $dot .= '  node [shape=box, fontname=helvetica];' . PHP_EOL;
+
+        $layerNum = 0;
+
+        foreach ($this->layers() as $layer) {
+            ++$layerNum;
+
+            $dot .= "  N$layerNum [label=\"$layer\",style=\"rounded\"]" . PHP_EOL;
+
+            if ($layerNum > 1) {
+                $parentId = $layerNum - 1;
+
+                $dot .= "  N{$parentId} -> N{$layerNum};" . PHP_EOL;
+            }
+        }
+
+        $dot .= '}';
+
+        return new Encoding($dot);
+    }
+}
diff --git a/tests/NeuralNet/FeedForwards/FeedForwardTest.php b/tests/NeuralNet/FeedForwards/FeedForwardTest.php
new file mode 100644
index 000000000..ac1c42d53
--- /dev/null
+++ b/tests/NeuralNet/FeedForwards/FeedForwardTest.php
@@ -0,0 +1,131 @@
+<?php
+
+namespace Rubix\ML\Tests\NeuralNet\FeedForwards;
+
+use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\NeuralNet\FeedForwards\FeedForward;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
+use Rubix\ML\NeuralNet\Layers\Activation\Activation;
+use Rubix\ML\NeuralNet\Layers\Multiclass\Multiclass;
+use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
+use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use PHPUnit\Framework\TestCase;
+use PHPUnit\Framework\Attributes\Before;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+
+#[Group('NeuralNet')]
+#[CoversClass(FeedForward::class)]
+class FeedForwardTest extends TestCase
+{
+    /**
+     * @var Labeled
+     */
+    protected Labeled $dataset;
+
+    /**
+     * @var FeedForward
+     */
+    protected FeedForward $network;
+
+    /**
+     * @var Input
+     */
+    protected Input $input;
+
+    /**
+     * @var Hidden[]
+     */
+    protected array $hidden;
+
+    /**
+     * @var Output
+     */
+    protected Output $output;
+
+    #[Before]
+    protected function setUp() : void
+    {
+        $this->dataset = Labeled::quick([
+            [1.0, 2.5],
+            [0.1, 0.0],
+            [0.002, -6.0],
+        ], ['yes', 'no', 'maybe']);
+
+        $this->input = new Placeholder1D(2);
+
+        $this->hidden = [
+            new Dense(10),
+            new Activation(new ReLU()),
+            new Dense(5),
+            new Activation(new ReLU()),
+            new Dense(3),
+        ];
+
+        $this->output = new Multiclass(['yes', 'no', 'maybe'], new CrossEntropy());
+
+        $this->network = new FeedForward($this->input, $this->hidden, $this->output, new Adam(0.001));
+    }
+
+    #[Test]
+    #[TestDox('Builds a feed-forward network instance')]
+    public function build() : void
+    {
+        self::assertInstanceOf(FeedForward::class, $this->network);
+    }
+
+    #[Test]
+    #[TestDox('Returns all hidden and output layers')]
+    public function layers() : void
+    {
+        self::assertCount(5, iterator_to_array($this->network->layers()));
+    }
+
+    #[Test]
+    #[TestDox('Returns the input layer')]
+    public function input() : void
+    {
+        self::assertInstanceOf(Placeholder1D::class, $this->network->input());
+    }
+
+    #[Test]
+    #[TestDox('Returns the hidden layers')]
+    public function hidden() : void
+    {
+        self::assertCount(5, $this->network->hidden());
+    }
+
+    #[Test]
+    #[TestDox('Returns the output layer')]
+    public function networkOutput() : void
+    {
+        self::assertInstanceOf(Output::class, $this->network->output());
+    }
+
+    #[Test]
+    #[TestDox('Reports the correct number of parameters after initialization')]
+    public function numParams() : void
+    {
+        $this->network->initialize();
+
+        self::assertEquals(103, $this->network->numParams());
+    }
+
+    #[Test]
+    #[TestDox('Performs a roundtrip pass and returns a loss value')]
+    public function roundtrip() : void
+    {
+        $this->network->initialize();
+
+        $loss = $this->network->roundtrip($this->dataset);
+
+        self::assertIsFloat($loss);
+    }
+}

From e118309f93c81837589ea4eb3eda4fe486fa3009 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 25 Jan 2026 16:37:36 +0200
Subject: [PATCH 25/36] ML-394 Converted Network to use NumPower

---
 src/NeuralNet/Networks/Network.php       | 275 +++++++++++++++++++++++
 tests/NeuralNet/Networks/NetworkTest.php | 101 +++++++++
 2 files changed, 376 insertions(+)
 create mode 100644 src/NeuralNet/Networks/Network.php
 create mode 100644 tests/NeuralNet/Networks/NetworkTest.php

diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php
new file mode 100644
index 000000000..6554940b3
--- /dev/null
+++ b/src/NeuralNet/Networks/Network.php
@@ -0,0 +1,275 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Networks;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Layer;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\Encoding;
+use Rubix\ML\Datasets\Dataset;
+use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\NeuralNet\Optimizers\Base\Adaptive;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Traversable;
+
+use function array_reverse;
+
+/**
+ * Network
+ *
+ * A  neural network implementation consisting of an input and output layer and any number
+ * of intermediate hidden layers.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Network
+{
+    /**
+     * The input layer to the network.
+     *
+     * @var Input
+     */
+    protected Input $input;
+
+    /**
+     * The hidden layers of the network.
+     *
+     * @var list<Hidden>
+     */
+    protected array $hidden = [
+        //
+    ];
+
+    /**
+     * The pathing of the backward pass through the hidden layers.
+     *
+     * @var list<Hidden>
+     */
+    protected array $backPass = [
+        //
+    ];
+
+    /**
+     * The output layer of the network.
+     *
+     * @var Output
+     */
+    protected Output $output;
+
+    /**
+     * The gradient descent optimizer used to train the network.
+     *
+     * @var Optimizer
+     */
+    protected Optimizer $optimizer;
+
+    /**
+     * @param Input $input
+     * @param Hidden[] $hidden
+     * @param Output $output
+     * @param Optimizer $optimizer
+     */
+    public function __construct(Input $input, array $hidden, Output $output, Optimizer $optimizer)
+    {
+        $hidden = array_values($hidden);
+
+        $backPass = array_reverse($hidden);
+
+        $this->input = $input;
+        $this->hidden = $hidden;
+        $this->output = $output;
+        $this->optimizer = $optimizer;
+        $this->backPass = $backPass;
+    }
+
+    /**
+     * Return the input layer.
+     *
+     * @return Input
+     */
+    public function input() : Input
+    {
+        return $this->input;
+    }
+
+    /**
+     * Return an array of hidden layers indexed left to right.
+     *
+     * @return list<Hidden>
+     */
+    public function hidden() : array
+    {
+        return $this->hidden;
+    }
+
+    /**
+     * Return the output layer.
+     *
+     * @return Output
+     */
+    public function output() : Output
+    {
+        return $this->output;
+    }
+
+    /**
+     * Return all the layers in the network.
+     *
+     * @return Traversable<Layer>
+     */
+    public function layers() : Traversable
+    {
+        yield $this->input;
+
+        yield from $this->hidden;
+
+        yield $this->output;
+    }
+
+    /**
+     * Return the number of trainable parameters in the network.
+     *
+     * @return int
+     */
+    public function numParams() : int
+    {
+        $numParams = 0;
+
+        foreach ($this->layers() as $layer) {
+            if ($layer instanceof Parametric) {
+                foreach ($layer->parameters() as $parameter) {
+                    $numParams += $parameter->param()->size();
+                }
+            }
+        }
+
+        return $numParams;
+    }
+
+    /**
+     * Initialize the parameters of the layers and warm the optimizer cache.
+     */
+    public function initialize() : void
+    {
+        $fanIn = 1;
+
+        foreach ($this->layers() as $layer) {
+            $fanIn = $layer->initialize($fanIn);
+        }
+
+        if ($this->optimizer instanceof Adaptive) {
+            foreach ($this->layers() as $layer) {
+                if ($layer instanceof Parametric) {
+                    foreach ($layer->parameters() as $param) {
+                        $this->optimizer->warm($param);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Run an inference pass and return the activations at the output layer.
+     *
+     * @param Dataset $dataset
+     * @return NDArray
+     */
+    public function infer(Dataset $dataset) : NDArray
+    {
+        $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+
+        foreach ($this->layers() as $layer) {
+            $input = $layer->infer($input);
+        }
+
+        return NumPower::transpose($input, [1, 0]);
+    }
+
+    /**
+     * Perform a forward and backward pass of the network in one call. Returns
+     * the loss from the backward pass.
+     *
+     * @param Labeled $dataset
+     * @return float
+     */
+    public function roundtrip(Labeled $dataset) : float
+    {
+        $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+
+        $this->feed($input);
+
+        $loss = $this->backpropagate($dataset->labels());
+
+        return $loss;
+    }
+
+    /**
+     * Feed a batch through the network and return a matrix of activations at the output later.
+     *
+     * @param NDArray $input
+     * @return NDArray
+     */
+    public function feed(NDArray $input) : NDArray
+    {
+        foreach ($this->layers() as $layer) {
+            $input = $layer->forward($input);
+        }
+
+        return $input;
+    }
+
+    /**
+     * Backpropagate the gradient of the cost function and return the loss.
+     *
+     * @param list<string|int|float> $labels
+     * @return float
+     */
+    public function backpropagate(array $labels) : float
+    {
+        [$gradient, $loss] = $this->output->back($labels, $this->optimizer);
+
+        foreach ($this->backPass as $layer) {
+            $gradient = $layer->back($gradient, $this->optimizer);
+        }
+
+        return $loss;
+    }
+
+    /**
+     * Export the network architecture as a graph in dot format.
+     *
+     * @return Encoding
+     */
+    public function exportGraphviz() : Encoding
+    {
+        $dot = 'digraph Tree {' . PHP_EOL;
+        $dot .= '  node [shape=box, fontname=helvetica];' . PHP_EOL;
+
+        $layerNum = 0;
+
+        foreach ($this->layers() as $layer) {
+            ++$layerNum;
+
+            $dot .= "  N$layerNum [label=\"$layer\",style=\"rounded\"]" . PHP_EOL;
+
+            if ($layerNum > 1) {
+                $parentId = $layerNum - 1;
+
+                $dot .= "  N{$parentId} -> N{$layerNum};" . PHP_EOL;
+            }
+        }
+
+        $dot .= '}';
+
+        return new Encoding($dot);
+    }
+}
diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php
new file mode 100644
index 000000000..0197c225d
--- /dev/null
+++ b/tests/NeuralNet/Networks/NetworkTest.php
@@ -0,0 +1,101 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Networks;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\NeuralNet\Layers\Activation\Activation;
+use Rubix\ML\NeuralNet\Layers\Multiclass\Multiclass;
+use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
+use Rubix\ML\NeuralNet\Networks\Network;
+use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
+use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use PHPUnit\Framework\TestCase;
+
+#[Group('NeuralNet')]
+#[CoversClass(Network::class)]
+class NetworkTest extends TestCase
+{
+    protected Labeled $dataset;
+
+    protected Network $network;
+
+    protected Input $input;
+
+    /**
+     * @var Hidden[]
+     */
+    protected array $hidden;
+
+    protected Output $output;
+
+    protected function setUp() : void
+    {
+        $this->dataset = Labeled::quick(
+            samples: [
+                [1.0, 2.5],
+                [0.1, 0.0],
+                [0.002, -6.0],
+            ],
+            labels: ['yes', 'no', 'maybe']
+        );
+
+        $this->input = new Placeholder1D(2);
+
+        $this->hidden = [
+            new Dense(neurons: 10),
+            new Activation(new ReLU()),
+            new Dense(neurons: 5),
+            new Activation(new ReLU()),
+            new Dense(neurons: 3),
+        ];
+
+        $this->output = new Multiclass(
+            classes: ['yes', 'no', 'maybe'],
+            costFn: new CrossEntropy()
+        );
+
+        $this->network = new Network(
+            input: $this->input,
+            hidden: $this->hidden,
+            output: $this->output,
+            optimizer: new Adam(0.001)
+        );
+    }
+
+    public function testLayers() : void
+    {
+        $count = 0;
+
+        foreach ($this->network->layers() as $item) {
+            ++$count;
+        }
+
+        self::assertSame(7, $count);
+    }
+
+    public function testInput() : void
+    {
+        self::assertInstanceOf(Placeholder1D::class, $this->network->input());
+    }
+
+    public function testHidden() : void
+    {
+        self::assertCount(5, $this->network->hidden());
+    }
+
+    public function testNumParams() : void
+    {
+        $this->network->initialize();
+
+        self::assertEquals(103, $this->network->numParams());
+    }
+}

From 5dfc35491f8103c7be260f24a831d51196f4f976 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 25 Jan 2026 16:55:31 +0200
Subject: [PATCH 26/36] ML-394 Converted Snapshot to use NumPower

---
 src/NeuralNet/Snapshots/Snapshot.php       | 90 ++++++++++++++++++++++
 tests/NeuralNet/Snapshots/SnapshotTest.php | 64 +++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 src/NeuralNet/Snapshots/Snapshot.php
 create mode 100644 tests/NeuralNet/Snapshots/SnapshotTest.php

diff --git a/src/NeuralNet/Snapshots/Snapshot.php b/src/NeuralNet/Snapshots/Snapshot.php
new file mode 100644
index 000000000..033224d5c
--- /dev/null
+++ b/src/NeuralNet/Snapshots/Snapshot.php
@@ -0,0 +1,90 @@
+<?php
+
+namespace Rubix\ML\NeuralNet\Snapshots;
+
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\NeuralNet\Networks\Network;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+
+/**
+ * Snapshot
+ *
+ * A snapshot represents the state of a neural network at a moment in time.
+ *
+ * @internal
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class Snapshot
+{
+    /**
+     * The parametric layers of the network.
+     *
+     * @var Parametric[]
+     */
+    protected array $layers;
+
+    /**
+     * The parameters corresponding to each layer in the network at the time of the snapshot.
+     *
+     * @var list<Parameter[]>
+     */
+    protected array $parameters;
+
+    /**
+     * Take a snapshot of the network.
+     *
+     * @param Network $network
+     * @return Snapshot
+     */
+    public static function take(Network $network) : self
+    {
+        $layers = $parameters = [];
+
+        foreach ($network->layers() as $layer) {
+            if ($layer instanceof Parametric) {
+                $params = [];
+
+                foreach ($layer->parameters() as $key => $parameter) {
+                    $params[$key] = clone $parameter;
+                }
+
+                $layers[] = $layer;
+                $parameters[] = $params;
+            }
+        }
+
+        return new self($layers, $parameters);
+    }
+
+    /**
+     * Class constructor.
+     *
+     * @param Parametric[] $layers
+     * @param list<Parameter[]> $parameters
+     * @throws InvalidArgumentException
+     */
+    public function __construct(array $layers, array $parameters)
+    {
+        if (count($layers) !== count($parameters)) {
+            throw new InvalidArgumentException('Number of layers and parameter groups must be equal.');
+        }
+
+        $this->layers = $layers;
+        $this->parameters = $parameters;
+    }
+
+    /**
+     * Restore the network parameters.
+     */
+    public function restore() : void
+    {
+        foreach ($this->layers as $i => $layer) {
+            $layer->restore($this->parameters[$i]);
+        }
+    }
+}
diff --git a/tests/NeuralNet/Snapshots/SnapshotTest.php b/tests/NeuralNet/Snapshots/SnapshotTest.php
new file mode 100644
index 000000000..ecde317e3
--- /dev/null
+++ b/tests/NeuralNet/Snapshots/SnapshotTest.php
@@ -0,0 +1,64 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\NeuralNet\Snapshots;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\NeuralNet\Snapshots\Snapshot;
+use Rubix\ML\NeuralNet\Networks\Network;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
+use Rubix\ML\NeuralNet\Layers\Binary\Binary;
+use Rubix\ML\NeuralNet\Layers\Activation\Activation;
+use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
+use Rubix\ML\NeuralNet\Optimizers\Stochastic\Stochastic;
+use Rubix\ML\NeuralNet\ActivationFunctions\ELU\ELU;
+use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
+use PHPUnit\Framework\TestCase;
+
+#[Group('NeuralNet')]
+#[CoversClass(Snapshot::class)]
+class SnapshotTest extends TestCase
+{
+    protected Snapshot $snapshot;
+
+    protected Network $network;
+
+    public function testConstructorThrowsWithWrongParameters() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('Number of layers and parameter groups must be equal.');
+
+        new Snapshot(
+            layers: [new Dense(1)],
+            parameters: []
+        );
+    }
+
+    public function testTake() : void
+    {
+        $network = new Network(
+            input: new Placeholder1D(1),
+            hidden: [
+                new Dense(10),
+                new Activation(new ELU()),
+                new Dense(5),
+                new Activation(new ELU()),
+                new Dense(1),
+            ],
+            output: new Binary(
+                classes: ['yes', 'no'],
+                costFn:  new CrossEntropy()
+            ),
+            optimizer: new Stochastic()
+        );
+
+        $network->initialize();
+
+        $this->expectNotToPerformAssertions();
+
+        Snapshot::take($network);
+    }
+}

From 0e815dae709cde3a3be96c544b63dac98847e287 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 25 Jan 2026 16:58:54 +0200
Subject: [PATCH 27/36] ML-394 Improved FeedForwardTest

---
 src/NeuralNet/FeedForwards/FeedForward.php       | 3 ++-
 tests/NeuralNet/FeedForwards/FeedForwardTest.php | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/NeuralNet/FeedForwards/FeedForward.php b/src/NeuralNet/FeedForwards/FeedForward.php
index af430d083..155a0ec00 100644
--- a/src/NeuralNet/FeedForwards/FeedForward.php
+++ b/src/NeuralNet/FeedForwards/FeedForward.php
@@ -11,6 +11,7 @@
 use Rubix\ML\Encoding;
 use Rubix\ML\Datasets\Dataset;
 use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\NeuralNet\Networks\Network;
 use Rubix\ML\NeuralNet\Optimizers\Base\Adaptive;
 use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
 use Traversable;
@@ -30,7 +31,7 @@
  * @author      Andrew DalPino
  * @author      Samuel Akopyan <leumas.a@gmail.com>
  */
-class FeedForward
+class FeedForward extends Network
 {
     /**
      * The input layer to the network.
diff --git a/tests/NeuralNet/FeedForwards/FeedForwardTest.php b/tests/NeuralNet/FeedForwards/FeedForwardTest.php
index ac1c42d53..84226fc70 100644
--- a/tests/NeuralNet/FeedForwards/FeedForwardTest.php
+++ b/tests/NeuralNet/FeedForwards/FeedForwardTest.php
@@ -11,6 +11,7 @@
 use Rubix\ML\NeuralNet\Layers\Multiclass\Multiclass;
 use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
+use Rubix\ML\NeuralNet\Networks\Network;
 use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
 use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;
 use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
@@ -79,6 +80,7 @@ protected function setUp() : void
     public function build() : void
     {
         self::assertInstanceOf(FeedForward::class, $this->network);
+        self::assertInstanceOf(Network::class, $this->network);
     }
 
     #[Test]

From a261d6930bd77ed31c2e7b267c74562bf6e0cf30 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 1 Feb 2026 15:45:53 +0200
Subject: [PATCH 28/36] ML-394 Fixed copilot style comments, added prevention
 division by zero in Swish

---
 src/NeuralNet/FeedForwards/FeedForward.php           |  5 +++--
 src/NeuralNet/Layers/Base/Contracts/Layer.php        |  2 +-
 src/NeuralNet/Layers/Swish/Swish.php                 | 12 +++++++-----
 tests/Helpers/GraphvizTest.php                       |  2 +-
 tests/NeuralNet/Layers/Binary/BinaryTest.php         | 10 ++++------
 .../Layers/Placeholder1D/Placeholder1DTest.php       |  2 +-
 tests/NeuralNet/Layers/Placeholder1DTest.php         |  2 +-
 7 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/NeuralNet/FeedForwards/FeedForward.php b/src/NeuralNet/FeedForwards/FeedForward.php
index 155a0ec00..aea7fe6ed 100644
--- a/src/NeuralNet/FeedForwards/FeedForward.php
+++ b/src/NeuralNet/FeedForwards/FeedForward.php
@@ -6,6 +6,7 @@
 use NumPower;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Layer;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Output;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Parametric;
 use Rubix\ML\Encoding;
@@ -104,7 +105,7 @@ public function input() : Input
     /**
      * Return an array of hidden layers indexed left to right.
      *
-     * @return list<Layers\Hidden>
+     * @return list<Hidden>
      */
     public function hidden() : array
     {
@@ -124,7 +125,7 @@ public function output() : Output
     /**
      * Return all the layers in the network.
      *
-     * @return Traversable<Layers\Layer>
+     * @return Traversable<Layer>
      */
     public function layers() : Traversable
     {
diff --git a/src/NeuralNet/Layers/Base/Contracts/Layer.php b/src/NeuralNet/Layers/Base/Contracts/Layer.php
index 10cf17b6e..dddff074f 100644
--- a/src/NeuralNet/Layers/Base/Contracts/Layer.php
+++ b/src/NeuralNet/Layers/Base/Contracts/Layer.php
@@ -6,7 +6,7 @@
 use Stringable;
 
 /**
- * Hidden
+ * Layer
  *
  * @category    Machine Learning
  * @package     Rubix/ML
diff --git a/src/NeuralNet/Layers/Swish/Swish.php b/src/NeuralNet/Layers/Swish/Swish.php
index fcb00fa44..575e11df8 100644
--- a/src/NeuralNet/Layers/Swish/Swish.php
+++ b/src/NeuralNet/Layers/Swish/Swish.php
@@ -15,6 +15,8 @@
 use Rubix\ML\NeuralNet\Parameters\Parameter;
 use Generator;
 
+use const Rubix\ML\EPSILON;
+
 /**
  * Swish
  *
@@ -266,6 +268,7 @@ protected function activate(NDArray $input) : NDArray
 
     /**
      * Calculate the derivative of the activation function at a given output.
+     * Formulation: derivative = (output / input) * (1 - output) + output
      *
      * @param NDArray $input
      * @param NDArray $output
@@ -278,12 +281,11 @@ protected function differentiate(NDArray $input, NDArray $output) : NDArray
             throw new RuntimeException('Layer has not been initialized.');
         }
 
-        // Original formulation:
-        // derivative = (output / input) * (1 - output) + output
-        // Implemented using NumPower operations to avoid explicit ones matrix.
-        $term1 = NumPower::divide($output, $input);
-        $oneMinusOutput = NumPower::subtract(1.0, $output);
+        // Prevent division by zero if the input contains zero values
+        $denominator = NumPower::add($input, EPSILON);
+        $term1 = NumPower::divide($output, $denominator);
 
+        $oneMinusOutput = NumPower::subtract(1.0, $output);
         $product = NumPower::multiply($term1, $oneMinusOutput);
 
         return NumPower::add($product, $output);
diff --git a/tests/Helpers/GraphvizTest.php b/tests/Helpers/GraphvizTest.php
index 3bef96a06..6493d97de 100644
--- a/tests/Helpers/GraphvizTest.php
+++ b/tests/Helpers/GraphvizTest.php
@@ -13,7 +13,7 @@
 use PHPUnit\Framework\TestCase;
 
 #[Group('Helpers')]
-#[CoversClass(GraphvizTest::class)]
+#[CoversClass(Graphviz::class)]
 class GraphvizTest extends TestCase
 {
     #[Test]
diff --git a/tests/NeuralNet/Layers/Binary/BinaryTest.php b/tests/NeuralNet/Layers/Binary/BinaryTest.php
index 645d7c86b..ad129bf97 100644
--- a/tests/NeuralNet/Layers/Binary/BinaryTest.php
+++ b/tests/NeuralNet/Layers/Binary/BinaryTest.php
@@ -1,6 +1,6 @@
 <?php
 
-declare(strict_types = 1);
+declare(strict_types=1);
 
 namespace Rubix\ML\Tests\NeuralNet\Layers\Binary;
 
@@ -63,7 +63,7 @@ public static function backProvider() : array
     }
 
     /**
-     * @return array<string, array{0: array<int, string>}> 
+     * @return array<string, array{0: array<int, string>}>
      */
     public static function badClassesProvider() : array
     {
@@ -155,7 +155,7 @@ public function testBack(array $expectedGradient) : void
     }
 
     #[Test]
-    #[TestDox('Computes gradient directly given input, output, expected, and batch size')]
+    #[TestDox('Computes gradient directly given input, output and expected')]
     #[DataProvider('backProvider')]
     public function testGradient(array $expectedGradient) : void
     {
@@ -171,9 +171,7 @@ public function testGradient(array $expectedGradient) : void
         }
         $expected = NumPower::array([$expected]);
 
-        $batchSize = count($this->labels);
-
-        $gradient = $this->layer->gradient($input, $output, $expected, $batchSize);
+        $gradient = $this->layer->gradient($input, $output, $expected);
 
         self::assertInstanceOf(NDArray::class, $gradient);
         self::assertEqualsWithDelta($expectedGradient, $gradient->toArray(), 1e-7);
diff --git a/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php b/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php
index 7aa3168c8..81dbc91cf 100644
--- a/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php
+++ b/tests/NeuralNet/Layers/Placeholder1D/Placeholder1DTest.php
@@ -1,6 +1,6 @@
 <?php
 
-declare(strict_types = 1);
+declare(strict_types=1);
 
 namespace Rubix\ML\Tests\NeuralNet\Layers\Placeholder1D;
 
diff --git a/tests/NeuralNet/Layers/Placeholder1DTest.php b/tests/NeuralNet/Layers/Placeholder1DTest.php
index 8e0dbf24c..c6c5ff897 100644
--- a/tests/NeuralNet/Layers/Placeholder1DTest.php
+++ b/tests/NeuralNet/Layers/Placeholder1DTest.php
@@ -1,6 +1,6 @@
 <?php
 
-declare(strict_types = 1);
+declare(strict_types=1);
 
 namespace Rubix\ML\Tests\NeuralNet\Layers;
 

From c80d2784968b8f7a015af1dd8cf24bc0e8889260 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 17:16:30 +0200
Subject: [PATCH 29/36] ML-396 Converted MLPRegressor to work with
 NumPower/NDArray related classes

---
 .../Generators/SwissRoll/SwissRoll.php        | 188 ++++++
 src/NeuralNet/Networks/Network.php            |  76 ++-
 src/NeuralNet/Parameters/Parameter.php        |   7 +-
 src/Regressors/MLPRegressor/MLPRegressor.php  | 561 ++++++++++++++++++
 .../Generators/SwissRoll/SwissRollTest.php    |  47 ++
 tests/NeuralNet/Layers/Swish/SwishTest.php    |   2 +-
 tests/NeuralNet/Networks/NetworkTest.php      |  51 ++
 .../MLPRegressors/MLPRegressorTest.php        | 216 +++++++
 8 files changed, 1144 insertions(+), 4 deletions(-)
 create mode 100644 src/Datasets/Generators/SwissRoll/SwissRoll.php
 create mode 100644 src/Regressors/MLPRegressor/MLPRegressor.php
 create mode 100644 tests/Datasets/Generators/SwissRoll/SwissRollTest.php
 create mode 100644 tests/Regressors/MLPRegressors/MLPRegressorTest.php

diff --git a/src/Datasets/Generators/SwissRoll/SwissRoll.php b/src/Datasets/Generators/SwissRoll/SwissRoll.php
new file mode 100644
index 000000000..c965ef865
--- /dev/null
+++ b/src/Datasets/Generators/SwissRoll/SwissRoll.php
@@ -0,0 +1,188 @@
+<?php
+
+namespace Rubix\ML\Datasets\Generators\SwissRoll;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\Datasets\Generators\Generator;
+use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+
+use function cos;
+use function sin;
+use function log;
+use function sqrt;
+use function mt_rand;
+
+use const Rubix\ML\HALF_PI;
+
+/**
+ * Swiss Roll
+ *
+ * Generate a 3-dimensional swiss roll dataset with continuous valued labels.
+ * The labels are the inputs to the swiss roll transformation and are suitable
+ * for non-linear regression problems.
+ *
+ * References:
+ * [1] S. Marsland. (2009). Machine Learning: An Algorithmic Perspective,
+ * Chapter 10.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class SwissRoll implements Generator
+{
+    /**
+     * The center vector of the swiss roll.
+     *
+     * @var list<float>
+     */
+    protected array $center;
+
+    /**
+     * The scaling factor of the swiss roll.
+     *
+     * @var float
+     */
+    protected float $scale;
+
+    /**
+     * The depth of the swiss roll i.e the scale of the y dimension.
+     *
+     * @var float
+     */
+    protected float $depth;
+
+    /**
+     * The standard deviation of the gaussian noise.
+     *
+     * @var float
+     */
+    protected float $noise;
+
+    /**
+     * @param float $x
+     * @param float $y
+     * @param float $z
+     * @param float $scale
+     * @param float $depth
+     * @param float $noise
+     * @throws InvalidArgumentException
+     */
+    public function __construct(
+        float $x = 0.0,
+        float $y = 0.0,
+        float $z = 0.0,
+        float $scale = 1.0,
+        float $depth = 21.0,
+        float $noise = 0.1
+    ) {
+        if ($scale < 0.0) {
+            throw new InvalidArgumentException('Scale must be'
+                . " greater than 0, $scale given.");
+        }
+
+        if ($depth < 0) {
+            throw new InvalidArgumentException('Depth must be'
+                . " greater than 0, $depth given.");
+        }
+
+        if ($noise < 0.0) {
+            throw new InvalidArgumentException('Noise factor cannot be less'
+                . " than 0, $noise given.");
+        }
+
+        $this->center = [$x, $y, $z];
+        $this->scale = $scale;
+        $this->depth = $depth;
+        $this->noise = $noise;
+    }
+
+    /**
+     * Return the dimensionality of the data this generates.
+     *
+     * @internal
+     *
+     * @return int<0,max>
+     */
+    public function dimensions() : int
+    {
+        return 3;
+    }
+
+    /**
+     * Generate n data points.
+     *
+     * @param int<0,max> $n
+     * @return Labeled
+     */
+    public function generate(int $n) : Labeled
+    {
+        $range = M_PI + HALF_PI;
+
+        $t = [];
+        $y = [];
+        $coords = [];
+
+        for ($i = 0; $i < $n; ++$i) {
+            $u = mt_rand() / mt_getrandmax();
+            $ti = (($u * 2.0) + 1.0) * $range;
+            $t[] = $ti;
+
+            $uy = mt_rand() / mt_getrandmax();
+            $y[] = $uy * $this->depth;
+
+            $coords[] = [
+                $ti * cos($ti),
+                $y[$i],
+                $ti * sin($ti),
+            ];
+        }
+
+        $noise = [];
+
+        if ($this->noise > 0.0) {
+            for ($i = 0; $i < $n; ++$i) {
+                $row = [];
+
+                for ($j = 0; $j < 3; ++$j) {
+                    $u1 = mt_rand() / mt_getrandmax();
+                    $u2 = mt_rand() / mt_getrandmax();
+                    $u1 = $u1 > 0.0 ? $u1 : 1e-12;
+
+                    $z0 = sqrt(-2.0 * log($u1)) * cos(2.0 * M_PI * $u2);
+
+                    $row[] = $z0 * $this->noise;
+                }
+
+                $noise[] = $row;
+            }
+        } else {
+            for ($i = 0; $i < $n; ++$i) {
+                $noise[] = [0.0, 0.0, 0.0];
+            }
+        }
+
+        $center = [];
+
+        for ($i = 0; $i < $n; ++$i) {
+            $center[] = $this->center;
+        }
+
+        $coords = NumPower::array($coords);
+        $noise = NumPower::array($noise);
+        $center = NumPower::array($center);
+
+        $samples = NumPower::add(
+            NumPower::add(
+                NumPower::multiply($coords, $this->scale),
+                $center
+            ),
+            $noise
+        );
+
+        return Labeled::quick($samples->toArray(), $t);
+    }
+}
diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php
index 6554940b3..df51a1a78 100644
--- a/src/NeuralNet/Networks/Network.php
+++ b/src/NeuralNet/Networks/Network.php
@@ -17,6 +17,7 @@
 use Traversable;
 
 use function array_reverse;
+use function array_is_list;
 
 /**
  * Network
@@ -185,12 +186,22 @@ public function initialize() : void
      */
     public function infer(Dataset $dataset) : NDArray
     {
-        $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+        if ($dataset->empty()) {
+            return NumPower::array([]);
+        }
+
+        $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]);
 
         foreach ($this->layers() as $layer) {
             $input = $layer->infer($input);
         }
 
+        $shape = $input->shape();
+
+        if (count($shape) === 1) {
+            $input = NumPower::reshape($input, [1, $shape[0]]);
+        }
+
         return NumPower::transpose($input, [1, 0]);
     }
 
@@ -203,7 +214,11 @@ public function infer(Dataset $dataset) : NDArray
      */
     public function roundtrip(Labeled $dataset) : float
     {
-        $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+        if ($dataset->empty()) {
+            return 0.0;
+        }
+
+        $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]);
 
         $this->feed($input);
 
@@ -272,4 +287,61 @@ public function exportGraphviz() : Encoding
 
         return new Encoding($dot);
     }
+
+    /**
+     * Convert dataset samples (row-major PHP arrays) to a stable 2D NDArray.
+     *
+     * This method exists because dataset samples originate as PHP arrays and are
+     * not guaranteed to be in a form that NumPower can always infer as a dense
+     * 2D numeric matrix. For example:
+     *
+     * - PHP arrays can have non-packed keys (e.g. 3, 7, 8 instead of 0, 1, 2).
+     * - Rows can have non-packed keys (e.g. 1, 2 instead of 0, 1).
+     * - In some edge cases (such as a single row/column), NumPower may infer a
+     *   rank-1 array.
+     *
+     * If the resulting NDArray is not rank-2, calling NumPower::transpose(..., [1, 0])
+     * will throw "axes don't match array". To make transpose stable we:
+     *
+     * - Reindex the outer and inner arrays with array_values() to force packed
+     *   row/column ordering.
+     * - Ensure the NDArray is 2D by reshaping rank-1 arrays to [1, n].
+     *
+     * The returned NDArray is row-major with shape [nSamples, nFeatures].
+     *
+     * @param list<array> $samples
+     * @return NDArray
+     */
+    protected function samplesToInput(array $samples) : NDArray
+    {
+        $packed = array_is_list($samples);
+
+        if ($packed) {
+            foreach ($samples as $sample) {
+                if (!array_is_list($sample)) {
+                    $packed = false;
+
+                    break;
+                }
+            }
+        }
+
+        if (!$packed) {
+            $samples = array_values($samples);
+
+            foreach ($samples as $i => $sample) {
+                $samples[$i] = array_values($sample);
+            }
+        }
+
+        $input = NumPower::array($samples);
+
+        $shape = $input->shape();
+
+        if (count($shape) === 1) {
+            $input = NumPower::reshape($input, [1, $shape[0]]);
+        }
+
+        return $input;
+    }
 }
diff --git a/src/NeuralNet/Parameters/Parameter.php b/src/NeuralNet/Parameters/Parameter.php
index 0cef2e87a..6741a0e49 100644
--- a/src/NeuralNet/Parameters/Parameter.php
+++ b/src/NeuralNet/Parameters/Parameter.php
@@ -90,9 +90,14 @@ public function update(NDArray $gradient, Optimizer $optimizer) : void
 
     /**
      * Perform a deep copy of the object upon cloning.
+     *
+     * Cloning an NDArray directly may trigger native memory corruption in some
+     * NumPower builds (e.g. heap corruption/segfaults when parameters are
+     * snapshotted during training). To make cloning deterministic and stable we
+     * deep-copy through a PHP array roundtrip: NDArray -> PHP array -> NDArray.
      */
     public function __clone() : void
     {
-        $this->param = clone $this->param;
+        $this->param = NumPower::array($this->param->toArray());
     }
 }
diff --git a/src/Regressors/MLPRegressor/MLPRegressor.php b/src/Regressors/MLPRegressor/MLPRegressor.php
new file mode 100644
index 000000000..b95fe7e49
--- /dev/null
+++ b/src/Regressors/MLPRegressor/MLPRegressor.php
@@ -0,0 +1,561 @@
+<?php
+
+namespace Rubix\ML\Regressors\MLPRegressor;
+
+use Rubix\ML\Online;
+use Rubix\ML\Learner;
+use Rubix\ML\Verbose;
+use Rubix\ML\DataType;
+use Rubix\ML\Encoding;
+use Rubix\ML\Estimator;
+use Rubix\ML\Persistable;
+use Rubix\ML\EstimatorType;
+use Rubix\ML\Helpers\Params;
+use Rubix\ML\Datasets\Dataset;
+use Rubix\ML\Traits\LoggerAware;
+use Rubix\ML\NeuralNet\Snapshots\Snapshot;
+use Rubix\ML\NeuralNet\Networks\Network;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
+use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
+use Rubix\ML\Traits\AutotrackRevisions;
+use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
+use Rubix\ML\NeuralNet\Layers\Continuous\Continuous;
+use Rubix\ML\CrossValidation\Metrics\RMSE;
+use Rubix\ML\NeuralNet\Layers\Placeholder1D\Placeholder1D;
+use Rubix\ML\NeuralNet\Optimizers\Base\Optimizer;
+use Rubix\ML\NeuralNet\Initializers\Xavier\XavierUniform;
+use Rubix\ML\CrossValidation\Metrics\Metric;
+use Rubix\ML\Specifications\DatasetIsLabeled;
+use Rubix\ML\Specifications\DatasetIsNotEmpty;
+use Rubix\ML\Specifications\SpecificationChain;
+use Rubix\ML\NeuralNet\CostFunctions\LeastSquares\LeastSquares;
+use Rubix\ML\NeuralNet\CostFunctions\Base\Contracts\RegressionLoss;
+use Rubix\ML\Specifications\DatasetHasDimensionality;
+use Rubix\ML\Specifications\LabelsAreCompatibleWithLearner;
+use Rubix\ML\Specifications\EstimatorIsCompatibleWithMetric;
+use Rubix\ML\Specifications\SamplesAreCompatibleWithEstimator;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+use Generator;
+
+use function is_nan;
+use function count;
+use function get_object_vars;
+use function number_format;
+
+/**
+ * MLP Regressor
+ *
+ * A multilayer feed forward neural network with a continuous output layer suitable for
+ * regression problems. Like the Multilayer Perceptron classifier, the MLP Regressor is
+ * able to handle complex non-linear regression problems by forming higher-order
+ * representations of the input features using intermediate hidden layers.
+ *
+ * References:
+ * [1] G. E. Hinton. (1989). Connectionist learning procedures.
+ * [2] L. Prechelt. (1997). Early Stopping - but when?
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan <leumas.a@gmail.com>
+ */
+class MLPRegressor implements Estimator, Learner, Online, Verbose, Persistable
+{
+    use AutotrackRevisions, LoggerAware;
+
+    /**
+     * An array composing the user-specified hidden layers of the network in order.
+     *
+     * @var Hidden[]
+     */
+    protected array $hiddenLayers = [
+        //
+    ];
+
+    /**
+     * The number of training samples to process at a time.
+     *
+     * @var positive-int
+     */
+    protected int $batchSize;
+
+    /**
+     * The gradient descent optimizer used to update the network parameters.
+     *
+     * @var Optimizer
+     */
+    protected Optimizer $optimizer;
+
+    /**
+     * The maximum number of training epochs. i.e. the number of times to iterate before terminating.
+     *
+     * @var int<0,max>
+     */
+    protected int $epochs;
+
+    /**
+     * The minimum change in the training loss necessary to continue training.
+     *
+     * @var float
+     */
+    protected float $minChange;
+
+    /**
+     * The number of epochs to train before evaluating the model with the holdout set.
+     *
+     * @var int
+     */
+    protected $evalInterval;
+
+    /**
+     * The number of epochs without improvement in the validation score to wait before considering an early stop.
+     *
+     * @var positive-int
+     */
+    protected int $window;
+
+    /**
+     * The proportion of training samples to use for validation and progress monitoring.
+     *
+     * @var float
+     */
+    protected float $holdOut;
+
+    /**
+     * The function that computes the loss associated with an erroneous activation during training.
+     *
+     * @var RegressionLoss
+     */
+    protected RegressionLoss $costFn;
+
+    /**
+     * The metric used to score the generalization performance of the model during training.
+     *
+     * @var Metric
+     */
+    protected Metric $metric;
+
+    /**
+     * The underlying neural network instance.
+     *
+     * @var Network|null
+     */
+    protected ?Network $network = null;
+
+    /**
+     * The validation scores at each epoch from the last training session.
+     *
+     * @var float[]|null
+     */
+    protected ?array $scores = null;
+
+    /**
+     * The loss at each epoch from the last training session.
+     *
+     * @var float[]|null
+     */
+    protected ?array $losses = null;
+
+    /**
+     * @param Hidden[] $hiddenLayers
+     * @param int $batchSize
+     * @param Optimizer|null $optimizer
+     * @param int $epochs
+     * @param float $minChange
+     * @param int $evalInterval
+     * @param int $window
+     * @param float $holdOut
+     * @param RegressionLoss|null $costFn
+     * @param Metric|null $metric
+     * @throws InvalidArgumentException
+     */
+    public function __construct(
+        array $hiddenLayers = [],
+        int $batchSize = 128,
+        ?Optimizer $optimizer = null,
+        int $epochs = 1000,
+        float $minChange = 1e-4,
+        int $evalInterval = 3,
+        int $window = 5,
+        float $holdOut = 0.1,
+        ?RegressionLoss $costFn = null,
+        ?Metric $metric = null
+    ) {
+        foreach ($hiddenLayers as $layer) {
+            if (!$layer instanceof Hidden) {
+                throw new InvalidArgumentException('Hidden layer'
+                    . ' must implement the Hidden interface.');
+            }
+        }
+
+        if ($batchSize < 1) {
+            throw new InvalidArgumentException('Batch size must be'
+                . " greater than 0, $batchSize given.");
+        }
+
+        if ($epochs < 0) {
+            throw new InvalidArgumentException('Number of epochs'
+                . " must be greater than 0, $epochs given.");
+        }
+
+        if ($minChange < 0.0) {
+            throw new InvalidArgumentException('Minimum change must be'
+                . " greater than 0, $minChange given.");
+        }
+
+        if ($evalInterval < 1) {
+            throw new InvalidArgumentException('Eval interval must be'
+                . " greater than 0, $evalInterval given.");
+        }
+
+        if ($window < 1) {
+            throw new InvalidArgumentException('Window must be'
+                . " greater than 0, $window given.");
+        }
+
+        if ($holdOut < 0.0 or $holdOut > 0.5) {
+            throw new InvalidArgumentException('Hold out ratio must be'
+                . " between 0 and 0.5, $holdOut given.");
+        }
+
+        if ($metric) {
+            EstimatorIsCompatibleWithMetric::with($this, $metric)->check();
+        }
+
+        $this->hiddenLayers = $hiddenLayers;
+        $this->batchSize = $batchSize;
+        $this->optimizer = $optimizer ?? new Adam();
+        $this->epochs = $epochs;
+        $this->minChange = $minChange;
+        $this->evalInterval = $evalInterval;
+        $this->window = $window;
+        $this->holdOut = $holdOut;
+        $this->costFn = $costFn ?? new LeastSquares();
+        $this->metric = $metric ?? new RMSE();
+    }
+
+    /**
+     * Return the estimator type.
+     *
+     * @internal
+     *
+     * @return EstimatorType
+     */
+    public function type() : EstimatorType
+    {
+        return EstimatorType::regressor();
+    }
+
+    /**
+     * Return the data types that the estimator is compatible with.
+     *
+     * @internal
+     *
+     * @return list<DataType>
+     */
+    public function compatibility() : array
+    {
+        return [
+            DataType::continuous(),
+        ];
+    }
+
+    /**
+     * Return the settings of the hyper-parameters in an associative array.
+     *
+     * @internal
+     *
+     * @return mixed[]
+     */
+    public function params() : array
+    {
+        return [
+            'hidden layers' => $this->hiddenLayers,
+            'batch size' => $this->batchSize,
+            'optimizer' => $this->optimizer,
+            'epochs' => $this->epochs,
+            'min change' => $this->minChange,
+            'eval interval' => $this->evalInterval,
+            'window' => $this->window,
+            'hold out' => $this->holdOut,
+            'cost fn' => $this->costFn,
+            'metric' => $this->metric,
+        ];
+    }
+
+    /**
+     * Has the learner been trained?
+     *
+     * @return bool
+     */
+    public function trained() : bool
+    {
+        return isset($this->network);
+    }
+
+    /**
+     * Return an iterable progress table with the steps from the last training session.
+     *
+     * @return Generator<mixed[]>
+     */
+    public function steps() : Generator
+    {
+        if (!$this->losses) {
+            return;
+        }
+
+        foreach ($this->losses as $epoch => $loss) {
+            yield [
+                'epoch' => $epoch,
+                'score' => $this->scores[$epoch] ?? null,
+                'loss' => $loss,
+            ];
+        }
+    }
+
+    /**
+     * Return the validation score at each epoch.
+     *
+     * @return float[]|null
+     */
+    public function scores() : ?array
+    {
+        return $this->scores;
+    }
+
+    /**
+     * Return the training loss at each epoch.
+     *
+     * @return float[]|null
+     */
+    public function losses() : ?array
+    {
+        return $this->losses;
+    }
+
+    /**
+     * Return the underlying neural network instance or null if not trained.
+     *
+     * @return Network|null
+     */
+    public function network() : ?Network
+    {
+        return $this->network;
+    }
+
+    /**
+     * Train the estimator with a dataset.
+     *
+     * @param \Rubix\ML\Datasets\Labeled $dataset
+     */
+    public function train(Dataset $dataset) : void
+    {
+        DatasetIsNotEmpty::with($dataset)->check();
+
+        $hiddenLayers = $this->hiddenLayers;
+
+        $hiddenLayers[] = new Dense(1, 0.0, true, new XavierUniform());
+
+        $this->network = new Network(
+            new Placeholder1D($dataset->numFeatures()),
+            $hiddenLayers,
+            new Continuous($this->costFn),
+            $this->optimizer
+        );
+
+        $this->network->initialize();
+
+        $this->partial($dataset);
+    }
+
+    /**
+     * Train the network using mini-batch gradient descent with backpropagation.
+     *
+     * @param \Rubix\ML\Datasets\Labeled $dataset
+     * @throws RuntimeException
+     */
+    public function partial(Dataset $dataset) : void
+    {
+        if (!$this->network) {
+            $this->train($dataset);
+
+            return;
+        }
+
+        SpecificationChain::with([
+            new DatasetIsLabeled($dataset),
+            new DatasetIsNotEmpty($dataset),
+            new SamplesAreCompatibleWithEstimator($dataset, $this),
+            new LabelsAreCompatibleWithLearner($dataset, $this),
+            new DatasetHasDimensionality($dataset, $this->network->input()->width()),
+        ])->check();
+
+        if ($this->logger) {
+            $this->logger->info("Training $this");
+
+            $numParams = number_format($this->network->numParams());
+
+            $this->logger->info("{$numParams} trainable parameters");
+        }
+
+        [$testing, $training] = $dataset->randomize()->split($this->holdOut);
+
+        [$minScore, $maxScore] = $this->metric->range()->list();
+
+        $bestScore = $minScore;
+        $bestEpoch = $numWorseEpochs = 0;
+        $loss = 0.0;
+        $score = $snapshot = null;
+        $prevLoss = INF;
+
+        $this->scores = $this->losses = [];
+
+        for ($epoch = 1; $epoch <= $this->epochs; ++$epoch) {
+            $batches = $training->randomize()->batch($this->batchSize);
+
+            $loss = 0.0;
+
+            foreach ($batches as $batch) {
+                $loss += $this->network->roundtrip($batch);
+            }
+
+            $loss /= count($batches);
+
+            $lossChange = abs($prevLoss - $loss);
+
+            $this->losses[$epoch] = $loss;
+
+            if (is_nan($loss)) {
+                if ($this->logger) {
+                    $this->logger->warning('Numerical instability detected');
+                }
+
+                break;
+            }
+
+            if ($epoch % $this->evalInterval === 0 && !$testing->empty()) {
+                $predictions = $this->predict($testing);
+
+                $score = $this->metric->score($predictions, $testing->labels());
+
+                $this->scores[$epoch] = $score;
+            }
+
+            if ($this->logger) {
+                $message = "Epoch: $epoch, {$this->costFn}: $loss";
+
+                if (isset($score)) {
+                    $message .= ", {$this->metric}: $score";
+                }
+
+                $this->logger->info($message);
+            }
+
+            if (isset($score)) {
+                if ($score >= $maxScore) {
+                    break;
+                }
+
+                if ($score > $bestScore) {
+                    $bestScore = $score;
+                    $bestEpoch = $epoch;
+
+                    $snapshot = Snapshot::take($this->network);
+
+                    $numWorseEpochs = 0;
+                } else {
+                    ++$numWorseEpochs;
+                }
+
+                if ($numWorseEpochs >= $this->window) {
+                    break;
+                }
+
+                unset($score);
+            }
+
+            if ($lossChange < $this->minChange) {
+                break;
+            }
+
+            $prevLoss = $loss;
+        }
+
+        if ($snapshot and (end($this->scores) < $bestScore or is_nan($loss))) {
+            $snapshot->restore();
+
+            if ($this->logger) {
+                $this->logger->info("Model state restored to epoch $bestEpoch");
+            }
+        }
+
+        if ($this->logger) {
+            $this->logger->info('Training complete');
+        }
+    }
+
+    /**
+     * Feed a sample through the network and make a prediction based on the
+     * activation of the output neuron.
+     *
+     * @param Dataset $dataset
+     * @throws RuntimeException
+     * @return list<int|float>
+     */
+    public function predict(Dataset $dataset) : array
+    {
+        if (!$this->network) {
+            throw new RuntimeException('Estimator has not been trained.');
+        }
+
+        DatasetHasDimensionality::with($dataset, $this->network->input()->width())->check();
+
+        $activations = $this->network->infer($dataset);
+
+        $activations = array_column($activations->toArray(), 0);
+
+        return $activations;
+    }
+
+    /**
+     * Export the network architecture as a graph in dot format.
+     *
+     * @throws RuntimeException
+     * @return Encoding
+     */
+    public function exportGraphviz() : Encoding
+    {
+        if (!$this->network) {
+            throw new RuntimeException('Must train network first.');
+        }
+
+        return $this->network->exportGraphviz();
+    }
+
+    /**
+     * Return an associative array containing the data used to serialize the object.
+     *
+     * @return mixed[]
+     */
+    public function __serialize() : array
+    {
+        $properties = get_object_vars($this);
+
+        unset($properties['losses'], $properties['scores'], $properties['logger']);
+
+        return $properties;
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return 'MLP Regressor (' . Params::stringify($this->params()) . ')';
+    }
+}
diff --git a/tests/Datasets/Generators/SwissRoll/SwissRollTest.php b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php
new file mode 100644
index 000000000..437604c21
--- /dev/null
+++ b/tests/Datasets/Generators/SwissRoll/SwissRollTest.php
@@ -0,0 +1,47 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\Datasets\Generators\SwissRoll;
+
+use Rubix\ML\Datasets\Generators\SwissRoll\SwissRoll;
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\Datasets\Dataset;
+use Rubix\ML\Datasets\Labeled;
+use PHPUnit\Framework\TestCase;
+
+#[Group('Generators')]
+#[CoversClass(SwissRoll::class)]
+class SwissRollTest extends TestCase
+{
+    protected const int DATASET_SIZE = 30;
+
+    protected SwissRoll $generator;
+
+    protected function setUp() : void
+    {
+        $this->generator = new SwissRoll(x: 0.0, y: 0.0, z: 0.0, scale: 1.0, depth: 12.0, noise: 0.3);
+    }
+
+    #[Test]
+    #[TestDox('Dimensions returns 3')]
+    public function testDimensions() : void
+    {
+        self::assertEquals(3, $this->generator->dimensions());
+    }
+
+    #[Test]
+    #[TestDox('Generate returns a labeled dataset of the requested size')]
+    public function testGenerate() : void
+    {
+        $dataset = $this->generator->generate(self::DATASET_SIZE);
+
+        self::assertInstanceOf(Labeled::class, $dataset);
+        self::assertInstanceOf(Dataset::class, $dataset);
+
+        self::assertCount(self::DATASET_SIZE, $dataset);
+    }
+}
diff --git a/tests/NeuralNet/Layers/Swish/SwishTest.php b/tests/NeuralNet/Layers/Swish/SwishTest.php
index 5f8d55503..f0b2bc2be 100644
--- a/tests/NeuralNet/Layers/Swish/SwishTest.php
+++ b/tests/NeuralNet/Layers/Swish/SwishTest.php
@@ -73,7 +73,7 @@ public static function initializeForwardBackInferProvider() : array
                 'backExpected' => [
                     [0.2319176, 0.7695808, 0.0450083],
                     [0.2749583, 0.1099833, 0.0108810],
-                    [0.1252499, -0.0012326, 0.2314345],
+                    [0.1252493, -0.0012326, 0.2314345],
                 ],
                 'inferExpected' => [
                     [0.7306671, 2.3094806, -0.0475070],
diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php
index 0197c225d..0406193cb 100644
--- a/tests/NeuralNet/Networks/NetworkTest.php
+++ b/tests/NeuralNet/Networks/NetworkTest.php
@@ -6,6 +6,8 @@
 
 use PHPUnit\Framework\Attributes\CoversClass;
 use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
 use Rubix\ML\Datasets\Labeled;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Hidden;
 use Rubix\ML\NeuralNet\Layers\Base\Contracts\Input;
@@ -19,6 +21,7 @@
 use Rubix\ML\NeuralNet\ActivationFunctions\ReLU\ReLU;
 use Rubix\ML\NeuralNet\CostFunctions\CrossEntropy\CrossEntropy;
 use PHPUnit\Framework\TestCase;
+use ReflectionMethod;
 
 #[Group('NeuralNet')]
 #[CoversClass(Network::class)]
@@ -71,6 +74,8 @@ classes: ['yes', 'no', 'maybe'],
         );
     }
 
+    #[Test]
+    #[TestDox('Layers iterator yields all layers')]
     public function testLayers() : void
     {
         $count = 0;
@@ -82,20 +87,66 @@ public function testLayers() : void
         self::assertSame(7, $count);
     }
 
+    #[Test]
+    #[TestDox('Input layer is Placeholder1D')]
     public function testInput() : void
     {
         self::assertInstanceOf(Placeholder1D::class, $this->network->input());
     }
 
+    #[Test]
+    #[TestDox('Hidden layers count')]
     public function testHidden() : void
     {
         self::assertCount(5, $this->network->hidden());
     }
 
+    #[Test]
+    #[TestDox('Num params')]
     public function testNumParams() : void
     {
         $this->network->initialize();
 
         self::assertEquals(103, $this->network->numParams());
     }
+
+    #[Test]
+    #[TestDox('samplesToInput normalizes samples into 2D NDArray')]
+    public function testSamplesToInput() : void
+    {
+        $method = new ReflectionMethod(Network::class, 'samplesToInput');
+        $method->setAccessible(true);
+
+        $input = $method->invoke($this->network, $this->dataset->samples());
+
+        self::assertEquals([3, 2], $input->shape());
+
+        $samples = [
+            3 => [
+                1 => 1.0,
+                2 => 2.5,
+            ],
+            7 => [
+                1 => 0.1,
+                2 => 0.0,
+            ],
+            8 => [
+                1 => 0.002,
+                2 => -6.0,
+            ],
+        ];
+
+        $input = $method->invoke($this->network, $samples);
+
+        self::assertEquals([3, 2], $input->shape());
+
+        $samples = [
+            [1.0],
+            [2.5],
+        ];
+
+        $input = $method->invoke($this->network, $samples);
+
+        self::assertEquals([2, 1], $input->shape());
+    }
 }
diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
new file mode 100644
index 000000000..5366c806e
--- /dev/null
+++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
@@ -0,0 +1,216 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\Tests\Regressors\MLPRegressors;
+
+use PHPUnit\Framework\Attributes\CoversClass;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use Rubix\ML\DataType;
+use Rubix\ML\EstimatorType;
+use Rubix\ML\Datasets\Labeled;
+use Rubix\ML\Loggers\BlackHole;
+use Rubix\ML\Datasets\Unlabeled;
+use Rubix\ML\NeuralNet\Layers\Dense\Dense;
+use Rubix\ML\Regressors\MLPRegressor\MLPRegressor;
+use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
+use Rubix\ML\NeuralNet\Layers\Activation\Activation;
+use Rubix\ML\CrossValidation\Metrics\RMSE;
+use Rubix\ML\Datasets\Generators\SwissRoll\SwissRoll;
+use Rubix\ML\Transformers\ZScaleStandardizer;
+use Rubix\ML\CrossValidation\Metrics\RSquared;
+use Rubix\ML\NeuralNet\ActivationFunctions\SiLU\SiLU;
+use Rubix\ML\NeuralNet\CostFunctions\LeastSquares\LeastSquares;
+use Rubix\ML\Exceptions\InvalidArgumentException;
+use Rubix\ML\Exceptions\RuntimeException;
+use PHPUnit\Framework\TestCase;
+use function Apphp\PrettyPrint\pp;
+
+#[Group('Regressors')]
+#[CoversClass(MLPRegressor::class)]
+class MLPRegressorTest extends TestCase
+{
+    /**
+     * The number of samples in the training set.
+     */
+    protected const int TRAIN_SIZE = 512;
+
+    /**
+     * The number of samples in the validation set.
+     */
+    protected const int TEST_SIZE = 256;
+
+    /**
+     * The minimum validation score required to pass the test.
+     */
+    protected const float MIN_SCORE = 0.9;
+
+    /**
+     * Constant used to see the random number generator.
+     */
+    protected const int RANDOM_SEED = 0;
+
+    protected SwissRoll $generator;
+
+    protected MLPRegressor $estimator;
+
+    protected RSquared $metric;
+
+    protected function setUp() : void
+    {
+        $this->generator = new SwissRoll(x: 4.0, y: -7.0, z: 0.0, scale: 1.0, depth: 21.0, noise: 0.5);
+
+        $this->estimator = new MLPRegressor(
+            hiddenLayers: [
+                new Dense(32),
+                new Activation(new SiLU()),
+                new Dense(16),
+                new Activation(new SiLU()),
+                new Dense(8),
+                new Activation(new SiLU()),
+            ],
+            batchSize: 32,
+            optimizer: new Adam(0.01),
+            epochs: 100,
+            minChange: 1e-4,
+            evalInterval: 3,
+            window: 5,
+            holdOut: 0.1,
+            costFn: new LeastSquares(),
+            metric: new RMSE()
+        );
+
+        $this->metric = new RSquared();
+
+        $this->estimator->setLogger(new BlackHole());
+
+        srand(self::RANDOM_SEED);
+    }
+
+    #[Test]
+    #[TestDox('Assert pre conditions')]
+    public function testAssertPreConditions() : void
+    {
+        self::assertFalse($this->estimator->trained());
+    }
+
+    #[Test]
+    #[TestDox('Bad batch size')]
+    public function testBadBatchSize() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        new MLPRegressor(hiddenLayers: [], batchSize: -100);
+    }
+
+    #[Test]
+    #[TestDox('Type')]
+    public function testType() : void
+    {
+        self::assertEquals(EstimatorType::regressor(), $this->estimator->type());
+    }
+
+    #[Test]
+    #[TestDox('Compatibility')]
+    public function testCompatibility() : void
+    {
+        $expected = [
+            DataType::continuous(),
+        ];
+
+        self::assertEquals($expected, $this->estimator->compatibility());
+    }
+
+    #[Test]
+    #[TestDox('Params')]
+    public function testParams() : void
+    {
+        $expected = [
+            'hidden layers' => [
+                new Dense(32),
+                new Activation(new SiLU()),
+                new Dense(16),
+                new Activation(new SiLU()),
+                new Dense(8),
+                new Activation(new SiLU()),
+            ],
+            'batch size' => 32,
+            'optimizer' => new Adam(0.01),
+            'epochs' => 100,
+            'min change' => 1e-4,
+            'eval interval' => 3,
+            'window' => 5,
+            'hold out' => 0.1,
+            'cost fn' => new LeastSquares(),
+            'metric' => new RMSE(),
+        ];
+
+        self::assertEquals($expected, $this->estimator->params());
+    }
+
+    #[Test]
+    #[TestDox('Train partial predict')]
+    public function testTrainPartialPredict() : void
+    {
+        $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE);
+
+        $dataset->apply(new ZScaleStandardizer());
+
+        $testing = $dataset->randomize()->take(self::TEST_SIZE);
+
+        $folds = $dataset->fold(3);
+
+        $this->estimator->train($folds[0]);
+        $this->estimator->partial($folds[1]);
+        $this->estimator->partial($folds[2]);
+
+        self::assertTrue($this->estimator->trained());
+
+        $dot = $this->estimator->exportGraphviz();
+
+        // Graphviz::dotToImage($dot)->saveTo(new Filesystem('test.png'));
+
+        self::assertStringStartsWith('digraph Tree {', (string) $dot);
+
+        $losses = $this->estimator->losses();
+
+        self::assertIsArray($losses);
+        self::assertContainsOnlyFloat($losses);
+
+        $scores = $this->estimator->scores();
+
+        self::assertIsArray($scores);
+        self::assertContainsOnlyFloat($scores);
+
+        $predictions = $this->estimator->predict($testing);
+
+        /** @var list<int|float> $labels */
+        $labels = $testing->labels();
+        $score = $this->metric->score(
+            predictions: $predictions,
+            labels: $labels
+        );
+
+        self::assertGreaterThanOrEqual(self::MIN_SCORE, $score);
+    }
+
+    #[Test]
+    #[TestDox('Train incompatible')]
+    public function testTrainIncompatible() : void
+    {
+        $this->expectException(InvalidArgumentException::class);
+
+        $this->estimator->train(Labeled::quick(samples: [['bad']], labels: [2]));
+    }
+
+    #[Test]
+    #[TestDox('Predict untrained')]
+    public function testPredictUntrained() : void
+    {
+        $this->expectException(RuntimeException::class);
+
+        $this->estimator->predict(Unlabeled::quick());
+    }
+}

From 13acae649e0d8449ffb7d548ea53563fb85ea0d5 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 17:24:14 +0200
Subject: [PATCH 30/36] ML-396 removed unneeded export function

---
 tests/Regressors/MLPRegressors/MLPRegressorTest.php | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
index 5366c806e..839711455 100644
--- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php
+++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
@@ -26,7 +26,6 @@
 use Rubix\ML\Exceptions\InvalidArgumentException;
 use Rubix\ML\Exceptions\RuntimeException;
 use PHPUnit\Framework\TestCase;
-use function Apphp\PrettyPrint\pp;
 
 #[Group('Regressors')]
 #[CoversClass(MLPRegressor::class)]

From 3b65a47049dc2ca121800fcb47a4ef77bd38b00c Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 17:56:10 +0200
Subject: [PATCH 31/36] ML-396 added test for NumPower

---
 tests/NeuralNet/NumPower/NumPowerTest.php | 50 +++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 tests/NeuralNet/NumPower/NumPowerTest.php

diff --git a/tests/NeuralNet/NumPower/NumPowerTest.php b/tests/NeuralNet/NumPower/NumPowerTest.php
new file mode 100644
index 000000000..20a2ee602
--- /dev/null
+++ b/tests/NeuralNet/NumPower/NumPowerTest.php
@@ -0,0 +1,50 @@
+<?php
+
+namespace Rubix\ML\Tests\NeuralNet\NumPower;
+
+use NumPower;
+use PHPUnit\Framework\Attributes\Group;
+use PHPUnit\Framework\Attributes\Test;
+use PHPUnit\Framework\Attributes\TestDox;
+use PHPUnit\Framework\TestCase;
+
+#[Group('NumPower')]
+class NumPowerTest extends TestCase
+{
+    #[Test]
+    #[TestDox('NumPower transpose swaps axes')]
+    public function testNumPowerTransposeSwapsAxes() : void
+    {
+        $rows = [];
+
+        for ($i = 0; $i < 3; ++$i) {
+            $row = [];
+
+            for ($j = 0; $j < 256; ++$j) {
+                $row[] = (float) ($i * 1000 + $j);
+            }
+
+            $rows[] = $row;
+        }
+
+        $x = NumPower::array($rows);
+
+        $t = NumPower::transpose($x, [1, 0]);
+
+        self::assertSame([256, 3], $t->shape());
+
+        $a = $t->toArray();
+
+        self::assertEqualsWithDelta(0.0, (float) $a[0][0], 1e-12);
+        self::assertEqualsWithDelta(1000.0, (float) $a[0][1], 1e-12);
+        self::assertEqualsWithDelta(2000.0, (float) $a[0][2], 1e-12);
+
+        self::assertEqualsWithDelta(255.0, (float) $a[255][0], 1e-12);
+        self::assertEqualsWithDelta(1255.0, (float) $a[255][1], 1e-12);
+        self::assertEqualsWithDelta(2255.0, (float) $a[255][2], 1e-12);
+
+        self::assertEqualsWithDelta(42.0, (float) $a[42][0], 1e-12);
+        self::assertEqualsWithDelta(1042.0, (float) $a[42][1], 1e-12);
+        self::assertEqualsWithDelta(2042.0, (float) $a[42][2], 1e-12);
+    }
+}

From d7404f81ef8629b4095f0dfc7f10c3aea60e6756 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 18:44:28 +0200
Subject: [PATCH 32/36] ML-396 added USE_NUMPOWER_TRANSPOSE option to Network

---
 src/NeuralNet/Networks/Network.php            |  93 ++++-----
 tests/NeuralNet/Networks/NetworkTest.php      |  40 ----
 .../MLPRegressors/MLPRegressorTest.php        | 182 ++++++++++++++++++
 3 files changed, 231 insertions(+), 84 deletions(-)

diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php
index df51a1a78..929813652 100644
--- a/src/NeuralNet/Networks/Network.php
+++ b/src/NeuralNet/Networks/Network.php
@@ -73,6 +73,8 @@ class Network
      */
     protected Optimizer $optimizer;
 
+    protected const USE_NUMPOWER_TRANSPOSE = false;
+
     /**
      * @param Input $input
      * @param Hidden[] $hidden
@@ -190,7 +192,11 @@ public function infer(Dataset $dataset) : NDArray
             return NumPower::array([]);
         }
 
-        $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]);
+        if (self::USE_NUMPOWER_TRANSPOSE) {
+            $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+        } else {
+            $input = NumPower::array($this->rowsToColumns($dataset->samples()));
+        }
 
         foreach ($this->layers() as $layer) {
             $input = $layer->infer($input);
@@ -202,7 +208,11 @@ public function infer(Dataset $dataset) : NDArray
             $input = NumPower::reshape($input, [1, $shape[0]]);
         }
 
-        return NumPower::transpose($input, [1, 0]);
+        if (self::USE_NUMPOWER_TRANSPOSE) {
+            return NumPower::transpose($input, [1, 0]);
+        } else {
+            return NumPower::array($this->columnsToRows($input->toArray()));
+        }
     }
 
     /**
@@ -218,7 +228,11 @@ public function roundtrip(Labeled $dataset) : float
             return 0.0;
         }
 
-        $input = NumPower::transpose($this->samplesToInput($dataset->samples()), [1, 0]);
+        if (self::USE_NUMPOWER_TRANSPOSE) {
+            $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
+        } else {
+            $input = NumPower::array($this->rowsToColumns($dataset->samples()));
+        }
 
         $this->feed($input);
 
@@ -289,59 +303,50 @@ public function exportGraphviz() : Encoding
     }
 
     /**
-     * Convert dataset samples (row-major PHP arrays) to a stable 2D NDArray.
-     *
-     * This method exists because dataset samples originate as PHP arrays and are
-     * not guaranteed to be in a form that NumPower can always infer as a dense
-     * 2D numeric matrix. For example:
-     *
-     * - PHP arrays can have non-packed keys (e.g. 3, 7, 8 instead of 0, 1, 2).
-     * - Rows can have non-packed keys (e.g. 1, 2 instead of 0, 1).
-     * - In some edge cases (such as a single row/column), NumPower may infer a
-     *   rank-1 array.
-     *
-     * If the resulting NDArray is not rank-2, calling NumPower::transpose(..., [1, 0])
-     * will throw "axes don't match array". To make transpose stable we:
-     *
-     * - Reindex the outer and inner arrays with array_values() to force packed
-     *   row/column ordering.
-     * - Ensure the NDArray is 2D by reshaping rank-1 arrays to [1, n].
-     *
-     * The returned NDArray is row-major with shape [nSamples, nFeatures].
-     *
-     * @param list<array> $samples
-     * @return NDArray
+     * @param list<list<int|float|string>> $rows
+     * @return list<list<int|float|string>>
      */
-    protected function samplesToInput(array $samples) : NDArray
+    private function rowsToColumns(array $rows) : array
     {
-        $packed = array_is_list($samples);
+        $numSamples = count($rows);
+        $numFeatures = isset($rows[0]) && is_array($rows[0]) ? count($rows[0]) : 0;
 
-        if ($packed) {
-            foreach ($samples as $sample) {
-                if (!array_is_list($sample)) {
-                    $packed = false;
+        $columns = [];
 
-                    break;
-                }
+        for ($j = 0; $j < $numFeatures; ++$j) {
+            $column = [];
+
+            for ($i = 0; $i < $numSamples; ++$i) {
+                $column[] = $rows[$i][$j];
             }
+
+            $columns[] = $column;
         }
 
-        if (!$packed) {
-            $samples = array_values($samples);
+        return $columns;
+    }
 
-            foreach ($samples as $i => $sample) {
-                $samples[$i] = array_values($sample);
-            }
-        }
+    /**
+     * @param list<list<int|float|string>> $columns
+     * @return list<list<int|float|string>>
+     */
+    private function columnsToRows(array $columns) : array
+    {
+        $numFeatures = count($columns);
+        $numSamples = isset($columns[0]) && is_array($columns[0]) ? count($columns[0]) : 0;
 
-        $input = NumPower::array($samples);
+        $rows = [];
 
-        $shape = $input->shape();
+        for ($i = 0; $i < $numSamples; ++$i) {
+            $row = [];
 
-        if (count($shape) === 1) {
-            $input = NumPower::reshape($input, [1, $shape[0]]);
+            for ($j = 0; $j < $numFeatures; ++$j) {
+                $row[] = $columns[$j][$i];
+            }
+
+            $rows[] = $row;
         }
 
-        return $input;
+        return $rows;
     }
 }
diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php
index 0406193cb..199f1e9f4 100644
--- a/tests/NeuralNet/Networks/NetworkTest.php
+++ b/tests/NeuralNet/Networks/NetworkTest.php
@@ -109,44 +109,4 @@ public function testNumParams() : void
 
         self::assertEquals(103, $this->network->numParams());
     }
-
-    #[Test]
-    #[TestDox('samplesToInput normalizes samples into 2D NDArray')]
-    public function testSamplesToInput() : void
-    {
-        $method = new ReflectionMethod(Network::class, 'samplesToInput');
-        $method->setAccessible(true);
-
-        $input = $method->invoke($this->network, $this->dataset->samples());
-
-        self::assertEquals([3, 2], $input->shape());
-
-        $samples = [
-            3 => [
-                1 => 1.0,
-                2 => 2.5,
-            ],
-            7 => [
-                1 => 0.1,
-                2 => 0.0,
-            ],
-            8 => [
-                1 => 0.002,
-                2 => -6.0,
-            ],
-        ];
-
-        $input = $method->invoke($this->network, $samples);
-
-        self::assertEquals([3, 2], $input->shape());
-
-        $samples = [
-            [1.0],
-            [2.5],
-        ];
-
-        $input = $method->invoke($this->network, $samples);
-
-        self::assertEquals([2, 1], $input->shape());
-    }
 }
diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
index 839711455..ddd633628 100644
--- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php
+++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
@@ -26,6 +26,7 @@
 use Rubix\ML\Exceptions\InvalidArgumentException;
 use Rubix\ML\Exceptions\RuntimeException;
 use PHPUnit\Framework\TestCase;
+use function Apphp\PrettyPrint\pp;
 
 #[Group('Regressors')]
 #[CoversClass(MLPRegressor::class)]
@@ -159,9 +160,15 @@ public function testTrainPartialPredict() : void
 
         $testing = $dataset->randomize()->take(self::TEST_SIZE);
 
+        $testingSamplesBefore = $testing->samples();
+        $testingLabelsBefore = $testing->labels();
+
         $folds = $dataset->fold(3);
 
         $this->estimator->train($folds[0]);
+
+        $predictionsBefore = $this->estimator->predict($testing);
+
         $this->estimator->partial($folds[1]);
         $this->estimator->partial($folds[2]);
 
@@ -177,14 +184,69 @@ public function testTrainPartialPredict() : void
 
         self::assertIsArray($losses);
         self::assertContainsOnlyFloat($losses);
+        self::assertNotEmpty($losses);
+
+        foreach ($losses as $epoch => $loss) {
+            self::assertIsInt($epoch);
+            self::assertGreaterThanOrEqual(1, $epoch);
+            self::assertFalse(is_nan($loss));
+            self::assertTrue(is_finite($loss));
+        }
 
         $scores = $this->estimator->scores();
 
         self::assertIsArray($scores);
         self::assertContainsOnlyFloat($scores);
+        self::assertNotEmpty($scores);
+
+        foreach ($scores as $epoch => $value) {
+            self::assertIsInt($epoch);
+            self::assertGreaterThanOrEqual(1, $epoch);
+            self::assertFalse(is_nan($value));
+            self::assertTrue(is_finite($value));
+            self::assertSame(0, $epoch % 3);
+        }
 
         $predictions = $this->estimator->predict($testing);
 
+        self::assertCount($testing->numSamples(), $predictions);
+
+        foreach ($predictions as $prediction) {
+            self::assertIsNumeric($prediction);
+            self::assertFalse(is_nan((float) $prediction));
+            self::assertTrue(is_finite((float) $prediction));
+        }
+
+        $predictions2 = $this->estimator->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictions2);
+
+        foreach ($predictions2 as $i => $prediction) {
+            self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-12);
+        }
+
+        self::assertEquals($testingSamplesBefore, $testing->samples());
+        self::assertEquals($testingLabelsBefore, $testing->labels());
+
+        $delta = 0.0;
+
+        foreach ($predictions as $i => $prediction) {
+            $delta += abs((float) $prediction - (float) $predictionsBefore[$i]);
+        }
+
+        self::assertGreaterThan(0.0, $delta);
+
+        $min = (float) $predictions[0];
+        $max = (float) $predictions[0];
+
+        foreach ($predictions as $prediction) {
+            $p = (float) $prediction;
+            $min = min($min, $p);
+            $max = max($max, $p);
+        }
+
+        self::assertGreaterThan(0.0, $max - $min);
+
         /** @var list<int|float> $labels */
         $labels = $testing->labels();
         $score = $this->metric->score(
@@ -192,9 +254,129 @@ public function testTrainPartialPredict() : void
             labels: $labels
         );
 
+        self::assertFalse(is_nan($score));
+        self::assertTrue(is_finite($score));
+        self::assertGreaterThan(-10.0, $score);
+
+        $copy = unserialize(serialize($this->estimator));
+
+        self::assertInstanceOf(MLPRegressor::class, $copy);
+        self::assertTrue($copy->trained());
+
+        $predictionsAfter = $copy->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictionsAfter);
+
+        foreach ($predictionsAfter as $i => $prediction) {
+            self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-8);
+        }
+
         self::assertGreaterThanOrEqual(self::MIN_SCORE, $score);
     }
 
+    #[Test]
+    #[TestDox('Predict count matches number of samples')]
+    public function testPredictCountMatchesNumberOfSamples() : void
+    {
+        [$testing] = $this->trainEstimatorAndGetTestingSet();
+
+        $predictions = $this->estimator->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictions);
+    }
+
+    #[Test]
+    #[TestDox('Predict returns numeric finite values')]
+    public function testPredictReturnsNumericFiniteValues() : void
+    {
+        [$testing] = $this->trainEstimatorAndGetTestingSet();
+
+        $predictions = $this->estimator->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictions);
+
+        foreach ($predictions as $prediction) {
+            self::assertIsNumeric($prediction);
+            self::assertFalse(is_nan((float) $prediction));
+            self::assertTrue(is_finite((float) $prediction));
+        }
+    }
+
+    #[Test]
+    #[TestDox('Predict is repeatable for same model and dataset')]
+    public function testPredictIsRepeatableForSameModelAndDataset() : void
+    {
+        [$testing] = $this->trainEstimatorAndGetTestingSet();
+
+        $predictions1 = $this->estimator->predict($testing);
+        $predictions2 = $this->estimator->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictions1);
+        self::assertCount($testing->numSamples(), $predictions2);
+
+        foreach ($predictions1 as $i => $prediction) {
+            self::assertEqualsWithDelta((float) $prediction, (float) $predictions2[$i], 1e-12);
+        }
+    }
+
+    #[Test]
+    #[TestDox('Predict does not mutate dataset samples or labels')]
+    public function testPredictDoesNotMutateDataset() : void
+    {
+        [$testing] = $this->trainEstimatorAndGetTestingSet();
+
+        $samplesBefore = $testing->samples();
+        $labelsBefore = $testing->labels();
+
+        $predictions = $this->estimator->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictions);
+        self::assertEquals($samplesBefore, $testing->samples());
+        self::assertEquals($labelsBefore, $testing->labels());
+    }
+
+    #[Test]
+    #[TestDox('Serialization preserves predict output')]
+    public function testSerializationPreservesPredictOutput() : void
+    {
+        [$testing] = $this->trainEstimatorAndGetTestingSet();
+
+        $predictionsBefore = $this->estimator->predict($testing);
+
+        $copy = unserialize(serialize($this->estimator));
+
+        self::assertInstanceOf(MLPRegressor::class, $copy);
+        self::assertTrue($copy->trained());
+
+        $predictionsAfter = $copy->predict($testing);
+
+        self::assertCount($testing->numSamples(), $predictionsAfter);
+
+        foreach ($predictionsAfter as $i => $prediction) {
+            self::assertEqualsWithDelta((float) $predictionsBefore[$i], (float) $prediction, 1e-8);
+        }
+    }
+
+    /**
+     * @return array{0: Unlabeled}
+     */
+    private function trainEstimatorAndGetTestingSet() : array
+    {
+        $dataset = $this->generator->generate(self::TRAIN_SIZE + self::TEST_SIZE);
+
+        $dataset->apply(new ZScaleStandardizer());
+
+        $testing = $dataset->randomize()->take(self::TEST_SIZE);
+
+        $folds = $dataset->fold(3);
+
+        $this->estimator->train($folds[0]);
+        $this->estimator->partial($folds[1]);
+        $this->estimator->partial($folds[2]);
+
+        return [$testing];
+    }
+
     #[Test]
     #[TestDox('Train incompatible')]
     public function testTrainIncompatible() : void

From d538799498733daef3abe4945b687078550e4a79 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 19:01:18 +0200
Subject: [PATCH 33/36] ML-396 added USE_NUMPOWER_TRANSPOSE option to Network

---
 tests/Regressors/MLPRegressors/MLPRegressorTest.php | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
index ddd633628..1198d02b5 100644
--- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php
+++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
@@ -26,7 +26,6 @@
 use Rubix\ML\Exceptions\InvalidArgumentException;
 use Rubix\ML\Exceptions\RuntimeException;
 use PHPUnit\Framework\TestCase;
-use function Apphp\PrettyPrint\pp;
 
 #[Group('Regressors')]
 #[CoversClass(MLPRegressor::class)]

From f333c67ec7459c5c50a7b1771a891c94e0857f03 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 23:53:37 +0200
Subject: [PATCH 34/36] ML-396 fixed issue with samples normalization

---
 composer.json                                 |  4 +-
 src/NeuralNet/Networks/Network.php            | 34 ++++----
 tests/NeuralNet/Networks/NetworkTest.php      | 25 ++++++
 .../MLPRegressors/MLPRegressorTest.php        | 78 -------------------
 4 files changed, 45 insertions(+), 96 deletions(-)

diff --git a/composer.json b/composer.json
index a703df15b..f0e963cd5 100644
--- a/composer.json
+++ b/composer.json
@@ -38,6 +38,7 @@
         "andrewdalpino/okbloomer": "^1.0",
         "psr/log": "^1.1|^2.0|^3.0",
         "rubix/tensor": "^3.0",
+        "rubixml/numpower": "dev-main",
         "symfony/polyfill-mbstring": "^1.0",
         "symfony/polyfill-php80": "^1.17",
         "symfony/polyfill-php82": "^1.27",
@@ -52,7 +53,8 @@
         "phpstan/phpstan": "^2.0",
         "phpstan/phpstan-phpunit": "^2.0",
         "phpunit/phpunit": "^12.0",
-        "swoole/ide-helper": "^5.1"
+        "swoole/ide-helper": "^5.1",
+        "apphp/pretty-print": "^0.5.1"
     },
     "suggest": {
         "ext-tensor": "For fast Matrix/Vector computing",
diff --git a/src/NeuralNet/Networks/Network.php b/src/NeuralNet/Networks/Network.php
index 929813652..c504e43bf 100644
--- a/src/NeuralNet/Networks/Network.php
+++ b/src/NeuralNet/Networks/Network.php
@@ -73,8 +73,6 @@ class Network
      */
     protected Optimizer $optimizer;
 
-    protected const USE_NUMPOWER_TRANSPOSE = false;
-
     /**
      * @param Input $input
      * @param Hidden[] $hidden
@@ -192,11 +190,8 @@ public function infer(Dataset $dataset) : NDArray
             return NumPower::array([]);
         }
 
-        if (self::USE_NUMPOWER_TRANSPOSE) {
-            $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
-        } else {
-            $input = NumPower::array($this->rowsToColumns($dataset->samples()));
-        }
+        $normalizedSamples = $this->normalizeSamples($dataset->samples());
+        $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]);
 
         foreach ($this->layers() as $layer) {
             $input = $layer->infer($input);
@@ -208,11 +203,7 @@ public function infer(Dataset $dataset) : NDArray
             $input = NumPower::reshape($input, [1, $shape[0]]);
         }
 
-        if (self::USE_NUMPOWER_TRANSPOSE) {
-            return NumPower::transpose($input, [1, 0]);
-        } else {
-            return NumPower::array($this->columnsToRows($input->toArray()));
-        }
+        return NumPower::transpose($input, [1, 0]);
     }
 
     /**
@@ -228,11 +219,8 @@ public function roundtrip(Labeled $dataset) : float
             return 0.0;
         }
 
-        if (self::USE_NUMPOWER_TRANSPOSE) {
-            $input = NumPower::transpose(NumPower::array($dataset->samples()), [1, 0]);
-        } else {
-            $input = NumPower::array($this->rowsToColumns($dataset->samples()));
-        }
+        $normalizedSamples = $this->normalizeSamples($dataset->samples());
+        $input = NumPower::transpose(NumPower::array($normalizedSamples), [1, 0]);
 
         $this->feed($input);
 
@@ -326,6 +314,18 @@ private function rowsToColumns(array $rows) : array
         return $columns;
     }
 
+    /**
+     * Normalize samples to a strict list-of-lists with sequential numeric keys.
+     * NumPower's C extension expects packed arrays and can error or behave unpredictably
+     * when given arrays with non-sequential keys (e.g. after randomize/take/fold operations).
+     * @param array $samples
+     * @return array
+     */
+    private function normalizeSamples(array $samples) : array
+    {
+        return array_map('array_values', array_values($samples));
+    }
+
     /**
      * @param list<list<int|float|string>> $columns
      * @return list<list<int|float|string>>
diff --git a/tests/NeuralNet/Networks/NetworkTest.php b/tests/NeuralNet/Networks/NetworkTest.php
index 199f1e9f4..586d1ffbc 100644
--- a/tests/NeuralNet/Networks/NetworkTest.php
+++ b/tests/NeuralNet/Networks/NetworkTest.php
@@ -109,4 +109,29 @@ public function testNumParams() : void
 
         self::assertEquals(103, $this->network->numParams());
     }
+
+    #[Test]
+    #[TestDox('Normalize samples returns packed list-of-lists for NumPower')]
+    public function testNormalizeSamplesReturnsPackedListOfLists() : void
+    {
+        $samples = [
+            10 => [2 => 1.0, 5 => 2.0, 9 => 10],
+            20 => [2 => 3.0, 7 => 4.0, 1 => 1.0],
+        ];
+
+        $method = new ReflectionMethod(Network::class, 'normalizeSamples');
+        $method->setAccessible(true);
+
+        /** @var array $normalized */
+        $normalized = $method->invoke($this->network, $samples);
+
+        self::assertTrue(array_is_list($normalized));
+        self::assertCount(2, $normalized);
+
+        foreach ($normalized as $row) {
+            self::assertTrue(array_is_list($row));
+        }
+
+        self::assertSame([[1.0, 2.0, 10], [3.0, 4.0, 1.0]], $normalized);
+    }
 }
diff --git a/tests/Regressors/MLPRegressors/MLPRegressorTest.php b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
index 1198d02b5..26299b3b1 100644
--- a/tests/Regressors/MLPRegressors/MLPRegressorTest.php
+++ b/tests/Regressors/MLPRegressors/MLPRegressorTest.php
@@ -159,15 +159,9 @@ public function testTrainPartialPredict() : void
 
         $testing = $dataset->randomize()->take(self::TEST_SIZE);
 
-        $testingSamplesBefore = $testing->samples();
-        $testingLabelsBefore = $testing->labels();
-
         $folds = $dataset->fold(3);
 
         $this->estimator->train($folds[0]);
-
-        $predictionsBefore = $this->estimator->predict($testing);
-
         $this->estimator->partial($folds[1]);
         $this->estimator->partial($folds[2]);
 
@@ -183,69 +177,14 @@ public function testTrainPartialPredict() : void
 
         self::assertIsArray($losses);
         self::assertContainsOnlyFloat($losses);
-        self::assertNotEmpty($losses);
-
-        foreach ($losses as $epoch => $loss) {
-            self::assertIsInt($epoch);
-            self::assertGreaterThanOrEqual(1, $epoch);
-            self::assertFalse(is_nan($loss));
-            self::assertTrue(is_finite($loss));
-        }
 
         $scores = $this->estimator->scores();
 
         self::assertIsArray($scores);
         self::assertContainsOnlyFloat($scores);
-        self::assertNotEmpty($scores);
-
-        foreach ($scores as $epoch => $value) {
-            self::assertIsInt($epoch);
-            self::assertGreaterThanOrEqual(1, $epoch);
-            self::assertFalse(is_nan($value));
-            self::assertTrue(is_finite($value));
-            self::assertSame(0, $epoch % 3);
-        }
 
         $predictions = $this->estimator->predict($testing);
 
-        self::assertCount($testing->numSamples(), $predictions);
-
-        foreach ($predictions as $prediction) {
-            self::assertIsNumeric($prediction);
-            self::assertFalse(is_nan((float) $prediction));
-            self::assertTrue(is_finite((float) $prediction));
-        }
-
-        $predictions2 = $this->estimator->predict($testing);
-
-        self::assertCount($testing->numSamples(), $predictions2);
-
-        foreach ($predictions2 as $i => $prediction) {
-            self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-12);
-        }
-
-        self::assertEquals($testingSamplesBefore, $testing->samples());
-        self::assertEquals($testingLabelsBefore, $testing->labels());
-
-        $delta = 0.0;
-
-        foreach ($predictions as $i => $prediction) {
-            $delta += abs((float) $prediction - (float) $predictionsBefore[$i]);
-        }
-
-        self::assertGreaterThan(0.0, $delta);
-
-        $min = (float) $predictions[0];
-        $max = (float) $predictions[0];
-
-        foreach ($predictions as $prediction) {
-            $p = (float) $prediction;
-            $min = min($min, $p);
-            $max = max($max, $p);
-        }
-
-        self::assertGreaterThan(0.0, $max - $min);
-
         /** @var list<int|float> $labels */
         $labels = $testing->labels();
         $score = $this->metric->score(
@@ -253,23 +192,6 @@ public function testTrainPartialPredict() : void
             labels: $labels
         );
 
-        self::assertFalse(is_nan($score));
-        self::assertTrue(is_finite($score));
-        self::assertGreaterThan(-10.0, $score);
-
-        $copy = unserialize(serialize($this->estimator));
-
-        self::assertInstanceOf(MLPRegressor::class, $copy);
-        self::assertTrue($copy->trained());
-
-        $predictionsAfter = $copy->predict($testing);
-
-        self::assertCount($testing->numSamples(), $predictionsAfter);
-
-        foreach ($predictionsAfter as $i => $prediction) {
-            self::assertEqualsWithDelta((float) $predictions[$i], (float) $prediction, 1e-8);
-        }
-
         self::assertGreaterThanOrEqual(self::MIN_SCORE, $score);
     }
 

From 1583ee3e4eb7a65b50383bf165f649e229aa750b Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sat, 14 Feb 2026 23:58:25 +0200
Subject: [PATCH 35/36] ML-396 removed unneeded packages from composer

---
 composer.json | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/composer.json b/composer.json
index f0e963cd5..d7810b2de 100644
--- a/composer.json
+++ b/composer.json
@@ -37,7 +37,6 @@
         "amphp/parallel": "^1.3",
         "andrewdalpino/okbloomer": "^1.0",
         "psr/log": "^1.1|^2.0|^3.0",
-        "rubix/tensor": "^3.0",
         "rubixml/numpower": "dev-main",
         "symfony/polyfill-mbstring": "^1.0",
         "symfony/polyfill-php80": "^1.17",
@@ -52,9 +51,7 @@
         "phpstan/extension-installer": "^1.0",
         "phpstan/phpstan": "^2.0",
         "phpstan/phpstan-phpunit": "^2.0",
-        "phpunit/phpunit": "^12.0",
-        "swoole/ide-helper": "^5.1",
-        "apphp/pretty-print": "^0.5.1"
+        "phpunit/phpunit": "^12.0"
     },
     "suggest": {
         "ext-tensor": "For fast Matrix/Vector computing",

From 57037c623914b67fb53a8ef77101b081bb0fc12d Mon Sep 17 00:00:00 2001
From: Samuel Akopyan <samuel.akopyan@thrivedx.com>
Date: Sun, 15 Feb 2026 00:00:00 +0200
Subject: [PATCH 36/36] ML-396 removed unneeded packages from composer

---
 composer.json | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/composer.json b/composer.json
index d7810b2de..a703df15b 100644
--- a/composer.json
+++ b/composer.json
@@ -37,7 +37,7 @@
         "amphp/parallel": "^1.3",
         "andrewdalpino/okbloomer": "^1.0",
         "psr/log": "^1.1|^2.0|^3.0",
-        "rubixml/numpower": "dev-main",
+        "rubix/tensor": "^3.0",
         "symfony/polyfill-mbstring": "^1.0",
         "symfony/polyfill-php80": "^1.17",
         "symfony/polyfill-php82": "^1.27",
@@ -51,7 +51,8 @@
         "phpstan/extension-installer": "^1.0",
         "phpstan/phpstan": "^2.0",
         "phpstan/phpstan-phpunit": "^2.0",
-        "phpunit/phpunit": "^12.0"
+        "phpunit/phpunit": "^12.0",
+        "swoole/ide-helper": "^5.1"
     },
     "suggest": {
         "ext-tensor": "For fast Matrix/Vector computing",