Neural Network from Scratch in Vanilla JS
No TensorFlow. No libraries. Build a fully-connected neural network from first principles: matrix-free forward pass, sigmoid and ReLU activations, mean-squared-error loss, backpropagation using the chain rule, and stochastic gradient descent. Test it on XOR and sine function approximation.
- JavaScript arrays and functions
- Derivatives and the chain rule from calculus
- Basic understanding of what a neural network is (layers, neurons)
Layer and Network Data Structure
Each layer stores weights, biases and cached activations needed for backprop. We use plain arrays (no matrices — easier to follow):
/**
 * Draw a small random initial weight.
 *
 * Math.random() yields a value in [0, 1); centring it on zero and halving it
 * gives a result in [-0.25, 0.25).
 *
 * @returns {number} A random weight.
 */
function randomWeight() {
  const centred = Math.random() - 0.5;
  return centred * 0.5;
}
/**
 * One fully-connected layer: a weight matrix, a bias vector, the name of its
 * activation function, and the values cached by the forward pass.
 */
class Layer {
  /**
   * @param {number} inputSize - Number of values feeding into this layer.
   * @param {number} outputSize - Number of neurons in this layer.
   * @param {string} [activation='sigmoid'] - Name of the activation to use.
   */
  constructor(inputSize, outputSize, activation = 'sigmoid') {
    // One row of weights per output neuron: w[j][i] connects input i to output j.
    const buildRow = () => Array.from({ length: inputSize }, () => randomWeight());
    this.w = Array.from({ length: outputSize }, buildRow);

    // Biases start at zero, one per output neuron.
    this.b = new Array(outputSize).fill(0);

    this.activation = activation;

    // Cache for backprop; all three stay null until a forward pass fills them.
    this.input = null; // the vector that was fed into this layer
    this.z = null;     // pre-activation values (w·x + b)
    this.a = null;     // post-activation output
  }
}
/**
 * A feed-forward network: an ordered list of fully-connected layers.
 */
class NeuralNetwork {
  /**
   * @param {number[]} topology - Layer sizes, input first; e.g. [2, 4, 4, 1]
   *   means 2 inputs, two hidden layers of 4 neurons, and 1 output.
   * @param {string[]} [activations] - Activation name for each non-input
   *   layer; a missing entry (or a missing list) falls back to 'sigmoid'.
   *   Note: inside this constructor the parameter shadows any outer
   *   `activations` binding.
   */
  constructor(topology, activations) {
    // Every size after the first becomes a layer fed by the size before it.
    this.layers = topology.slice(1).map((outputSize, idx) => {
      const inputSize = topology[idx];
      const activationName = activations?.[idx] ?? 'sigmoid';
      return new Layer(inputSize, outputSize, activationName);
    });
  }
}
Activation Functions and Derivatives
/**
 * Lookup table of activation functions keyed by name.
 * Each entry provides `fn` (the activation itself) and `der` (its derivative);
 * both take the pre-activation value z.
 */
const activations = (() => {
  // Logistic function, shared by sigmoid's value and its derivative.
  const logistic = (z) => 1 / (1 + Math.exp(-z));

  return {
    sigmoid: {
      fn: logistic,
      // σ'(z) = σ(z) · (1 − σ(z))
      der: (z) => {
        const s = logistic(z);
        return s * (1 - s);
      },
    },
    relu: {
      fn: (z) => Math.max(0, z),
      // Slope is 1 for positive inputs and 0 otherwise (including z = 0).
      der: (z) => (z > 0 ? 1 : 0),
    },
    tanh: {
      fn: (z) => Math.tanh(z),
      // tanh'(z) = 1 − tanh²(z)
      der: (z) => {
        const t = Math.tanh(z);
        return 1 - t ** 2;
      },
    },
    linear: {
      fn: (z) => z,
      der: () => 1,
    },
  };
})();
Forward Pass
Propagate input through every layer. For each layer, compute
z = W·x + b then a = activation(z):
/**
 * Run one input vector through every layer in order.
 *
 * Side effect: each layer's `input`, `z` and `a` caches are overwritten with
 * the values from this pass.
 *
 * @param {number[]} input - Values for the first layer.
 * @returns {number[]} The last layer's activations (or `input` itself when
 *   the network has no layers).
 */
NeuralNetwork.prototype.forward = function(input) {
  let signal = input;
  for (const layer of this.layers) {
    layer.input = signal;
    const act = activations[layer.activation];

    // z[j] = b[j] + sum_i(w[j][i] * signal[i]), accumulated starting from the bias
    const preActivation = [];
    for (let j = 0; j < layer.w.length; j++) {
      const row = layer.w[j];
      let total = layer.b[j];
      for (let i = 0; i < row.length; i++) {
        total += row[i] * signal[i];
      }
      preActivation.push(total);
    }
    layer.z = preActivation;

    // a[j] = activation(z[j]); this layer's output feeds the next layer
    layer.a = layer.z.map(act.fn);
    signal = layer.a;
  }
  return signal;
};
Loss Function
/**
 * Mean squared error: L = (1/N) * Σ(pred[i] - target[i])², with N = pred.length.
 *
 * @param {number[]} pred - Predicted values.
 * @param {number[]} target - Expected values, indexed like `pred`.
 * @returns {number} The mean of the squared differences.
 */
function mseLoss(pred, target) {
  let squaredErrorSum = 0;
  pred.forEach((p, i) => {
    squaredErrorSum += (p - target[i]) ** 2;
  });
  return squaredErrorSum / pred.length;
}
/**
 * Gradient of the mean squared error with respect to each prediction:
 * dL/d(pred[i]) = 2 * (pred[i] - target[i]) / N, with N = pred.length.
 *
 * @param {number[]} pred - Predicted values.
 * @param {number[]} target - Expected values, indexed like `pred`.
 * @returns {number[]} One gradient entry per prediction.
 */
function mseLossGrad(pred, target) {
  const count = pred.length;
  const gradAt = (p, i) => {
    const error = p - target[i];
    return 2 * error / count;
  };
  return pred.map(gradAt);
}
Backpropagation
The chain rule propagates gradients backward from output to input. For each layer (in reverse):
/**
 * Backpropagate a loss gradient through the network, last layer first.
 *
 * Reads the `input` and `z` values each layer cached during the most recent
 * forward pass; it does not modify any weights or biases.
 *
 * @param {number[]} lossGrad - dL/d(output[j]) for the final layer's outputs.
 * @returns {{dw: number[][], db: number[]}[]} Gradients per layer, in the
 *   same order as `this.layers`.
 */
NeuralNetwork.prototype.backward = function(lossGrad) {
  const layerCount = this.layers.length;
  const grads = new Array(layerCount); // filled from the back, indexed like this.layers

  // Gradient w.r.t. the current layer's output: starts as the loss gradient,
  // then becomes whatever the layer after it passed back.
  let upstream = lossGrad;

  for (let l = layerCount - 1; l >= 0; l--) {
    const layer = this.layers[l];
    const act = activations[layer.activation];

    // dL/dz[j] = upstream[j] * activation'(z[j])
    const dz = [];
    for (let j = 0; j < layer.z.length; j++) {
      dz.push(upstream[j] * act.der(layer.z[j]));
    }

    // dL/dw[j][i] = dz[j] * input[i]
    const dw = [];
    for (let j = 0; j < layer.w.length; j++) {
      const rowGrad = [];
      for (let i = 0; i < layer.w[j].length; i++) {
        rowGrad.push(dz[j] * layer.input[i]);
      }
      dw.push(rowGrad);
    }

    // dL/db[j] = dz[j]
    const db = [...dz];

    // dL/d(input[i]) = sum_j(dz[j] * w[j][i]) — handed to the layer before this one
    upstream = layer.input.map((_, i) => {
      let total = 0;
      for (let j = 0; j < layer.w.length; j++) {
        total += dz[j] * layer.w[j][i];
      }
      return total;
    });

    grads[l] = { dw, db };
  }
  return grads;
};
SGD Weight Update
/**
 * Apply one gradient-descent step, modifying every layer's weights and
 * biases in place.
 *
 * @param {{dw: number[][], db: number[]}[]} grads - Per-layer gradients,
 *   indexed like `this.layers`.
 * @param {number} [lr=0.01] - Learning rate.
 */
NeuralNetwork.prototype.update = function(grads, lr = 0.01) {
  this.layers.forEach((layer, l) => {
    const { dw, db } = grads[l];
    layer.w.forEach((row, j) => {
      // w[j][i] -= lr * dL/dw[j][i]
      row.forEach((_, i) => {
        row[i] -= lr * dw[j][i];
      });
      // b[j] -= lr * dL/db[j]
      layer.b[j] -= lr * db[j];
    });
  });
};
/**
 * One pass over the dataset, updating the network after every sample:
 * forward, measure loss, backpropagate, then apply the gradients.
 *
 * @param {object} net - Network exposing forward(), backward() and update().
 * @param {number[][]} inputs - One input vector per sample.
 * @param {number[][]} targets - Expected outputs, indexed like `inputs`.
 * @param {number} lr - Learning rate passed through to net.update().
 * @returns {number} Mean of the per-sample losses, each measured before that
 *   sample's update was applied.
 */
function trainStep(net, inputs, targets, lr) {
  let lossSum = 0;
  inputs.forEach((sample, k) => {
    const expected = targets[k];
    const pred = net.forward(sample);
    lossSum += mseLoss(pred, expected);
    const layerGrads = net.backward(mseLossGrad(pred, expected));
    net.update(layerGrads, lr);
  });
  return lossSum / inputs.length;
}
Train on XOR and Visualise
// XOR dataset
const XOR_X = [[0,0],[0,1],[1,0],[1,1]];
const XOR_Y = [[0],[1],[1],[0]];

// 2 inputs → two hidden layers of 4 (sigmoid) → 1 output (sigmoid)
const net = new NeuralNetwork([2, 4, 4, 1], ['sigmoid','sigmoid','sigmoid']);

// Training loop
const canvas = document.getElementById('loss-chart');
const ctx = canvas.getContext('2d');
const lossHistory = [];
const MAX_EPOCHS = 3000;
let epoch = 0;

/**
 * Print the network's prediction for every XOR input.
 * Expected output: values near 0, 1, 1, 0.
 */
function reportResults() {
  console.log('XOR results after training:');
  for (let k = 0; k < XOR_X.length; k++) {
    const pred = net.forward(XOR_X[k])[0].toFixed(3);
    console.log(`[${XOR_X[k]}] → ${pred} (expected ${XOR_Y[k][0]})`);
  }
}

/**
 * Train for a batch of epochs, redraw the loss chart, then either schedule
 * the next batch on the following animation frame or, once MAX_EPOCHS is
 * reached, print the final predictions.
 *
 * @param {number} [steps=50] - Epochs to run before redrawing.
 */
function trainEpochs(steps = 50) {
  for (let i = 0; i < steps; i++) {
    const loss = trainStep(net, XOR_X, XOR_Y, 0.3);
    lossHistory.push(loss);
    epoch++;
  }

  // Quick loss chart (losses above 1 are clamped to the top edge)
  ctx.clearRect(0, 0, canvas.width, canvas.height);
  ctx.strokeStyle = '#22c55e';
  ctx.lineWidth = 2;
  ctx.beginPath();
  lossHistory.forEach((l, i) => {
    const x = i / lossHistory.length * canvas.width;
    const y = (1 - Math.min(l, 1)) * canvas.height;
    if (i === 0) {
      ctx.moveTo(x, y);
    } else {
      ctx.lineTo(x, y);
    }
  });
  ctx.stroke();

  if (epoch < MAX_EPOCHS) {
    requestAnimationFrame(() => trainEpochs(50));
  } else {
    // Training continues asynchronously across animation frames, so the
    // results can only be reported from here. Logging right after the first
    // trainEpochs() call would show the network after just one batch.
    reportResults();
  }
}
trainEpochs();
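XOR is the classic "non-linearly separable" test — a network with no hidden layer can't solve it, but a network with hidden layers can. A single hidden layer is already enough: a 2-4-1 network usually solves XOR in ~2000 epochs with LR=0.3 (the example above uses two hidden layers, 2-4-4-1). If it doesn't converge, re-randomise the weights and train again (local minima are common with XOR).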