Deep Learning — Neural Networks at Scale
Deep learning is a branch of machine learning that uses neural networks with many layers to learn hierarchical representations from raw data.
Key Architectures
- Convolutional networks (CNNs) — images and vision.
- Recurrent networks (RNNs, LSTMs) — sequences.
- Transformers — language models, vision transformers, multimodal AI.
- Generative models — GANs, VAEs, diffusion models.
Training Essentials
Training relies on stochastic gradient descent with backpropagation, combined with regularisation, learning-rate schedules, batch normalisation, and modern optimisers such as AdamW.
Code Examples: Deep Learning (5 runnable snippets)
Copy any block into a file or notebook and run it end-to-end — each example stands alone.
Example 1: Fine-tune a classifier head on frozen embeddings
# Example 1: Fine-tune a classifier head on frozen embeddings -- Deep Learning
import torch
from torch import nn

torch.manual_seed(0)

# Stand-in for features from a frozen backbone: 800 fixed 384-d embeddings
# with one of 4 class labels each.
emb_dim = 384
train_emb = torch.randn(800, emb_dim)
train_y = torch.randint(0, 4, (800,))

# Only this small head is optimised; the embeddings themselves never change.
head = nn.Sequential(nn.Dropout(0.1), nn.Linear(emb_dim, 4))
opt = torch.optim.AdamW(head.parameters(), lr=3e-4, weight_decay=1e-2)
loss_fn = nn.CrossEntropyLoss()

for step in range(200):
    # Sample a random mini-batch of 64 embeddings (with replacement).
    idx = torch.randint(0, len(train_emb), (64,))
    logits = head(train_emb[idx])
    loss = loss_fn(logits, train_y[idx])
    opt.zero_grad()
    loss.backward()
    opt.step()
    if step % 40 == 0:
        # Batch accuracy on the mini-batch just trained on.
        acc = (logits.argmax(1) == train_y[idx]).float().mean()
        print(f"step {step:3d} loss={loss.item():.3f} acc={acc.item():.3f}")
Example 2: Autoencoder for anomaly detection
# Example 2: Autoencoder for anomaly detection -- Deep Learning
import torch
from torch import nn

torch.manual_seed(0)

# In-distribution samples vs. held-out outliers that are shifted and scaled
# well away from the training distribution.
normal = torch.randn(1_000, 16)
anomaly = torch.randn(50, 16) * 3 + 4

class AE(nn.Module):
    """Tiny bottleneck autoencoder: 16 -> 8 -> h -> 8 -> 16."""

    def __init__(self, d=16, h=4):
        super().__init__()
        self.enc = nn.Sequential(nn.Linear(d, 8), nn.ReLU(), nn.Linear(8, h))
        self.dec = nn.Sequential(nn.Linear(h, 8), nn.ReLU(), nn.Linear(8, d))

    def forward(self, x):
        return self.dec(self.enc(x))

ae = AE()
opt = torch.optim.Adam(ae.parameters(), lr=1e-3)

# Full-batch reconstruction training on normal data only; the model never
# sees an anomaly, so outliers should reconstruct poorly.
for epoch in range(40):
    loss = nn.functional.mse_loss(ae(normal), normal)
    opt.zero_grad()
    loss.backward()
    opt.step()

# Per-sample reconstruction error is the anomaly score.
err_normal = ((ae(normal) - normal) ** 2).mean(dim=1).detach()
err_anomaly = ((ae(anomaly) - anomaly) ** 2).mean(dim=1).detach()
print(f"normal median error : {err_normal.median():.3f}")
print(f"anomaly median error : {err_anomaly.median():.3f}")
Example 3: Self-attention from scratch in NumPy
# Example 3: Self-attention from scratch in NumPy -- Deep Learning
import numpy as np

rng = np.random.default_rng(0)
T, d_model, d_k = 6, 16, 8  # sequence length, dims

# Random token representations and scaled random projection matrices
# for queries, keys and values.
x = rng.standard_normal((T, d_model))
Wq = rng.standard_normal((d_model, d_k)) / np.sqrt(d_model)
Wk = rng.standard_normal((d_model, d_k)) / np.sqrt(d_model)
Wv = rng.standard_normal((d_model, d_k)) / np.sqrt(d_model)

Q, K, V = x @ Wq, x @ Wk, x @ Wv

def _softmax(z):
    """Row-wise softmax with max-subtraction for numerical stability."""
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

# Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V
weights = _softmax(Q @ K.T / np.sqrt(d_k))
out = weights @ V

print("attention matrix (rounded):\n", np.round(weights, 2))
print("\noutput shape :", out.shape)
Example 4: PyTorch MLP training loop
# Example 4: PyTorch MLP training loop -- Deep Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

torch.manual_seed(0)

# Synthetic binary-classification data: labels from a random linear rule
# plus Gaussian label noise.
X = torch.randn(2_000, 20)
w = torch.randn(20, 1)
y = (X @ w + 0.3 * torch.randn(2_000, 1) > 0).float()

loader = DataLoader(TensorDataset(X, y), batch_size=64, shuffle=True)

model = nn.Sequential(
    nn.Linear(20, 64), nn.ReLU(),
    nn.Linear(64, 32), nn.ReLU(),
    nn.Linear(32, 1),  # raw logit; BCEWithLogitsLoss applies the sigmoid
)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCEWithLogitsLoss()

for epoch in range(1, 6):
    total = 0.0
    for xb, yb in loader:
        opt.zero_grad()
        loss = loss_fn(model(xb), yb)
        loss.backward()
        opt.step()
        # Sample-weighted running sum so the epoch average is exact even
        # though the last batch may be smaller.
        total += loss.item() * xb.size(0)
    print(f"epoch {epoch}: loss = {total/len(loader.dataset):.4f}")
Example 5: Keras CNN for MNIST
# Example 5: Keras CNN for MNIST -- Deep Learning
import tensorflow as tf
from tensorflow.keras import layers, models

# Load MNIST (downloads on first run), scale pixels to [0, 1], and add a
# trailing channel axis so images are (28, 28, 1).
(x_tr, y_tr), (x_te, y_te) = tf.keras.datasets.mnist.load_data()
x_tr = x_tr[..., None] / 255.0
x_te = x_te[..., None] / 255.0

# Two conv stages, then global average pooling (instead of Flatten) to keep
# the parameter count small before the 10-way softmax.
model = models.Sequential([
    layers.Conv2D(32, 3, activation="relu", input_shape=(28, 28, 1)),
    layers.MaxPool2D(),
    layers.Conv2D(64, 3, activation="relu"),
    layers.GlobalAveragePooling2D(),
    layers.Dense(10, activation="softmax"),
])

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",  # integer labels, no one-hot
    metrics=["accuracy"],
)

# Short training run with a 10% validation split, then held-out accuracy.
model.fit(x_tr, y_tr, epochs=3, batch_size=128, validation_split=0.1)
print("test acc:", round(model.evaluate(x_te, y_te, verbose=0)[1], 4))