REVISED: Wednesday, October 8, 2025
1. OCAML MULTI-LAYER NEURAL NETWORK (MLP)
(*
========================================
ocaml C:\AI2025\lesson7.ml
Lesson 7: Multi-Layer Neural Network (MLP)
========================================
Objective:
1. Implement a fully connected network with two layers.
2. Add softmax output and cross-entropy loss.
3. Perform forward passes, backpropagation, and per-sample gradient updates.
*)
(* -----------------------------
1. Vector and Matrix Utilities
----------------------------- *)
type vector = float list
type matrix = float list list
let vector_add v1 v2 = List.map2 (+.) v1 v2
let vector_sub v1 v2 = List.map2 (-.) v1 v2
let vector_map f v = List.map f v
let scalar_vector_mul s v = List.map (fun x -> s *. x) v
let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
let rec transpose m =
  match m with
  | [] | [] :: _ -> []
  | rows -> List.map List.hd rows :: transpose (List.map List.tl rows)
let outer_product v1 v2 =
  List.map (fun x -> List.map (fun y -> x *. y) v2) v1
let matrix_add m1 m2 = List.map2 vector_add m1 m2
let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
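(* Worked examples for the two less familiar helpers:
   transpose [[1.; 2.]; [3.; 4.]] = [[1.; 3.]; [2.; 4.]]
   outer_product [1.; 2.] [3.; 4.] = [[3.; 4.]; [6.; 8.]]
   outer_product u v builds the matrix with entries u_i *. v_j,
   exactly the shape of the weight gradients computed below. *)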
(* -----------------------------
2. Activation Functions
----------------------------- *)
let relu x = if x > 0. then x else 0.
let relu_derivative x = if x > 0. then 1. else 0.
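(* relu_derivative adopts the usual convention relu'(0.) = 0.;
   any value in [0., 1.] would be a valid subgradient at 0. *)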
let softmax v =
  let max_v = List.fold_left max neg_infinity v in
  let exps = List.map (fun x -> exp (x -. max_v)) v in
  let sum_exps = List.fold_left (+.) 0.0 exps in
  List.map (fun x -> x /. sum_exps) exps
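(* Subtracting the maximum before exponentiating does not change the
   result (the common factor cancels in the quotient) but keeps exp from
   overflowing on large inputs. For example,
   softmax [1.; 2.; 3.] is approximately [0.0900; 0.2447; 0.6652]. *)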
(* -----------------------------
3. Loss Function
----------------------------- *)
let cross_entropy_loss predicted target =
  (* predicted and target are vectors of the same length; eps guards
     against log 0. Computes - sum_i t_i *. log (p_i +. eps). *)
  let eps = 1e-12 in
  -. List.fold_left (+.) 0.0 (List.map2 (fun p t -> t *. log (p +. eps)) predicted target)
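(* With a one-hot target only the true class contributes, e.g.
   cross_entropy_loss [0.7; 0.3] [1.; 0.] = -. log 0.7, approximately 0.357. *)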
(* -----------------------------
4. Network Parameters
----------------------------- *)
let weights1 : matrix = [
  [0.5; -0.3];
  [0.2; 0.8];
  [-0.5; 0.1]
]
let biases1 : vector = [0.1; -0.1; 0.05]
let weights2 : matrix = [
  [0.3; -0.2; 0.4];
  [0.1; 0.5; -0.3]
]
let biases2 : vector = [0.05; -0.05]
let learning_rate = 0.01
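(* Shapes: weights1 is 3x2 (2 inputs -> 3 hidden units) and weights2 is
   2x3 (3 hidden units -> 2 output classes), so mat_vec_mul applies each
   layer as a list of row-vector dot products. *)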
(* -----------------------------
5. Forward Pass
----------------------------- *)
(* A single forward pass, parameterised over the weights and biases so
   the same function serves the initial network, each training step,
   and the trained model. *)
let forward_with_params w1 b1 w2 b2 input =
  (* Layer 1: affine map followed by ReLU *)
  let z1 = vector_add (mat_vec_mul w1 input) b1 in
  let a1 = vector_map relu z1 in
  (* Layer 2: affine map followed by softmax *)
  let z2 = vector_add (mat_vec_mul w2 a1) b2 in
  let a2 = softmax z2 in
  (a1, a2, z1, z2)
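(* Worked example on the initial parameters, with input [1.; 2.]:
   z1 = [0.5 -. 0.6 +. 0.1; 0.2 +. 1.6 -. 0.1; -0.5 +. 0.2 +. 0.05]
      = [0.0; 1.7; -0.25]
   a1 = [0.0; 1.7; 0.0]                   (ReLU zeroes the negative entry)
   z2 = [-0.34 +. 0.05; 0.85 -. 0.05] = [-0.29; 0.8]
   a2 = softmax z2, approximately [0.252; 0.748] *)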
(* -----------------------------
6. Backpropagation
----------------------------- *)
let backprop (w1, b1, w2, b2) input target =
  (* Forward pass with the current parameters *)
  let (a1, a2, z1, _) = forward_with_params w1 b1 w2 b2 input in
  (* Output layer error (softmax + cross-entropy derivative) *)
  let delta2 = vector_sub a2 target in
  (* Gradients for the second layer *)
  let weight2_grad = outer_product delta2 a1 in
  let bias2_grad = delta2 in
  (* Hidden layer error: backpropagate through w2, then gate by relu' *)
  let w2_t = transpose w2 in
  let delta1_pre = mat_vec_mul w2_t delta2 in
  let delta1 = List.map2 ( *. ) delta1_pre (vector_map relu_derivative z1) in
  (* Gradients for the first layer *)
  let weight1_grad = outer_product delta1 input in
  let bias1_grad = delta1 in
  (* Gradient-descent step on the supplied parameters *)
  let w1_new = matrix_sub w1 (scalar_matrix_mul learning_rate weight1_grad) in
  let b1_new = vector_sub b1 (scalar_vector_mul learning_rate bias1_grad) in
  let w2_new = matrix_sub w2 (scalar_matrix_mul learning_rate weight2_grad) in
  let b2_new = vector_sub b2 (scalar_vector_mul learning_rate bias2_grad) in
  (w1_new, b1_new, w2_new, b2_new)
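(* Why delta2 = a2 - target: for softmax outputs p and cross-entropy
   loss L = - sum_i t_i *. log p_i, the gradient with respect to the
   pre-softmax values collapses to dL/dz2_i = p_i -. t_i, so no separate
   softmax derivative is needed. *)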
(* -----------------------------
7. Training Loop
----------------------------- *)
let train inputs targets epochs =
  let rec loop params epoch =
    if epoch = 0 then params
    else
      (* One epoch: fold backprop over the whole dataset, threading the
         updated parameters through every step. *)
      let params' = List.fold_left2 backprop params inputs targets in
      loop params' (epoch - 1)
  in
  loop (weights1, biases1, weights2, biases2) epochs
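(* Note: parameters are updated after every sample (stochastic gradient
   descent) rather than once per batch; each epoch is one full pass over
   the training set. *)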
(* -----------------------------
8. Example Usage
----------------------------- *)
let inputs = [
  [1.0; 2.0];
  [0.5; -1.0];
  [0.0; 0.0]
]
let targets = [
  [1.0; 0.0];
  [0.0; 1.0];
  [1.0; 0.0]
]
let epochs = 1000
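(* Each target is a one-hot class label: class 0 for the first and third
   inputs, class 1 for the second. *)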
(* -----------------------------
9. Run Training and Print Predictions
----------------------------- *)
let () =
  (* Train, then print each input with the trained model's prediction
     and its target. *)
  let w1_tr, b1_tr, w2_tr, b2_tr = train inputs targets epochs in
  List.iter2 (fun inp targ ->
    let (_, pred, _, _) = forward_with_params w1_tr b1_tr w2_tr b2_tr inp in
    Printf.printf "Input: [%s] -> Predicted: [%s], Target: [%s]\n"
      (String.concat "; " (List.map string_of_float inp))
      (String.concat "; " (List.map string_of_float pred))
      (String.concat "; " (List.map string_of_float targ))
  ) inputs targets
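(* Optional extension (a sketch; its output does not appear in the
   transcript below): reuse cross_entropy_loss to report the mean
   training loss of the trained model. Retrains from scratch for
   simplicity. *)
let () =
  let w1_tr, b1_tr, w2_tr, b2_tr = train inputs targets epochs in
  let total =
    List.fold_left2 (fun acc inp targ ->
      let (_, pred, _, _) = forward_with_params w1_tr b1_tr w2_tr b2_tr inp in
      acc +. cross_entropy_loss pred targ)
      0.0 inputs targets
  in
  Printf.printf "Mean training loss: %.6f\n"
    (total /. float_of_int (List.length inputs))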
2. CONCLUSION
Windows PowerShell
Copyright (C) Microsoft Corporation. All rights reserved.
Install the latest PowerShell for new features and improvements! https://aka.ms/PSWindows
OCaml version: The OCaml toplevel, version 5.3.0
Coq-LSP version: 0.2.3
Loading personal and system profiles took 1143ms.
PS C:\Users\User> ocaml C:\AI2025\lesson7.ml
Input: [1.; 2.] -> Predicted: [0.253394746034; 0.746605253966], Target: [1.; 0.]
Input: [0.5; -1.] -> Predicted: [0.559716521644; 0.440283478356], Target: [0.; 1.]
Input: [0.; 0.] -> Predicted: [0.541600918545; 0.458399081455], Target: [1.; 0.]
PS C:\Users\User>