REVISED: Friday, October 10, 2025
1. OCAML FULLY FUNCTIONAL MLP TRAINING LOOP
(*
===========================================================
Run with: ocaml C:\AI2025\lesson9.ml
Lesson 9: Fully Functional MLP Training Loop in OCaml
===========================================================
Objective:
1. Train the MLP class on multiple examples.
2. Use cross-entropy loss.
3. Track accuracy during training.
*)
(* -----------------------------
1. Vector and Matrix Utilities
----------------------------- *)
type vector = float list
type matrix = float list list
let vector_add v1 v2 = List.map2 (+.) v1 v2
let vector_sub v1 v2 = List.map2 (-.) v1 v2
let vector_map f v = List.map f v
let scalar_vector_mul s v = List.map (fun x -> s *. x) v
let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
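(* Transpose by repeatedly peeling off row heads; assumes a non-empty, rectangular matrix. *)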
let transpose m =
  let rec transpose_aux m acc =
    match List.hd m with
    | [] -> List.rev acc
    | _ ->
      let heads = List.map List.hd m in
      let tails = List.map List.tl m in
      transpose_aux tails (heads :: acc)
  in
  transpose_aux m []
let outer_product v1 v2 =
  List.map (fun x -> List.map (fun y -> x *. y) v2) v1
let matrix_add m1 m2 = List.map2 vector_add m1 m2
let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
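(* Quick checks: mat_vec_mul [[1.;2.];[3.;4.]] [1.;1.] = [3.; 7.]
   and transpose [[1.;2.];[3.;4.]] = [[1.;3.];[2.;4.]] *)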
(* -----------------------------
2. Activations and Derivatives
----------------------------- *)
let relu x = if x > 0. then x else 0.
let relu_derivative x = if x > 0. then 1. else 0.
let softmax v =
  (* Softmax operates on an entire vector, so its type is float list -> float list. *)
  (* Subtracting the max before exponentiating avoids overflow without changing the result. *)
  let max_v = List.fold_left max neg_infinity v in
  let exps = List.map (fun x -> exp (x -. max_v)) v in
  let sum_exps = List.fold_left (+.) 0.0 exps in
  List.map (fun x -> x /. sum_exps) exps
(* -----------------------------
3. MLP Class
----------------------------- *)
class mlp (layer_sizes : int list)
          (activations : (float -> float) list)
          (activation_derivs : (float -> float) list)
          (learning_rate : float) =
  object (self)
    val mutable weights : matrix list =
      let rand_matrix rows cols =
        (* Uniform samples in (-0.5, 0.5) *)
        let rnd () = (Random.float 2. -. 1.) *. 0.5 in
        List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
      in
      (* One (n_out x n_in) matrix per consecutive pair of layer sizes *)
      let rec build ws ls =
        match ls with
        | [] | [_] -> ws
        | n1 :: n2 :: rest -> build (ws @ [rand_matrix n2 n1]) (n2 :: rest)
      in
      build [] layer_sizes
    val mutable biases : vector list =
      List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes)
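    (* Forward pass: returns (activations, pre-activations), both in forward
       order; the activation list starts with the input itself. *)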
    method forward (input : vector) : vector list * vector list =
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w :: wt, b :: bt, act :: at ->
          let z = vector_add (mat_vec_mul w (List.hd a)) b in
          let a_next = vector_map act z in
          f (a_next :: a) (z :: zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in
      f [input] [] weights biases activations
    method backprop (input : vector) (target : vector) =
      let (a_list, z_list) = self#forward input in
      (* Work from the output layer back: reverse activations and
         pre-activations so the last layer comes first.
         a_rev = [a_L; ...; a_1; input]. *)
      let a_rev = List.rev a_list in
      let z_rev = List.rev z_list in
      (* Cross-entropy over a softmax of the (identity) output layer gives
         the usual shortcut: delta_L = softmax a_L - target. *)
      let delta_output = vector_sub (softmax (List.hd a_rev)) target in
      let rec propagate ws zs a_prevs derivs delta grads =
        match ws, zs, a_prevs, derivs with
        | [], [], [], [] -> grads (* consing has restored forward order *)
        | w :: wt, _z :: zt, a_prev :: at, _d :: dt ->
          (* This layer's gradients: dL/dW = delta . a_prev^T, dL/db = delta. *)
          let w_grad = outer_product delta a_prev in
          let b_grad = delta in
          (* Delta for the previous layer: (W^T delta) multiplied
             elementwise by the derivative at that layer's pre-activation. *)
          let delta_prev =
            match zt, dt with
            | z_prev :: _, d_prev :: _ ->
              List.map2 ( *. ) (mat_vec_mul (transpose w) delta)
                (List.map d_prev z_prev)
            | _ -> delta (* reached the input layer *)
          in
          propagate wt zt at dt delta_prev ((w_grad, b_grad) :: grads)
        | _ -> failwith "Mismatch in backprop"
      in
      propagate (List.rev weights) z_rev (List.tl a_rev)
        (List.rev activation_derivs) delta_output []
    method update_weights grads =
      let ws', bs' = List.split grads in
      weights <-
        List.map2
          (fun w dw -> matrix_sub w (scalar_matrix_mul learning_rate dw))
          weights ws';
      biases <-
        List.map2
          (fun b db -> vector_sub b (scalar_vector_mul learning_rate db))
          biases bs'
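    (* Note: despite the name, train_batch updates the weights after every
       example (per-example stochastic gradient descent), not once per batch. *)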
    method train_batch (inputs : vector list) (targets : vector list) =
      List.iter2
        (fun x y ->
          let grads = self#backprop x y in
          self#update_weights grads)
        inputs targets
    method predict (input : vector) =
      let (activations, _) = self#forward input in
      List.hd (List.rev activations)
  end
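(* The objectives call for cross-entropy loss and accuracy tracking, which the
   class above never computes explicitly (the loss enters only through its
   gradient in backprop). A minimal sketch of both follows; cross_entropy,
   argmax, and accuracy are helper names introduced here for illustration,
   not part of the original lesson. *)
(* Cross-entropy of a probability vector against a one-hot target;
   max p 1e-12 guards against log 0. *)
let cross_entropy (probs : vector) (target : vector) =
  -. (List.fold_left (+.) 0.0
        (List.map2 (fun t p -> t *. log (max p 1e-12)) target probs))

(* Index of the largest component of a vector. *)
let argmax (v : vector) =
  let (_, _, best) =
    List.fold_left
      (fun (i, best_v, best_i) x ->
        if x > best_v then (i + 1, x, i) else (i + 1, best_v, best_i))
      (0, neg_infinity, 0) v
  in
  best

(* Fraction of examples whose predicted class matches the target class. *)
let accuracy net inputs targets =
  let correct =
    List.fold_left2
      (fun acc inp tgt ->
        if argmax (softmax (net#predict inp)) = argmax tgt then acc + 1
        else acc)
      0 inputs targets
  in
  float_of_int correct /. float_of_int (List.length inputs)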
(* -----------------------------
4. Training Example (XOR)
----------------------------- *)
let () =
  Random.self_init ();
  (* XOR dataset; one-hot targets: [1.;0.] encodes XOR = 1, [0.;1.] encodes XOR = 0 *)
  let inputs = [[0.;0.]; [0.;1.]; [1.;0.]; [1.;1.]] in
  let targets = [[0.;1.]; [1.;0.]; [1.;0.]; [0.;1.]] in
  (* 2-input -> 2-hidden -> 2-output MLP *)
  (* Replace `softmax` (which is float list -> float list) with an identity
     scalar function `(fun x -> x)`, because the MLP expects all activations
     to be (float -> float). *)
  let mlp1 = new mlp [2;2;2] [relu; (fun x -> x)] [relu_derivative; (fun _ -> 1.0)] 0.1 in
  (* Training loop *)
  for epoch = 1 to 500 do
    mlp1#train_batch inputs targets;
    if epoch mod 50 = 0 then begin
      (* Apply `softmax` after prediction to normalize output probabilities.
         This keeps the internal activations scalar while producing a proper
         probability vector. *)
      let predictions =
        List.map (fun inp -> softmax (mlp1#predict inp)) inputs
      in
      Printf.printf "Epoch %d: " epoch;
      List.iter
        (fun v ->
          Printf.printf "[%s] "
            (String.concat "; " (List.map (Printf.sprintf "%.2f") v)))
        predictions;
      print_newline ()
    end
  done
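(* To also report the loss and accuracy from the objectives, the epoch report
   inside main could use the sketched helpers, e.g.:

     if epoch mod 50 = 0 then begin
       let probs = List.map (fun inp -> softmax (mlp1#predict inp)) inputs in
       let avg_loss =
         List.fold_left2 (fun acc p t -> acc +. cross_entropy p t)
           0.0 probs targets
         /. float_of_int (List.length inputs)
       in
       Printf.printf "Epoch %d: loss %.4f, accuracy %.2f\n"
         epoch avg_loss (accuracy mlp1 inputs targets)
     end
*)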
2. CONCLUSION
OCaml version: The OCaml toplevel, version 5.3.0
PS C:\Users\User> ocaml C:\AI2025\lesson9.ml
Epoch 50: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 100: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 150: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 200: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 250: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 300: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 350: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 400: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 450: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 500: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Every epoch prints [0.50; 0.50] for all four inputs, which is simply softmax applied to two equal logits: the network never moves off its initial state. A trace that stays flat like this means the gradients are not reaching the weights, so the bookkeeping in backprop (which activation pairs with which delta, which layer's derivative is applied, and the order of the returned gradients) is the first place to check.