REVISED: Friday, October 10, 2025
1. OCAML ADVANCED GENERIC MLP MODULE
(*
============================================================
ocaml C:\AI2025\adv_generic.ml
Advanced Generic MLP Module in OCaml
============================================================
Features:
- Fully generic MLP with an arbitrary number of layers
- Forward/backward pass
- Cross-entropy loss for classification
- Per-example stochastic gradient descent (one update per training sample; no batching or shuffling yet)
- Optional classical momentum
*)
(* -----------------------------
   1. Vector and Matrix Utilities
   ----------------------------- *)
module Math = struct
  type vector = float list
  type matrix = float list list

  let vector_add v1 v2 = List.map2 (+.) v1 v2
  let vector_sub v1 v2 = List.map2 (-.) v1 v2
  let vector_map f v = List.map f v
  let scalar_vector_mul s v = List.map (fun x -> s *. x) v
  let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
  let mat_vec_mul m v = List.map (fun row -> dot_product row v) m

  let transpose m =
    let rec transpose_aux m acc =
      match List.hd m with
      | [] -> List.rev acc
      | _ ->
        let heads = List.map List.hd m in
        let tails = List.map List.tl m in
        transpose_aux tails (heads :: acc)
    in
    transpose_aux m []

  let outer_product v1 v2 =
    List.map (fun x -> List.map (fun y -> x *. y) v2) v1

  let matrix_add m1 m2 = List.map2 vector_add m1 m2
  let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
  let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
end
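(* A quick sanity check of the utilities above (illustrative values): a 2x3
   matrix, its transpose, and a matrix-vector product. *)
let () =
  let m = [[1.; 2.; 3.]; [4.; 5.; 6.]] in
  assert (Math.transpose m = [[1.; 4.]; [2.; 5.]; [3.; 6.]]);
  assert (Math.mat_vec_mul m [1.; 0.; 1.] = [4.; 10.]);
  assert (Math.dot_product [1.; 2.] [3.; 4.] = 11.)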
(* -----------------------------
   2. Activation Functions
   ----------------------------- *)
module Activations = struct
  let relu x = if x > 0. then x else 0.
  let relu_derivative x = if x > 0. then 1. else 0.
  let sigmoid x = 1. /. (1. +. exp (-.x))
  let sigmoid_derivative x = let s = sigmoid x in s *. (1. -. s)

  (* numerically stable softmax: subtract the max before exponentiating *)
  let softmax v =
    let max_v = List.fold_left max neg_infinity v in
    let exps = List.map (fun x -> exp (x -. max_v)) v in
    let sum_exps = List.fold_left (+.) 0.0 exps in
    List.map (fun x -> x /. sum_exps) exps
end
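(* Small check of the softmax above: the outputs sum to 1, and shifting every
   logit by a large constant must not overflow or change the result (this is
   the point of subtracting max_v before exponentiating). *)
let () =
  let v = [1.0; 2.0; 3.0] in
  let s = Activations.softmax v in
  assert (abs_float (List.fold_left (+.) 0.0 s -. 1.0) < 1e-9);
  let s' = Activations.softmax (List.map (fun x -> x +. 1000.) v) in
  List.iter2 (fun a b -> assert (abs_float (a -. b) < 1e-9)) s s'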
(* -----------------------------
   3. Cross-Entropy Loss
   ----------------------------- *)
let cross_entropy_loss (pred : Math.vector) (target : Math.vector) : float =
  List.fold_left2 (fun acc p t -> acc -. t *. log (p +. 1e-12)) 0.0 pred target

(* Gradient of cross-entropy composed with softmax, taken w.r.t. the logits:
   it simplifies to (pred - target) when pred is the softmax output. *)
let cross_entropy_derivative (pred : Math.vector) (target : Math.vector) : Math.vector =
  List.map2 ( -. ) pred target
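(* Why (pred - target) is the right output delta: with p = softmax z and a
   one-hot t, d/dz_i of -(sum_j t_j * log p_j) works out to p_i - t_i. A quick
   finite-difference spot check on the first logit (illustrative values): *)
let () =
  let z = [0.2; -0.1; 0.4] and t = [0.; 1.; 0.] in
  let analytic = List.hd (cross_entropy_derivative (Activations.softmax z) t) in
  let loss v = cross_entropy_loss (Activations.softmax v) t in
  let eps = 1e-6 in
  let z_bumped = List.mapi (fun i x -> if i = 0 then x +. eps else x) z in
  let numeric = (loss z_bumped -. loss z) /. eps in
  assert (abs_float (analytic -. numeric) < 1e-4)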
(* -----------------------------
   3b. Helpers used by the class
   ----------------------------- *)

(* replace_nth: return a copy of the list with the i-th element (0-based)
   replaced. Needed because weights/biases are kept as immutable lists, so an
   "update" builds a new list rather than mutating in place. *)
let replace_nth lst i new_elem =
  List.mapi (fun idx x -> if idx = i then new_elem else x) lst

(* fold_lefti: fold_left that also passes the element index
   (the Stdlib List module has no fold_lefti). *)
let fold_lefti f init lst =
  let rec aux i acc = function
    | [] -> acc
    | x :: xs -> aux (i + 1) (f acc i x) xs
  in
  aux 0 init lst
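(* Example use of the two helpers (hypothetical values): *)
let () =
  assert (replace_nth [10; 20; 30] 1 99 = [10; 99; 30]);
  (* argmax via fold_lefti: keep the index whose element is largest so far *)
  let argmax l = fold_lefti (fun best i v -> if v > List.nth l best then i else best) 0 l in
  assert (argmax [0.1; 0.7; 0.2] = 1)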
(* -----------------------------
   4. MLP Class with Momentum
   ----------------------------- *)

(* IMPORTANT:
   You cannot put local `let ... in` initialisers *inside* the `object` body
   and then refer to instance variables from them: the class body is parsed
   specially, and the earlier placement produced the "unclosed object" syntax
   error. The fix is to perform the local initialisation *before* `object`,
   with `let ... in` chained directly after the `class` header, as in the
   minimal sketch below. *)
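(* A minimal, hypothetical illustration of that pattern (not part of the MLP):
   locals bound between the class header and `object` are visible to the
   `val` initialisers. *)
class _counter (start : int) =
  let clamped = max 0 start in
  object
    val mutable n = clamped
    method value = n
    method incr = n <- n + 1
  end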
class mlp (layer_sizes : int list)
          (activations : (float -> float) list)
          (activation_derivs : (float -> float) list)
          (learning_rate : float)
          (momentum : float option) =
  (* compute initial weights and biases *before* the object body, so the `val`
     initialisers below only read plain locals, never instance state *)
  let rand_matrix rows cols =
    let rnd () = (Random.float 2. -. 1.) *. 0.5 in   (* uniform in [-0.5, 0.5) *)
    List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
  in
  (* one (n2 x n1) weight matrix per consecutive pair of layer sizes *)
  let rec build ws ls =
    match ls with
    | [] | [_] -> ws
    | n1 :: n2 :: rest -> build (ws @ [rand_matrix n2 n1]) (n2 :: rest)
  in
  let initial_weights = build [] layer_sizes in
  (* one zero bias vector per non-input layer *)
  let initial_biases =
    List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes)
  in
  object (self)
    val mutable weights : Math.matrix list = initial_weights
    val mutable biases : Math.vector list = initial_biases

    (* momentum buffers, zero-initialised with the same shapes *)
    val mutable v_weights : Math.matrix list =
      List.map (fun w -> List.map (fun row -> List.map (fun _ -> 0.0) row) w) initial_weights
    val mutable v_biases : Math.vector list =
      List.map (fun b -> List.map (fun _ -> 0.0) b) initial_biases
    method forward (input : Math.vector) : Math.vector list * Math.vector list =
      (* returns (activations, pre-activations), both ordered input-to-output;
         the activations list carries the raw input as its first element *)
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w :: wt, b :: bt, act :: at ->
          let z = Math.vector_add (Math.mat_vec_mul w (List.hd a)) b in
          let a_next = List.map act z in
          f (a_next :: a) (z :: zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in
      f [input] [] weights biases activations
    method backprop (input : Math.vector) (target : Math.vector) =
      let (a_list, z_list) = self#forward input in
      (* a_list = [a0 = input; a1; ...; aL]; z_list = [z1; ...; zL] *)
      let a_rev = List.rev a_list in
      let a_last = List.hd a_rev in
      (* the last layer uses the identity activation, so apply softmax here;
         the softmax + cross-entropy gradient w.r.t. the logits is then simply
         (probs - target) *)
      let probs = Activations.softmax a_last in
      let delta_output = cross_entropy_derivative probs target in
      (* walk the layers from output to input; `a_prevs` supplies the activation
         feeding each layer, while `zs`/`derivs` supply the pre-activation and
         activation derivative of the layer *below*, needed to push the delta
         one layer back *)
      let rec propagate ws zs a_prevs derivs delta grads =
        match ws, zs, a_prevs with
        | [], [], [] -> grads   (* grads end up ordered input-to-output *)
        | w :: wt, _z :: zt, a_prev :: at ->
          let w_grad = Math.outer_product delta a_prev in
          let b_grad = delta in
          let delta_prev =
            match zt, derivs with
            | z_prev :: _, d_prev :: _ ->
              let back = Math.mat_vec_mul (Math.transpose w) delta in
              List.map2 ( *. ) back (List.map d_prev z_prev)
            | _ -> delta   (* input layer reached: value unused *)
          in
          let derivs' = match derivs with [] -> [] | _ :: dt -> dt in
          propagate wt zt at derivs' delta_prev ((w_grad, b_grad) :: grads)
        | _ -> failwith "Mismatch in backprop"
      in
      propagate (List.rev weights) (List.rev z_list) (List.tl a_rev)
        (List.tl (List.rev activation_derivs)) delta_output []
    method update_weights grads =
      (* grads comes back from backprop ordered input-to-output, so grads.(i)
         pairs with weights.(i); lists are updated functionally via replace_nth
         (no arrays) *)
      List.iteri (fun i (dw, db) ->
        let lr = learning_rate in
        (* weight momentum buffer: v <- m * v + lr * dw
           (a plain lr * dw step when momentum is off) *)
        let current_vw = List.nth v_weights i in
        let new_vw =
          match momentum with
          | Some m ->
            Math.matrix_add (Math.scalar_matrix_mul m current_vw)
                            (Math.scalar_matrix_mul lr dw)
          | None -> Math.scalar_matrix_mul lr dw
        in
        v_weights <- replace_nth v_weights i new_vw;
        (* weights.(i) <- weights.(i) - v *)
        weights <- replace_nth weights i (Math.matrix_sub (List.nth weights i) new_vw);
        (* the same update for the bias buffer and biases *)
        let current_vb = List.nth v_biases i in
        let new_vb =
          match momentum with
          | Some m ->
            Math.vector_add (Math.scalar_vector_mul m current_vb)
                            (Math.scalar_vector_mul lr db)
          | None -> Math.scalar_vector_mul lr db
        in
        v_biases <- replace_nth v_biases i new_vb;
        biases <- replace_nth biases i (Math.vector_sub (List.nth biases i) new_vb)
      ) grads
    method train_batch (inputs : Math.vector list) (targets : Math.vector list) =
      List.iter2 (fun x y ->
        let grads = self#backprop x y in
        self#update_weights grads
      ) inputs targets
    method predict (input : Math.vector) =
      (* the last layer is linear, so apply the vector-level softmax here to
         turn the final activation into class probabilities *)
      let (acts, _) = self#forward input in
      Activations.softmax (List.hd (List.rev acts))
    method accuracy (inputs : Math.vector list) (targets : Math.vector list) =
      let correct = ref 0 in
      (* index of the largest element, via the fold_lefti helper *)
      let max_idx l =
        fold_lefti (fun best i v -> if v > List.nth l best then i else best) 0 l
      in
      List.iter2 (fun x y ->
        let pred = self#predict x in
        if max_idx pred = max_idx y then incr correct
      ) inputs targets;
      float !correct /. float (List.length inputs)
end
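(* The rule implemented by update_weights is classical momentum:
     v <- m * v + lr * g        w <- w - v
   A scalar sketch with hypothetical numbers: under a constant gradient g, the
   step size converges to lr * g / (1 - m), i.e. momentum amplifies gradients
   that keep pointing the same way. *)
let () =
  let m = 0.9 and lr = 0.1 and g = 1.0 in
  let rec step v n = if n = 0 then v else step (m *. v +. lr *. g) (n - 1) in
  assert (abs_float (step 0.0 200 -. lr *. g /. (1. -. m)) < 1e-3)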
(* -----------------------------
   5. Example Usage: XOR
   ----------------------------- *)
let () =
  Random.self_init ();
  let inputs = [[0.; 0.]; [0.; 1.]; [1.; 0.]; [1.; 1.]] in
  (* one-hot targets: class 0 = "XOR is true", class 1 = "XOR is false" *)
  let targets = [[0.; 1.]; [1.; 0.]; [1.; 0.]; [0.; 1.]] in
  (* Do NOT pass Activations.softmax as a per-neuron activation: its type is
     float list -> float list. The last layer uses the identity function, and
     softmax is applied where it is needed: in `predict` for probabilities and
     inside `backprop` when forming the output delta. *)
  let mlp1 = new mlp [2; 2; 2]
               [Activations.relu; (fun x -> x)]
               [Activations.relu_derivative; (fun _ -> 1.0)]
               0.1 None in
  for epoch = 1 to 500 do
    mlp1#train_batch inputs targets;
    if epoch mod 50 = 0 then (
      Printf.printf "Epoch %d - Accuracy: %.2f\n" epoch (mlp1#accuracy inputs targets);
      let predictions = List.map mlp1#predict inputs in
      List.iter (fun v ->
        Printf.printf "[%s] " (String.concat "; " (List.map (Printf.sprintf "%.2f") v))
      ) predictions;
      print_newline ()
    )
  done
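(* One plausible reading of the run recorded in the conclusion below (accuracy
   stuck at 0.50, uniform outputs) is that both ReLU units in the tiny hidden
   layer died. A wider hidden layer with momentum is a more forgiving
   configuration to experiment with; `_mlp_wide` is a hypothetical variant,
   not used above: *)
let _mlp_wide () =
  new mlp [2; 8; 2]
    [Activations.relu; (fun x -> x)]
    [Activations.relu_derivative; (fun _ -> 1.0)]
    0.1 (Some 0.9)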
2. CONCLUSION
OCaml version: The OCaml toplevel, version 5.3.0
PS C:\Users\User> ocaml C:\AI2025\adv_generic.ml
Epoch 50 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 100 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 150 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 200 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 250 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 300 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 350 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 400 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 450 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 500 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
PS C:\Users\User>