Friday, October 10, 2025

OCAML ADVANCED GENERIC MLP MODULE

REVISED: Friday, October 10, 2025


1. OCAML ADVANCED GENERIC MLP MODULE

(*
============================================================
ocaml C:\AI2025\adv_generic.ml

Advanced Generic MLP Module in OCaml
============================================================
Features:
- Fully generic MLP with arbitrary layers
- Forward/backward pass
- Cross-entropy loss for classification
- Per-example (stochastic) gradient descent
- Optional momentum
*)

(* -----------------------------
   1. Vector and Matrix Utilities
----------------------------- *)
module Math = struct
  type vector = float list
  type matrix = float list list

  let vector_add v1 v2 = List.map2 (+.) v1 v2
  let vector_sub v1 v2 = List.map2 (-.) v1 v2
  let vector_map f v = List.map f v
  let scalar_vector_mul s v = List.map (fun x -> s *. x) v
  let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
  let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
  let transpose m =
    let rec transpose_aux m acc =
      match m with
      | [] | [] :: _ -> List.rev acc  (* stop on an empty matrix or when rows are exhausted *)
      | _ ->
        let heads = List.map List.hd m in
        let tails = List.map List.tl m in
        transpose_aux tails (heads :: acc)
    in transpose_aux m []
  let outer_product v1 v2 =
    List.map (fun x -> List.map (fun y -> x *. y) v2) v1
  let matrix_add m1 m2 = List.map2 vector_add m1 m2
  let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
  let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
end
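
For orientation, a quick sanity check of these utilities (illustrative only, not part of the original listing; the expected values are worked out by hand, and the asserts are silent on success):

(* Hand-checked examples: mat_vec_mul, transpose, outer_product. *)
let () =
  let m = [[1.; 2.]; [3.; 4.]] in
  assert (Math.mat_vec_mul m [1.; 1.] = [3.; 7.]);
  assert (Math.transpose m = [[1.; 3.]; [2.; 4.]]);
  assert (Math.outer_product [1.; 2.] [3.; 4.] = [[3.; 4.]; [6.; 8.]])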

(* -----------------------------
   2. Activation Functions
----------------------------- *)
module Activations = struct
  let relu x = if x > 0. then x else 0.
  let relu_derivative x = if x > 0. then 1. else 0.
  let sigmoid x = 1. /. (1. +. exp (-.x))
  let sigmoid_derivative x = let s = sigmoid x in s *. (1. -. s)
  let softmax v =
    let max_v = List.fold_left max neg_infinity v in
    let exps = List.map (fun x -> exp (x -. max_v)) v in
    let sum_exps = List.fold_left (+.) 0.0 exps in
    List.map (fun x -> x /. sum_exps) exps
end
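
The max subtraction inside softmax is what keeps exp from overflowing: softmax is invariant under shifting all inputs by a constant, so subtracting the maximum changes nothing mathematically while bounding the exponents. A small check of both properties (illustrative only; the 1e-9 tolerance is an ad hoc choice):

(* exp 1003. would overflow to infinity without the max shift. *)
let () =
  let p1 = Activations.softmax [1.; 2.; 3.] in
  let p2 = Activations.softmax [1001.; 1002.; 1003.] in
  List.iter2 (fun a b -> assert (abs_float (a -. b) < 1e-9)) p1 p2;
  assert (abs_float (List.fold_left (+.) 0.0 p1 -. 1.0) < 1e-9)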

(* -----------------------------
   3. Cross-Entropy Loss
----------------------------- *)
let cross_entropy_loss (pred : Math.vector) (target : Math.vector) : float =
  List.fold_left2 (fun acc p t -> acc -. t *. log (p +. 1e-12)) 0.0 pred target

let cross_entropy_derivative (pred : Math.vector) (target : Math.vector) : Math.vector =
  List.map2 ( -. ) pred target
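
One subtlety: cross_entropy_derivative returns pred - target, which is the gradient of the loss with respect to the pre-softmax logits when pred is a softmax output (the softmax Jacobian and the log cancel), not the gradient with respect to an arbitrary prediction vector. A finite-difference check of that identity (illustrative sketch; the step 1e-6 and tolerance 1e-4 are ad hoc choices):

(* Verify d/dz_i CE(softmax z, t) = (softmax z)_i - t_i numerically. *)
let () =
  let z = [0.3; -0.7; 1.2] and t = [0.; 0.; 1.] in
  let loss zv = cross_entropy_loss (Activations.softmax zv) t in
  let analytic = cross_entropy_derivative (Activations.softmax z) t in
  List.iteri (fun i g ->
    let eps = 1e-6 in
    let z_plus = List.mapi (fun j x -> if j = i then x +. eps else x) z in
    assert (abs_float ((loss z_plus -. loss z) /. eps -. g) < 1e-4)
  ) analytic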

(* -----------------------------
   3b. Helpers used by the class
----------------------------- *)

(* replace_nth: replace the i-th element of a list (0-based).
   Needed because weights/biases are kept as lists and are updated
   functionally, by rebuilding the list rather than mutating in place. *)
let replace_nth lst i new_elem =
  List.mapi (fun idx x -> if idx = i then new_elem else x) lst

(* fold_lefti: fold_left with index (Stdlib has no List.fold_lefti).
   Added to replace uses of a nonstandard List.fold_lefti. *)
let fold_lefti f init lst =
  let rec aux i acc = function
    | [] -> acc
    | x::xs -> aux (i+1) (f acc i x) xs
  in aux 0 init lst
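
For example, fold_lefti gives a one-line argmax over a list, which is exactly how the accuracy method below uses it, and replace_nth behaves like a functional array write (illustrative checks, not part of the original listing):

let () =
  let argmax l =
    fold_lefti (fun best i v -> if v > List.nth l best then i else best) 0 l in
  assert (argmax [0.1; 0.7; 0.2] = 1);
  assert (replace_nth [10; 20; 30] 1 99 = [10; 99; 30])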

(* -----------------------------
   4. MLP Class with Momentum
----------------------------- *)
(*
  IMPORTANT:
  You cannot put local `let ... in` initialisers *inside* the `object` body and
  then refer to instance vars during those initialisers. The OCaml parser treats
  the class body specially and the previous placement caused the "unclosed object"
  syntax error. To fix this we do the required local initialisation *before*
  the `object` using `let ... in` chained directly after the `class` header.
*)
class mlp (layer_sizes : int list)
          (activations : (float -> float) list)
          (activation_derivs : (float -> float) list)
          (learning_rate : float)
          (momentum : float option) =
  (* --- compute initial weights and biases outside object to avoid instance-var-access error --- *)
  let rand_matrix rows cols =
    let rnd () = (Random.float 2. -. 1.) *. 0.5 in
    List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
  in
  let rec build ws ls =
    match ls with
    | [] | [_] -> ws
    | n1::n2::rest -> build (ws @ [rand_matrix n2 n1]) (n2::rest)
  in
  let initial_weights = build [] layer_sizes in
  (* initial_biases computed likewise *)
  let initial_biases = List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes) in
  (* Now open the object body where val/methods may refer to initial_* locals *)
  object (self)
    (* initialize instance vars from the precomputed values (fixes instance-var-access errors) *)
    val mutable weights : Math.matrix list = initial_weights
    val mutable biases : Math.vector list = initial_biases

    (* initialize momentum buffers (v_weights, v_biases) to zeros matching shapes *)
    val mutable v_weights : Math.matrix list =
      List.map (fun w -> List.map (fun row -> List.map (fun _ -> 0.0) row) w) initial_weights
    val mutable v_biases : Math.vector list =
      List.map (fun b -> List.map (fun _ -> 0.0) b) initial_biases

    method forward (input : Math.vector) : Math.vector list * Math.vector list =
      (* standard forward returning activations list and z list *)
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w::wt, b::bt, act::at ->
          let z = Math.vector_add (Math.mat_vec_mul w (List.hd a)) b in
          let a_next = List.map act z in
          f (a_next::a) (z::zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in f [input] [] weights biases activations

    method backprop (input : Math.vector) (target : Math.vector) =
      let (a_list, z_list) = self#forward input in
      (* a_list is in forward order [a0 = input; a1; ...; aL]; walk it backwards *)
      let a_rev = List.rev a_list in
      let z_rev = List.rev z_list in
      (* softmax + cross-entropy pairing: delta_L = softmax(z_L) - target.
         The last layer uses the identity activation, so softmax the final
         activation here (predict applies the same softmax). *)
      let delta_output =
        cross_entropy_derivative (Activations.softmax (List.hd a_rev)) target in

      (* Walk the layers from last to first. For layer l:
           dW_l = outer(delta_l, a_{l-1}),  db_l = delta_l,
           delta_{l-1} = (W_l^T delta_l) .* f'_{l-1}(z_{l-1}).
         Gradients are consed front-to-back, so the result comes out in
         forward (first-to-last) layer order, matching update_weights. *)
      let rec propagate ws zs_prev as_prev derivs_prev delta grads =
        match ws with
        | [] -> grads
        | w :: wt ->
          let a_prev = List.hd as_prev in
          let w_grad = Math.outer_product delta a_prev in
          let grads' = (w_grad, delta) :: grads in
          (match wt with
           | [] -> grads'  (* reached the first layer: no further delta needed *)
           | _ ->
             let delta_pre = Math.mat_vec_mul (Math.transpose w) delta in
             let d' = List.hd derivs_prev and z' = List.hd zs_prev in
             let delta' = List.map2 ( *. ) delta_pre (List.map d' z') in
             propagate wt (List.tl zs_prev) (List.tl as_prev)
               (List.tl derivs_prev) delta' grads')
      in
      propagate (List.rev weights)
        (List.tl z_rev)                          (* [z_{L-1}; ...; z_1] *)
        (List.tl a_rev)                          (* [a_{L-1}; ...; a_0] *)
        (List.tl (List.rev activation_derivs))   (* [f'_{L-1}; ...; f'_1] *)
        delta_output []

    method update_weights grads =
      (* Use List.iteri and replace_nth to update lists functionally (no arrays). *)
      List.iteri (fun i (dw, db) ->
        let lr = learning_rate in

        (* read current momentum buffer and compute new_vw *)
        let current_vw = List.nth v_weights i in
        let new_vw =
          match momentum with
          | Some m ->
              (* new_vw = m * old_vw + lr * dw *)
              let scaled_old = Math.scalar_matrix_mul m current_vw in
              Math.matrix_add scaled_old (Math.scalar_matrix_mul lr dw)
          | None ->
              (* no momentum: step = lr * dw *)
              Math.scalar_matrix_mul lr dw
        in
        (* replace v_weights[i] with new_vw *)
        v_weights <- replace_nth v_weights i new_vw;
        (* update weights[i] := weights[i] - new_vw *)
        let w_i = List.nth weights i in
        let new_w_i = Math.matrix_sub w_i new_vw in
        weights <- replace_nth weights i new_w_i;

        (* biases momentum buffer update similarly *)
        let current_vb = List.nth v_biases i in
        let new_vb =
          match momentum with
          | Some m ->
              let scaled_old_b = Math.scalar_vector_mul m current_vb in
              Math.vector_add scaled_old_b (Math.scalar_vector_mul lr db)
          | None ->
              Math.scalar_vector_mul lr db
        in
        v_biases <- replace_nth v_biases i new_vb;
        let b_i = List.nth biases i in
        let new_b_i = Math.vector_sub b_i new_vb in
        biases <- replace_nth biases i new_b_i
      ) grads

    method train_batch (inputs : Math.vector list) (targets : Math.vector list) =
      List.iter2 (fun x y ->
        let grads = self#backprop x y in
        self#update_weights grads
      ) inputs targets

    method predict (input : Math.vector) =
      let (activations, _) = self#forward input in
      (* return softmaxed probabilities for classification (apply vector-level softmax here) *)
      let raw = List.hd (List.rev activations) in
      Activations.softmax raw

    method accuracy (inputs : Math.vector list) (targets : Math.vector list) =
      let correct = ref 0 in
      List.iter2 (fun x y ->
        let pred = self#predict x in
        (* use fold_lefti helper to find index of max element *)
        let max_idx l =
          fold_lefti (fun idx_acc i v -> if v > List.nth l idx_acc then i else idx_acc) 0 l
        in
        if max_idx pred = max_idx y then incr correct
      ) inputs targets;
      (float !correct) /. (float (List.length inputs))
  end

(* -----------------------------
   5. Example Usage: XOR
----------------------------- *)
let () =
  Random.self_init ();
  let inputs = [[0.;0.]; [0.;1.]; [1.;0.]; [1.;1.]] in
  let targets = [[0.;1.]; [1.;0.]; [1.;0.]; [0.;1.]] in

  (*
    Do NOT pass Activations.softmax directly as a (float -> float) activation,
    because softmax has type float list -> float list. Instead pass the identity
    function as the last layer's per-neuron activation; softmax is applied at
    the vector level (in backprop for the gradient, in predict for the output).
  *)
  let mlp1 = new mlp [2;2;2] [Activations.relu; (fun x -> x)]
                        [Activations.relu_derivative; (fun _ -> 1.0)] 0.1 None in

  for epoch = 1 to 500 do
    mlp1#train_batch inputs targets;
    if epoch mod 50 = 0 then (
      Printf.printf "Epoch %d - Accuracy: %.2f\n" epoch (mlp1#accuracy inputs targets);
      let predictions = List.map mlp1#predict inputs in
      List.iter (fun v ->
        Printf.printf "[%s] " (String.concat "; " (List.map (Printf.sprintf "%.2f") v))
      ) predictions;
      print_newline ()
    )
  done
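
The momentum option goes unused above (None). Enabling it only changes the constructor call; a sketch (not part of the run recorded below; the hidden width of 4 and the coefficient 0.9 are conventional, untuned choices):

(* Same XOR data, wider hidden layer, momentum enabled via Some 0.9. *)
let () =
  let inputs = [[0.;0.]; [0.;1.]; [1.;0.]; [1.;1.]] in
  let targets = [[0.;1.]; [1.;0.]; [1.;0.]; [0.;1.]] in
  let mlp2 = new mlp [2;4;2] [Activations.relu; (fun x -> x)]
                     [Activations.relu_derivative; (fun _ -> 1.0)]
                     0.1 (Some 0.9) in
  for _epoch = 1 to 500 do mlp2#train_batch inputs targets done;
  Printf.printf "With momentum: accuracy %.2f\n" (mlp2#accuracy inputs targets)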

2. CONCLUSION

The run recorded below plateaus at 0.50 accuracy, with every input mapped to the uniform prediction [0.50; 0.50]: in this configuration the network fails to learn XOR.

OCaml version: The OCaml toplevel, version 5.3.0

PS C:\Users\User> ocaml C:\AI2025\adv_generic.ml
Epoch 50 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 100 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 150 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 200 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 250 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 300 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 350 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 400 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 450 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 500 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
