OCAML FULLY PARAMETERIZED MLP CLASS

REVISED: Thursday, October 9, 2025

1. OCAML FULLY PARAMETERIZED MLP CLASS
(*
========================================
ocaml C:\AI2025\lesson8.ml

Lesson 8: Fully Parameterized MLP Class
========================================

Objective:
1. Define an MLP class with any number of layers.
2. Support custom activation functions.
3. Perform forward, backward, and batch updates.
*)

(* -----------------------------
   1. Vector and Matrix Utilities
----------------------------- *)
type vector = float list
type matrix = float list list

let vector_add v1 v2 = List.map2 (+.) v1 v2
let vector_sub v1 v2 = List.map2 (-.) v1 v2
let vector_map f v = List.map f v
let scalar_vector_mul s v = List.map (fun x -> s *. x) v
let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
let transpose m =
  let rec transpose_aux m acc =
    match List.hd m with
    | [] -> List.rev acc
    | _ ->
      let heads = List.map List.hd m in
      let tails = List.map List.tl m in
      transpose_aux tails (heads :: acc)
  in
  transpose_aux m []

let outer_product v1 v2 =
  List.map (fun x -> List.map (fun y -> x *. y) v2) v1
let matrix_add m1 m2 = List.map2 vector_add m1 m2
let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
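
(* Quick sanity checks of the utilities above. The numeric values are
   illustrative additions, not part of the original lesson: transpose swaps
   rows and columns, mat_vec_mul is an ordinary matrix-vector product, and
   outer_product builds the rank-1 matrix used later for weight gradients. *)
let () =
  assert (transpose [[1.; 2.]; [3.; 4.]] = [[1.; 3.]; [2.; 4.]]);
  assert (mat_vec_mul [[1.; 0.]; [0.; 2.]] [3.; 4.] = [3.; 8.]);
  assert (outer_product [1.; 2.] [3.; 4.] = [[3.; 4.]; [6.; 8.]])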

(* -----------------------------
   2. Common Activations
----------------------------- *)
let relu x = if x > 0. then x else 0.
let relu_derivative x = if x > 0. then 1. else 0.
let softmax v =
  let max_v = List.fold_left max neg_infinity v in
  let exps = List.map (fun x -> exp (x -. max_v)) v in
  let sum_exps = List.fold_left (+.) 0.0 exps in
  List.map (fun x -> x /. sum_exps) exps
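
(* A small illustration of the activations. The example vector is an assumed
   value, not from the lesson: relu clips negatives to zero, and softmax maps
   any vector to probabilities that sum to 1. *)
let () =
  assert (relu (-2.0) = 0.0 && relu 3.0 = 3.0);
  let probs = softmax [1.0; 2.0; 3.0] in
  assert (abs_float (List.fold_left (+.) 0.0 probs -. 1.0) < 1e-9)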

(* -----------------------------
   3. Fully Parameterized MLP Class
----------------------------- *)
class ['input, 'output] mlp
    (layer_sizes : int list)
    (activations : (float -> float) list)        (* per-layer activations, applied element-wise *)
    (activation_derivs : (float -> float) list)  (* per-layer activation derivatives *)
    (learning_rate : float) =
  object (self)
    (* Initialize weights and biases randomly *)
    val mutable weights : matrix list =
      let rand_matrix rows cols =
        let rnd () = (Random.float 2. -. 1.) *. 0.5 in
        List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
      in
      let rec build ws ls =
        match ls with
        | [] | [_] -> ws
        | n1 :: n2 :: rest -> build (ws @ [rand_matrix n2 n1]) (n2 :: rest)
      in
      build [] layer_sizes

    val mutable biases : vector list =
      List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes)

    method forward (input : vector) : vector list * vector list =
      (* Returns list of activations (a0..aL) and pre-activations z (z1..zL).
         Note: activations list includes the input as a0. *)
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w :: wt, b :: bt, act :: at ->
            let z = vector_add (mat_vec_mul w (List.hd a)) b in
            let a_next = vector_map act z in
            f (a_next :: a) (z :: zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in
      let (a_list, z_list) = f [input] [] weights biases activations in
      (* Apply softmax to the final layer at the vector level.
         Reason: 'activations' holds scalar (float -> float) functions, while
         softmax needs the whole output vector, so it is applied explicitly to the last z. *)
      (match List.rev z_list with
       | [] -> (a_list, z_list)
       | last_z :: _ ->
           (* replace last activation in a_list with softmax(last_z) *)
           let rec replace_last lst new_last =
             match lst with
             | [] -> [new_last]
             | [_] -> [new_last]          (* if single element, replace it *)
             | x :: xs -> x :: replace_last xs new_last
           in
           let a_list_replaced = replace_last a_list (softmax last_z) in
           (a_list_replaced, z_list)
      )

    method backprop (input : vector) (target : vector) =
      let (activations_list, z_list) = self#forward input in
      (* activations_list = [a0; a1; ...; aL], z_list = [z1; ...; zL] *)

      (* Output activation (last) is probabilities after softmax *)
      let a_last = List.hd (List.rev activations_list) in

      (* Output layer error (softmax + cross-entropy derivative)
         For softmax + one-hot target, delta = a_last - target (element-wise). *)
      let delta_last = vector_sub a_last target in

      (* Index-based backprop loop: iterate layers from L down to 1, computing
         the gradients (dW_l, dB_l) for layer l and propagating the error back
         to layer l-1 via
           delta_{l-1} = (W_l^T delta_l) elementwise-multiplied by f'_{l-1}(z_{l-1}). *)
      let l_count = List.length weights in
      let rec loop l delta acc =
        if l = 0 then List.rev acc
        else
          let w_l = List.nth weights (l - 1) in               (* weight matrix W_l *)
          let a_prev = List.nth activations_list (l - 1) in   (* activation a_{l-1} *)
          let w_grad = outer_product delta a_prev in          (* gradient w.r.t. weights *)
          let b_grad = delta in                               (* gradient w.r.t. biases *)
          let delta_prev =
            if l = 1 then delta  (* no earlier layer; this value is discarded once l reaches 0 *)
            else
              let z_before = List.nth z_list (l - 2) in                (* pre-activation z_{l-1} *)
              let act_deriv = List.nth activation_derivs (l - 2) in    (* derivative for layer l-1 *)
              let propagated = mat_vec_mul (transpose w_l) delta in    (* W_l^T delta_l *)
              List.map2 ( *. ) propagated (List.map act_deriv z_before)
          in
          loop (l - 1) delta_prev ((w_grad, b_grad) :: acc)
      in
      loop l_count delta_last []

    method update_weights grads =
      (* grads : (matrix * vector) list  where each pair = (dW, dB) for each layer *)
      let ws', bs' = List.split grads in
      (* Update weights and biases elementwise using the learning_rate *)
      weights <- List.map2 (fun w dw -> matrix_sub w (scalar_matrix_mul learning_rate dw)) weights ws';
      biases <- List.map2 (fun b db -> vector_sub b (scalar_vector_mul learning_rate db)) biases bs'

  end
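
(* Convenience wrapper (an illustrative addition, not part of the original
   lesson): one gradient step on a single (input, target) pair, chaining the
   backprop and update_weights methods defined above. It is used by the
   training sketch at the end of the example below. *)
let train_step net input target =
  net#update_weights (net#backprop input target)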

(* -----------------------------
   4. Example usage
----------------------------- *)
let () =
  Random.self_init ();

  (* Define a 2-layer MLP: 2 inputs -> 3 hidden -> 2 outputs.
     We pass one element-wise activation per layer; the last layer uses the
     identity (fun x -> x) with derivative (fun _ -> 1.0), because softmax is
     applied vector-wise in forward. *)
  let mlp1 = new mlp [2; 3; 2] [relu; (fun x -> x)] [relu_derivative; (fun _ -> 1.0)] 0.01 in

  (* Single input and target *)
  let input = [1.0; 0.5] in
  let _target = [1.0; 0.0] in  (* prefixed with _ because this forward-only example never uses it *)

  (* Forward pass *)
  let (activations, _zs) = mlp1#forward input in
  let output = List.hd (List.rev activations) in
  (* The last element of activations holds the softmax probabilities;
     List.rev followed by List.hd is safe because the network has at least one layer. *)

  Printf.printf "Output before training: [%s]\n"
    (String.concat "; " (List.map string_of_float output))
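
(* A possible extension (an illustrative addition, not shown in the lesson's
   transcript): repeat train_step on the single example, then print the output
   again. With the softmax/cross-entropy delta used in backprop, the
   probabilities should move toward the target [1.0; 0.0]. The learning rate
   0.1 and the 200 steps are arbitrary choices for the sketch. *)
let () =
  let net = new mlp [2; 3; 2] [relu; (fun x -> x)] [relu_derivative; (fun _ -> 1.0)] 0.1 in
  let input = [1.0; 0.5] in
  let target = [1.0; 0.0] in
  for _i = 1 to 200 do
    train_step net input target
  done;
  let (acts, _) = net#forward input in
  let output = List.hd (List.rev acts) in
  Printf.printf "Output after training (sketch): [%s]\n"
    (String.concat "; " (List.map string_of_float output))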

2. CONCLUSION

Running lesson8.ml from Windows PowerShell with the OCaml 5.3.0 toplevel prints the untrained network's softmax output, which is close to uniform because the weights start small and random and the biases start at zero:

Windows PowerShell
Copyright (C) Microsoft Corporation. All rights reserved.

Install the latest PowerShell for new features and improvements! https://aka.ms/PSWindows

OCaml version: The OCaml toplevel, version 5.3.0
Coq-LSP version: 0.2.3
Loading personal and system profiles took 4430ms.
PS C:\Users\User> ocaml C:\AI2025\lesson8.ml
Output before training: [0.474754307249; 0.525245692751]
PS C:\Users\User>
