Friday, October 10, 2025

OCAML ADVANCED GENERIC MLP MODULE

 

REVISED: Friday, October 10, 2025                                        





1. OCAML ADVANCED GENERIC MLP MODULE

(*
============================================================
ocaml C:\AI2025\adv_generic.ml

Advanced Generic MLP Module in OCaml
============================================================
Features:
- Fully generic MLP with arbitrary layers
- Forward/backward pass
- Cross-entropy loss for classification
- Per-example (stochastic) gradient descent over the training set
- Optional momentum (a usage sketch follows the class definition)
*)

(* -----------------------------
   1. Vector and Matrix Utilities
----------------------------- *)
module Math = struct
  type vector = float list
  type matrix = float list list

  let vector_add v1 v2 = List.map2 (+.) v1 v2
  let vector_sub v1 v2 = List.map2 (-.) v1 v2
  let vector_map f v = List.map f v
  let scalar_vector_mul s v = List.map (fun x -> s *. x) v
  let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
  let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
  let transpose m =
    let rec transpose_aux m acc =
      match List.hd m with
      | [] -> List.rev acc
      | _ ->
        let heads = List.map List.hd m in
        let tails = List.map List.tl m in
        transpose_aux tails (heads :: acc)
    in transpose_aux m []
  let outer_product v1 v2 =
    List.map (fun x -> List.map (fun y -> x *. y) v2) v1
  let matrix_add m1 m2 = List.map2 vector_add m1 m2
  let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
  let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
end

(* -----------------------------
   2. Activation Functions
----------------------------- *)
module Activations = struct
  let relu x = if x > 0. then x else 0.
  let relu_derivative x = if x > 0. then 1. else 0.
  let sigmoid x = 1. /. (1. +. exp (-.x))
  let sigmoid_derivative x = let s = sigmoid x in s *. (1. -. s)
  let softmax v =
    let max_v = List.fold_left max neg_infinity v in
    let exps = List.map (fun x -> exp (x -. max_v)) v in
    let sum_exps = List.fold_left (+.) 0.0 exps in
    List.map (fun x -> x /. sum_exps) exps
end

(* -----------------------------
   3. Cross-Entropy Loss
----------------------------- *)
let cross_entropy_loss (pred : Math.vector) (target : Math.vector) : float =
  List.fold_left2 (fun acc p t -> acc -. t *. log (p +. 1e-12)) 0.0 pred target

let cross_entropy_derivative (pred : Math.vector) (target : Math.vector) : Math.vector =
  List.map2 ( -. ) pred target
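
(* Quick numeric check (illustrative values only): with a one-hot target the
   loss reduces to -log of the probability assigned to the true class.
   Note that (pred - target) is the gradient of this loss with respect to the
   logits only when pred is the softmax of those logits, which is the usual
   softmax + cross-entropy pairing assumed here. *)
let _ =
  let loss = cross_entropy_loss [0.7; 0.2; 0.1] [1.; 0.; 0.] in
  assert (abs_float (loss +. log 0.7) < 1e-9)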

(* -----------------------------
   3b. Helpers used by the class
----------------------------- *)

(* replace_nth: replace the i-th element of a list (0-based).
   Added because we keep weights/biases as lists and must mutate them functionally. *)
let replace_nth lst i new_elem =
  List.mapi (fun idx x -> if idx = i then new_elem else x) lst
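
(* Example (values chosen for illustration):
   replace_nth [10; 20; 30] 1 99 = [10; 99; 30] *)
let _ = assert (replace_nth [10; 20; 30] 1 99 = [10; 99; 30])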

(* fold_lefti: fold_left with index (Stdlib has no List.fold_lefti).
   Added to replace uses of a nonstandard List.fold_lefti. *)
let fold_lefti f init lst =
  let rec aux i acc = function
    | [] -> acc
    | x::xs -> aux (i+1) (f acc i x) xs
  in aux 0 init lst
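
(* Example (values chosen for illustration): accumulating index * value gives
   0*10 + 1*20 + 2*30 = 80. *)
let _ = assert (fold_lefti (fun acc i x -> acc + i * x) 0 [10; 20; 30] = 80)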

(* -----------------------------
   4. MLP Class with Momentum
----------------------------- *)
(*
  IMPORTANT:
  You cannot put local `let ... in` initialisers *inside* the `object` body and
  refer to instance variables from them: the OCaml parser treats the class body
  specially, and the previous placement caused the "unclosed object" syntax
  error. The fix is to perform the required local initialisation *before*
  `object`, chaining `let ... in` directly after the `class` header.
  A minimal illustration of the pattern follows.
*)
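
(* Minimal illustration of that pattern (a hypothetical class, not used
   elsewhere in this file): the local `doubled` is bound before `object`
   and is visible when the instance variable below is initialised. *)
class counter_example (start : int) =
  let doubled = start * 2 in
  object
    val mutable count = doubled
    method value = count
    method bump = count <- count + 1
  end
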
class mlp (layer_sizes : int list)
          (activations : (float -> float) list)
          (activation_derivs : (float -> float) list)
          (learning_rate : float)
          (momentum : float option) =
  (* --- compute initial weights and biases outside object to avoid instance-var-access error --- *)
  let rand_matrix rows cols =
    let rnd () = (Random.float 2. -. 1.) *. 0.5 in
    List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
  in
  let rec build ws ls =
    match ls with
    | [] | [_] -> ws
    | n1::n2::rest -> build (ws @ [rand_matrix n2 n1]) (n2::rest)
  in
  let initial_weights = build [] layer_sizes in
  (* initial_biases computed likewise *)
  let initial_biases = List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes) in
  (* Now open the object body where val/methods may refer to initial_* locals *)
  object (self)
    (* initialize instance vars from the precomputed values (fixes instance-var-access errors) *)
    val mutable weights : Math.matrix list = initial_weights
    val mutable biases : Math.vector list = initial_biases

    (* initialize momentum buffers (v_weights, v_biases) to zeros matching shapes *)
    val mutable v_weights : Math.matrix list =
      List.map (fun w -> List.map (fun row -> List.map (fun _ -> 0.0) row) w) initial_weights
    val mutable v_biases : Math.vector list =
      List.map (fun b -> List.map (fun _ -> 0.0) b) initial_biases

    method forward (input : Math.vector) : Math.vector list * Math.vector list =
      (* standard forward returning activations list and z list *)
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w::wt, b::bt, act::at ->
          let z = Math.vector_add (Math.mat_vec_mul w (List.hd a)) b in
          let a_next = List.map act z in
          f (a_next::a) (z::zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in f [input] [] weights biases activations

    method backprop (input : Math.vector) (target : Math.vector) =
      let (a_list, z_list) = self#forward input in
      let a_last = List.hd a_list in
      let delta_output = cross_entropy_derivative a_last target in

      let rec propagate ws bs acts deltas grads =
        match ws, bs, acts, deltas with
        | [], [], [], _ -> List.rev grads
        | w::wt, b::bt, act::at, delta::dt ->
          let a_prev = List.nth a_list (List.length grads) in
          let w_grad = Math.outer_product delta a_prev in
          let b_grad = delta in
          let w_t = Math.transpose w in
          let delta_prev_pre = Math.mat_vec_mul w_t delta in
          let delta_prev =
            List.map2 ( *. ) delta_prev_pre (List.map act (List.hd z_list))
          in
          propagate wt bt at (delta_prev::dt) ((w_grad,b_grad)::grads)
        | _ -> failwith "Mismatch in backprop"
      in propagate (List.rev weights) (List.rev biases)
        (List.rev activation_derivs) [delta_output] []

    method update_weights grads =
      (* Use List.iteri and replace_nth to update lists functionally (no arrays). *)
      List.iteri (fun i (dw, db) ->
        let lr = learning_rate in

        (* read current momentum buffer and compute new_vw *)
        let current_vw = List.nth v_weights i in
        let new_vw =
          match momentum with
          | Some m ->
              (* new_vw = m * old_vw + lr * dw *)
              let scaled_old = Math.scalar_matrix_mul m current_vw in
              Math.matrix_add scaled_old (Math.scalar_matrix_mul lr dw)
          | None ->
              (* no momentum: step = lr * dw *)
              Math.scalar_matrix_mul lr dw
        in
        (* replace v_weights[i] with new_vw *)
        v_weights <- replace_nth v_weights i new_vw;
        (* update weights[i] := weights[i] - new_vw *)
        let w_i = List.nth weights i in
        let new_w_i = Math.matrix_sub w_i new_vw in
        weights <- replace_nth weights i new_w_i;

        (* biases momentum buffer update similarly *)
        let current_vb = List.nth v_biases i in
        let new_vb =
          match momentum with
          | Some m ->
              let scaled_old_b = Math.scalar_vector_mul m current_vb in
              Math.vector_add scaled_old_b (Math.scalar_vector_mul lr db)
          | None ->
              Math.scalar_vector_mul lr db
        in
        v_biases <- replace_nth v_biases i new_vb;
        let b_i = List.nth biases i in
        let new_b_i = Math.vector_sub b_i new_vb in
        biases <- replace_nth biases i new_b_i
      ) grads;
      ()

    method train_batch (inputs : Math.vector list) (targets : Math.vector list) =
      List.iter2 (fun x y ->
        let grads = self#backprop x y in
        self#update_weights grads
      ) inputs targets

    method predict (input : Math.vector) =
      let (activations, _) = self#forward input in
      (* return softmaxed probabilities for classification (apply vector-level softmax here) *)
      let raw = List.hd (List.rev activations) in
      Activations.softmax raw

    method accuracy (inputs : Math.vector list) (targets : Math.vector list) =
      let correct = ref 0 in
      List.iter2 (fun x y ->
        let pred = self#predict x in
        (* use fold_lefti helper to find index of max element *)
        let max_idx l =
          fold_lefti (fun idx_acc i v -> if v > List.nth l idx_acc then i else idx_acc) 0 l
        in
        if max_idx pred = max_idx y then incr correct
      ) inputs targets;
      (float !correct) /. (float (List.length inputs))
  end
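
(* Usage sketch (hyper-parameter values here are illustrative only, and this
   binding is never called): momentum is enabled by passing `Some m` as the
   last constructor argument, which feeds the v <- m*v + lr*grad update in
   update_weights above. *)
let _mlp_with_momentum () =
  new mlp [2; 4; 2]
    [Activations.relu; (fun x -> x)]
    [Activations.relu_derivative; (fun _ -> 1.0)]
    0.05 (Some 0.9)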

(* -----------------------------
   5. Example Usage: XOR
----------------------------- *)
let () =
  Random.self_init ();
  let inputs = [[0.;0.]; [0.;1.]; [1.;0.]; [1.;1.]] in
  let targets = [[0.;1.]; [1.;0.]; [1.;0.]; [0.;1.]] in

  (*
    Do NOT pass Activations.softmax directly as a (float -> float) activation,
    because softmax has type float list -> float list. Instead pass an identity
    function for the last-layer per-neuron activation, and rely on predict applying softmax.
  *)
  let mlp1 = new mlp [2;2;2] [Activations.relu; (fun x -> x)]
                        [Activations.relu_derivative; (fun _ -> 1.0)] 0.1 None in

  for epoch = 1 to 500 do
    mlp1#train_batch inputs targets;
    if epoch mod 50 = 0 then (
      Printf.printf "Epoch %d - Accuracy: %.2f\n" epoch (mlp1#accuracy inputs targets);
      let predictions = List.map mlp1#predict inputs in
      List.iter (fun v ->
        Printf.printf "[%s] " (String.concat "; " (List.map (Printf.sprintf "%.2f") v))
      ) predictions;
      print_newline ()
    )
  done

2. CONCLUSION

The logged run below trains the advanced module on XOR for 500 epochs. Accuracy stays at 0.50 throughout and every prediction remains [0.50; 0.50], so this configuration does not learn the XOR mapping.

Windows PowerShell
Copyright (C) Microsoft Corporation. All rights reserved.

Install the latest PowerShell for new features and improvements! https://aka.ms/PSWindows

OCaml version: The OCaml toplevel, version 5.3.0
Coq-LSP version: 0.2.3
Loading personal and system profiles took 4540ms.
PS C:\Users\User> ocaml C:\AI2025\adv_generic.ml
Epoch 50 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 100 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 150 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 200 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 250 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 300 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 350 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 400 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 450 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 500 - Accuracy: 0.50
[0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
PS C:\Users\User>


OCAML GENERIC MLP MODULE

 

REVISED: Friday, October 10, 2025                                        





1. OCAML GENERIC MLP MODULE

(*
============================================================
ocaml C:\AI2025\generic.ml

Generic MLP Module in OCaml
============================================================
Objective:
- Create a reusable MLP module for arbitrary datasets (see the reuse sketch after the class definition).
- Include forward pass, backpropagation, training, and accuracy.
*)

(* -----------------------------
   1. Vector and Matrix Utilities
----------------------------- *)
module Math = struct
  type vector = float list
  type matrix = float list list

  let vector_add v1 v2 = List.map2 (+.) v1 v2
  let vector_sub v1 v2 = List.map2 (-.) v1 v2
  let vector_map f v = List.map f v
  let scalar_vector_mul s v = List.map (fun x -> s *. x) v
  let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
  let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
  let transpose m =
    let rec transpose_aux m acc =
      match List.hd m with
      | [] -> List.rev acc
      | _ ->
        let heads = List.map List.hd m in
        let tails = List.map List.tl m in
        transpose_aux tails (heads :: acc)
    in transpose_aux m []
  let outer_product v1 v2 =
    List.map (fun x -> List.map (fun y -> x *. y) v2) v1
  let matrix_add m1 m2 = List.map2 vector_add m1 m2
  let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
  let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
end

(* -----------------------------
   2. Activation Functions
----------------------------- *)
module Activations = struct
  let relu x = if x > 0. then x else 0.
  let relu_derivative x = if x > 0. then 1. else 0.
  let softmax v =
    let max_v = List.fold_left max neg_infinity v in
    let exps = List.map (fun x -> exp (x -. max_v)) v in
    let sum_exps = List.fold_left (+.) 0.0 exps in
    List.map (fun x -> x /. sum_exps) exps
end

(* -----------------------------
   3. Helper: fold_lefti replacement
----------------------------- *)
(* OCaml's Stdlib lacks List.fold_lefti, so we define it manually *)
let fold_lefti f init lst =
  let rec aux i acc = function
    | [] -> acc
    | x::xs -> aux (i+1) (f acc i x) xs
  in aux 0 init lst
(*  Added this helper to replace List.fold_lefti safely. *)
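
(* Illustrative use of fold_lefti (it mirrors the accuracy method defined
   below): finding the index of the largest element of a list. *)
let _ =
  let max_idx l =
    fold_lefti (fun idx_acc i v -> if v > List.nth l idx_acc then i else idx_acc) 0 l
  in
  assert (max_idx [0.1; 0.7; 0.2] = 1)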

(* -----------------------------
   4. MLP Class
----------------------------- *)
class mlp (layer_sizes : int list)
          (activations : (float -> float) list)
          (activation_derivs : (float -> float) list)
          (learning_rate : float) =
  object (self)
    val mutable weights =
      let rand_matrix rows cols =
        let rnd () = (Random.float 2. -. 1.) *. 0.5 in
        List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
      in
      let rec build ws ls =
        match ls with
        | [] | [_] -> ws
        | n1::n2::rest -> build (ws @ [rand_matrix n2 n1]) (n2::rest)
      in build [] layer_sizes

    val mutable biases : Math.vector list =
      List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes)

    method forward (input : Math.vector) : Math.vector list * Math.vector list =
      (* Modified to handle both scalar and vector activations *)
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w::wt, b::bt, act::at ->
          let z = Math.vector_add (Math.mat_vec_mul w (List.hd a)) b in
          (* Last layer (wt = []): apply softmax over the whole output vector;
             hidden layers: apply the scalar activation element-wise. The
             scalar activation supplied for the last layer is only a
             placeholder and is not used here. *)
          let a_next =
            if wt = [] then Activations.softmax z
            else List.map act z
          in
          f (a_next::a) (z::zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in f [input] [] weights biases activations

    method backprop (input : Math.vector) (target : Math.vector) =
      let (a_list, z_list) = self#forward input in
      let a_last = List.hd a_list in
      let delta_output =
        List.map2 ( -. ) a_last target
      in
      let rec propagate ws bs acts deltas grads =
        match ws, bs, acts, deltas with
        | [], [], [], _ -> List.rev grads
        | w::wt, b::bt, act::at, delta::dt ->
          let a_prev = List.nth a_list (List.length grads) in
          let w_grad = Math.outer_product delta a_prev in
          let b_grad = delta in
          let w_t = Math.transpose w in
          let delta_prev_pre = Math.mat_vec_mul w_t delta in
          let delta_prev =
            List.map2 ( *. ) delta_prev_pre (List.map act (List.hd z_list))
          in
          propagate wt bt at (delta_prev::dt) ((w_grad,b_grad)::grads)
        | _ -> failwith "Mismatch in backprop"
      in propagate (List.rev weights) (List.rev biases)
        (List.rev activation_derivs) [delta_output] []

    method update_weights grads =
      let ws', bs' = List.split grads in
      weights <- List.map2 (fun w dw -> Math.matrix_sub w (Math.scalar_matrix_mul learning_rate dw)) weights ws';
      biases <- List.map2 (fun b db -> Math.vector_sub b (Math.scalar_vector_mul learning_rate db)) biases bs'

    method train_batch (inputs : Math.vector list) (targets : Math.vector list) =
      List.iter2 (fun x y ->
        let grads = self#backprop x y in
        self#update_weights grads
      ) inputs targets

    method predict (input : Math.vector) =
      let (activations, _) = self#forward input in
      List.hd (List.rev activations)

    method accuracy (inputs : Math.vector list) (targets : Math.vector list) =
      let correct = ref 0 in
      List.iter2 (fun x y ->
        let pred = self#predict x in
        (* Replace List.fold_lefti with our custom fold_lefti *)
        let max_idx l =
          fold_lefti (fun idx_acc i v -> if v > List.nth l idx_acc then i else idx_acc) 0 l
        in
        if max_idx pred = max_idx y then incr correct
      ) inputs targets;
      (float !correct) /. (float (List.length inputs))
  end
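
(* Reuse sketch (hypothetical dataset shape, never called): per the objective
   above, the same class can be instantiated for other datasets, e.g. 4 input
   features, one hidden layer of 8 units and 3 output classes. *)
let _three_class_net () =
  new mlp [4; 8; 3]
    [Activations.relu; (fun x -> x)]
    [Activations.relu_derivative; (fun _ -> 1.0)]
    0.1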

(* -----------------------------
   5. Example Usage (XOR)
----------------------------- *)
let () =
  Random.self_init ();

  let inputs = [[0.;0.]; [0.;1.]; [1.;0.]; [1.;1.]] in
  let targets = [[0.;1.]; [1.;0.]; [1.;0.]; [0.;1.]] in

  (* The last-layer activation passed below is a scalar identity placeholder;
     the forward pass detects the last layer and applies softmax to the whole
     vector, so predictions are returned as probabilities. *)
  let mlp1 = new mlp [2;2;2] [Activations.relu; (fun x -> x)]
                        [Activations.relu_derivative; (fun _ -> 1.0)] 0.1 in

  for epoch = 1 to 500 do
    mlp1#train_batch inputs targets;
    if epoch mod 50 = 0 then (
      Printf.printf "Epoch %d - Accuracy: %.2f\n" epoch (mlp1#accuracy inputs targets);
      let predictions = List.map mlp1#predict inputs in
      List.iter (fun v ->
        Printf.printf "[%s] " (String.concat "; " (List.map (Printf.sprintf "%.2f") v))
      ) predictions;
      print_newline ()
    )
  done

2. CONCLUSION

In the logged run below, accuracy fluctuates between 0.25 and 0.75 before settling at 0.50, and the predictions drift only slightly away from [0.50; 0.50]; this run of the generic module also fails to separate the XOR classes.

Windows PowerShell
Copyright (C) Microsoft Corporation. All rights reserved.

Install the latest PowerShell for new features and improvements! https://aka.ms/PSWindows

OCaml version: The OCaml toplevel, version 5.3.0
Coq-LSP version: 0.2.3
Loading personal and system profiles took 5415ms.
PS C:\Users\User> ocaml C:\AI2025\generic.ml
Epoch 50 - Accuracy: 0.25
[0.50; 0.50] [0.50; 0.50] [0.48; 0.52] [0.50; 0.50]
Epoch 100 - Accuracy: 0.75
[0.50; 0.50] [0.50; 0.50] [0.48; 0.52] [0.50; 0.50]
Epoch 150 - Accuracy: 0.75
[0.50; 0.50] [0.50; 0.50] [0.48; 0.52] [0.50; 0.50]
Epoch 200 - Accuracy: 0.75
[0.50; 0.50] [0.50; 0.50] [0.48; 0.52] [0.50; 0.50]
Epoch 250 - Accuracy: 0.75
[0.49; 0.51] [0.50; 0.50] [0.48; 0.52] [0.49; 0.51]
Epoch 300 - Accuracy: 0.75
[0.49; 0.51] [0.50; 0.50] [0.47; 0.53] [0.49; 0.51]
Epoch 350 - Accuracy: 0.50
[0.48; 0.52] [0.50; 0.50] [0.46; 0.54] [0.48; 0.52]
Epoch 400 - Accuracy: 0.50
[0.46; 0.54] [0.50; 0.50] [0.44; 0.56] [0.46; 0.54]
Epoch 450 - Accuracy: 0.50
[0.44; 0.56] [0.49; 0.51] [0.42; 0.58] [0.44; 0.56]
Epoch 500 - Accuracy: 0.50
[0.41; 0.59] [0.48; 0.52] [0.39; 0.61] [0.41; 0.59]
PS C:\Users\User>


OCAML FULLY FUNCTIONAL MLP TRAINING LOOP

 

REVISED: Friday, October 10, 2025                                        





1. OCAML FULLY FUNCTIONAL MLP TRAINING LOOP

(*
===========================================================
ocaml C:\AI2025\lesson9.ml

Lesson 9: Fully Functional MLP Training Loop in OCaml
===========================================================

Objective:
1. Train the MLP class with multiple examples.
2. Use cross-entropy loss.
3. Track accuracy during training (see the accuracy sketch after the class definition).
*)

(* -----------------------------
   1. Vector and Matrix Utilities
----------------------------- *)
type vector = float list
type matrix = float list list

let vector_add v1 v2 = List.map2 (+.) v1 v2
let vector_sub v1 v2 = List.map2 (-.) v1 v2
let vector_map f v = List.map f v
let scalar_vector_mul s v = List.map (fun x -> s *. x) v
let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
let transpose m =
  let rec transpose_aux m acc =
    match List.hd m with
    | [] -> List.rev acc
    | _ ->
      let heads = List.map List.hd m in
      let tails = List.map List.tl m in
      transpose_aux tails (heads :: acc)
  in transpose_aux m []

let outer_product v1 v2 =
  List.map (fun x -> List.map (fun y -> x *. y) v2) v1
let matrix_add m1 m2 = List.map2 vector_add m1 m2
let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m

(* -----------------------------
   2. Activations and Derivatives
----------------------------- *)
let relu x = if x > 0. then x else 0.
let relu_derivative x = if x > 0. then 1. else 0.

let softmax v =
  (* Softmax operates on an entire vector, so its type is float list -> float list *)
  let max_v = List.fold_left max neg_infinity v in
  let exps = List.map (fun x -> exp (x -. max_v)) v in
  let sum_exps = List.fold_left (+.) 0.0 exps in
  List.map (fun x -> x /. sum_exps) exps
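
(* Quick numeric check (illustrative): equal inputs give equal probabilities,
   e.g. softmax [0.; 0.] = [0.5; 0.5], and the outputs always sum to 1. *)
let _ = assert (softmax [0.; 0.] = [0.5; 0.5])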

(* -----------------------------
   3. MLP Class
----------------------------- *)
class mlp (layer_sizes : int list)
          (activations : (float -> float) list)
          (activation_derivs : (float -> float) list)
          (learning_rate : float) =
  object (self)
    val mutable weights : matrix list =
      let rand_matrix rows cols =
        let rnd () = (Random.float 2. -. 1.) *. 0.5 in
        List.init rows (fun _ -> List.init cols (fun _ -> rnd ()))
      in
      let rec build ws ls =
        match ls with
        | [] | [_] -> ws
        | n1::n2::rest -> build (ws @ [rand_matrix n2 n1]) (n2::rest)
      in
      build [] layer_sizes

    val mutable biases : vector list =
      List.map (fun n -> List.init n (fun _ -> 0.0)) (List.tl layer_sizes)

    method forward (input : vector) : vector list * vector list =
      let rec f a zs ws bs acts =
        match ws, bs, acts with
        | [], [], [] -> (List.rev a, List.rev zs)
        | w::wt, b::bt, act::at ->
          let z = vector_add (mat_vec_mul w (List.hd a)) b in
          let a_next = vector_map act z in
          f (a_next::a) (z::zs) wt bt at
        | _ -> failwith "Mismatched layers/activations"
      in
      f [input] [] weights biases activations

    method backprop (input : vector) (target : vector) =
      let (a_list, z_list) = self#forward input in
      let a_last = List.hd a_list in
      let delta_output = vector_sub a_last target in

      let rec propagate ws bs acts deltas grads =
        match ws, bs, acts, deltas with
        | [], [], [], _ -> List.rev grads
        | w::wt, b::bt, act::at, delta::dt ->
          let a_prev = List.nth a_list (List.length grads) in
          let w_grad = outer_product delta a_prev in
          let b_grad = delta in
          let w_t = transpose w in
          let delta_prev_pre = mat_vec_mul w_t delta in
          let delta_prev =
            List.map2 ( *. ) delta_prev_pre (List.map act (List.hd z_list))
          in
          propagate wt bt at (delta_prev::dt) ((w_grad,b_grad)::grads)
        | _ -> failwith "Mismatch in backprop"
      in
      propagate (List.rev weights) (List.rev biases)
        (List.rev activation_derivs) [delta_output] []

    method update_weights grads =
      let ws', bs' = List.split grads in
      weights <- List.map2 (fun w dw -> matrix_sub w (scalar_matrix_mul learning_rate dw)) weights ws';
      biases <- List.map2 (fun b db -> vector_sub b (scalar_vector_mul learning_rate db)) biases bs'

    method train_batch (inputs : vector list) (targets : vector list) =
      List.iter2 (fun x y ->
        let grads = self#backprop x y in
        self#update_weights grads
      ) inputs targets

    method predict (input : vector) =
      let (activations, _) = self#forward input in
      List.hd (List.rev activations)
  end
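
(* Sketch of an accuracy helper (not part of the original lesson code; it
   mirrors the accuracy method of the generic modules and assumes one-hot
   targets): the predicted class is the argmax of the output vector.
   Usage would be e.g.  Printf.printf "%.2f\n" (accuracy net inputs targets). *)
let accuracy (net : mlp) (inputs : vector list) (targets : vector list) : float =
  let max_idx l =
    let rec aux i best_i best_v = function
      | [] -> best_i
      | x :: xs ->
        if x > best_v then aux (i + 1) i x xs else aux (i + 1) best_i best_v xs
    in
    match l with
    | [] -> 0
    | x :: xs -> aux 1 0 x xs
  in
  let correct =
    List.fold_left2
      (fun acc x y -> if max_idx (net#predict x) = max_idx y then acc + 1 else acc)
      0 inputs targets
  in
  float_of_int correct /. float_of_int (List.length inputs)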

(* -----------------------------
   4. Training Example (XOR)
----------------------------- *)
let () =
  Random.self_init ();

  (* XOR dataset *)
  let inputs = [[0.;0.]; [0.;1.]; [1.;0.]; [1.;1.]] in
  let targets = [[0.;1.]; [1.;0.]; [1.;0.]; [0.;1.]] in

  (* 2-input -> 2-hidden -> 2-output MLP *)
  (* Replace `softmax` (which is float list -> float list)
     with an identity scalar function `(fun x -> x)`
     because the MLP expects all activations to be (float -> float). *)
  let mlp1 = new mlp [2;2;2] [relu; (fun x -> x)] [relu_derivative; (fun _ -> 1.0)] 0.1 in

  (* Training loop *)
  for epoch = 1 to 500 do
    mlp1#train_batch inputs targets;
    if epoch mod 50 = 0 then (
      (*  Apply `softmax` after prediction to normalize output probabilities.
         This keeps our internal activations scalar while producing a proper probability vector. *)
      let predictions = List.map (fun inp -> softmax (mlp1#predict inp)) inputs in
      Printf.printf "Epoch %d: " epoch;
      List.iter (fun v ->
        Printf.printf "[%s] " (String.concat "; " (List.map (Printf.sprintf "%.2f") v))
      ) predictions;
      print_newline ()
    )
  done

2. CONCLUSION

The logged run below prints the softmaxed predictions every 50 epochs. They remain [0.50; 0.50] for all four inputs through epoch 500, so this training loop does not learn XOR in the run shown.

Windows PowerShell
Copyright (C) Microsoft Corporation. All rights reserved.

Install the latest PowerShell for new features and improvements! https://aka.ms/PSWindows

OCaml version: The OCaml toplevel, version 5.3.0
Coq-LSP version: 0.2.3
Loading personal and system profiles took 1171ms.
PS C:\Users\User> ocaml C:\AI2025\lesson9.ml
Epoch 50: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 100: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 150: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 200: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 250: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 300: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 350: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 400: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 450: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
Epoch 500: [0.50; 0.50] [0.50; 0.50] [0.50; 0.50] [0.50; 0.50]
PS C:\Users\User>
