Wednesday, October 8, 2025

OCAML MULTI-LAYER NEURAL NETWORK (MLP)

REVISED: Wednesday, October 8, 2025

1. OCAML MULTI-LAYER NEURAL NETWORK (MLP)

(*
========================================
ocaml C:\AI2025\lesson7.ml

Lesson 7: Multi-Layer Neural Network (MLP)
========================================

Objective:
1. Implement a fully connected network with two layers.
2. Add softmax output and cross-entropy loss.
3. Perform the forward pass, backpropagation, and gradient-descent parameter updates over the training set.
*)

(* -----------------------------
   1. Vector and Matrix Utilities
----------------------------- *)

type vector = float list
type matrix = float list list

let vector_add v1 v2 = List.map2 (+.) v1 v2
let vector_sub v1 v2 = List.map2 (-.) v1 v2
let vector_map f v = List.map f v
let scalar_vector_mul s v = List.map (fun x -> s *. x) v
let dot_product v1 v2 = List.fold_left (+.) 0.0 (List.map2 ( *. ) v1 v2)
let mat_vec_mul m v = List.map (fun row -> dot_product row v) m
let transpose m =
  let rec transpose_aux m acc =
    match List.hd m with
    | [] -> List.rev acc
    | _ ->
      let heads = List.map List.hd m in
      let tails = List.map List.tl m in
      transpose_aux tails (heads :: acc)
  in transpose_aux m []
let outer_product v1 v2 =
  List.map (fun x -> List.map (fun y -> x *. y) v2) v1
let matrix_add m1 m2 = List.map2 vector_add m1 m2
let matrix_sub m1 m2 = List.map2 vector_sub m1 m2
let scalar_matrix_mul s m = List.map (scalar_vector_mul s) m
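
(* Illustrative uses of the helpers above (hand-checked examples, not part of
   the lesson's original listing):
     dot_product [1.; 2.; 3.] [4.; 5.; 6.]      gives 32.
     mat_vec_mul [[1.; 2.]; [3.; 4.]] [1.; 1.]  gives [3.; 7.]
     transpose [[1.; 2.]; [3.; 4.]; [5.; 6.]]   gives [[1.; 3.; 5.]; [2.; 4.; 6.]]
     outer_product [1.; 2.] [3.; 4.]            gives [[3.; 4.]; [6.; 8.]]
   Note that transpose assumes a non-empty, rectangular matrix. *)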

(* -----------------------------
   2. Activation Functions
----------------------------- *)

let relu x = if x > 0. then x else 0.
let relu_derivative x = if x > 0. then 1. else 0.
let softmax v =
  let max_v = List.fold_left max neg_infinity v in
  let exps = List.map (fun x -> exp (x -. max_v)) v in
  let sum_exps = List.fold_left (+.) 0.0 exps in
  List.map (fun x -> x /. sum_exps) exps
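
(* Example: softmax [1.0; 2.0; 3.0] gives approximately [0.090; 0.245; 0.665].
   Subtracting the maximum before exponentiating leaves the result unchanged
   (softmax is shift-invariant) but prevents overflow: for inputs such as
   [1000.; 1001.; 1002.] the naive exponentials would overflow to infinity and
   the quotient would be nan. *)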

(* -----------------------------
   3. Loss Function
----------------------------- *)

let cross_entropy_loss predicted target =
  (* predicted and target are vectors of the same length; eps guards log 0.
     Computes - sum_i target_i * log (predicted_i + eps). *)
  let eps = 1e-12 in
  -. List.fold_left (+.) 0.0 (List.map2 (fun p t -> t *. log (p +. eps)) predicted target)
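
(* Worked example: for a one-hot target only the true class contributes, so
   cross_entropy_loss [0.7; 0.3] [1.0; 0.0] = -. log 0.7, which is about 0.357
   (the eps term only matters when a predicted probability is exactly 0.). *)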

(* -----------------------------
   4. Network Parameters
----------------------------- *)

let weights1 : matrix = [
  [0.5; -0.3];
  [0.2; 0.8];
  [-0.5; 0.1]
]

let biases1 : vector = [0.1; -0.1; 0.05]

let weights2 : matrix = [
  [0.3; -0.2; 0.4];
  [0.1; 0.5; -0.3]
]

let biases2 : vector = [0.05; -0.05]

let learning_rate = 0.01
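
(* Shapes: weights1 is 3x2 and weights2 is 2x3, so the network maps a
   2-feature input to 3 ReLU hidden units and then to 2 softmax outputs
   (one probability per class).  learning_rate is the step size used in the
   gradient-descent updates below. *)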

(* -----------------------------
   5. Forward Pass
----------------------------- *)

let forward input =
  (* Forward pass using the initial global parameters defined above.
     A version taking explicit parameters (`forward_with_params`, defined
     below) is used to apply the parameters returned by `train`. *)
  (* Layer 1 *)
  let z1 = vector_add (mat_vec_mul weights1 input) biases1 in
  let a1 = vector_map relu z1 in

  (* Layer 2 *)
  let z2 = vector_add (mat_vec_mul weights2 a1) biases2 in
  let a2 = softmax z2 in
  (a1, a2, z1, z2)
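
(* Worked example (hand-computed from the initial parameters above): for the
   input [1.0; 2.0],
     z1 = [0.5*1.0 - 0.3*2.0 + 0.1;  0.2*1.0 + 0.8*2.0 - 0.1;  -0.5*1.0 + 0.1*2.0 + 0.05]
        = [0.0; 1.7; -0.25]
     a1 = relu z1 = [0.0; 1.7; 0.0]
   so only the second hidden unit is active for this input. *)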

(* -----------------------------
   6. Backpropagation
----------------------------- *)

let backprop (w1, b1, w2, b2) input target =
  (* Forward pass with the current parameters (the same computation as
     `forward`, but using the weights and biases being trained) *)
  let z1 = vector_add (mat_vec_mul w1 input) b1 in
  let a1 = vector_map relu z1 in
  let z2 = vector_add (mat_vec_mul w2 a1) b2 in
  let a2 = softmax z2 in

  (* Output layer error (softmax + cross-entropy derivative): dL/dz2 = a2 - target *)
  let delta2 = vector_sub a2 target in

  (* Gradients for the second layer *)
  let weight2_grad = outer_product delta2 a1 in
  let bias2_grad = delta2 in

  (* Hidden layer error: backpropagate delta2 through w2, gated by relu' *)
  let w2_t = transpose w2 in
  let delta1_pre = mat_vec_mul w2_t delta2 in
  let delta1 = List.map2 ( *. ) delta1_pre (vector_map relu_derivative z1) in

  (* Gradients for the first layer *)
  let weight1_grad = outer_product delta1 input in
  let bias1_grad = delta1 in

  (* Gradient-descent update of all parameters *)
  let w1_new = matrix_sub w1 (scalar_matrix_mul learning_rate weight1_grad) in
  let b1_new = vector_sub b1 (scalar_vector_mul learning_rate bias1_grad) in
  let w2_new = matrix_sub w2 (scalar_matrix_mul learning_rate weight2_grad) in
  let b2_new = vector_sub b2 (scalar_vector_mul learning_rate bias2_grad) in

  (w1_new, b1_new, w2_new, b2_new)
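
(* Sanity-check sketch of the shortcut used above: for softmax followed by
   cross-entropy, dL/dz2 = a2 - target.  The check below compares that
   analytic gradient with a central finite-difference estimate of
   cross_entropy_loss; the logits z, the step h, and the tolerance are
   illustrative choices, not part of the lesson's interface. *)
let () =
  let z = [0.2; -0.4] in
  let target = [1.0; 0.0] in
  let h = 1e-6 in
  let analytic = vector_sub (softmax z) target in
  let numeric =
    List.mapi
      (fun i _ ->
        let bump d = List.mapi (fun j x -> if i = j then x +. d else x) z in
        (cross_entropy_loss (softmax (bump h)) target
         -. cross_entropy_loss (softmax (bump (-. h))) target)
        /. (2. *. h))
      z
  in
  List.iter2 (fun a n -> assert (abs_float (a -. n) < 1e-4)) analytic numeric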

(* -----------------------------
   7. Training Loop
----------------------------- *)

let train inputs targets epochs =
  let rec loop params epoch =
    if epoch = 0 then params
    else
      (* One epoch: fold over the dataset, threading the updated parameters
         from one example to the next *)
      let params' = List.fold_left2 backprop params inputs targets in
      loop params' (epoch - 1)
  in
  loop (weights1, biases1, weights2, biases2) epochs
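
(* Sketch of a loss monitor (`dataset_loss` is a name introduced here for
   illustration, not part of the lesson's original interface): mean
   cross-entropy over a dataset for a given parameter tuple, recomputing the
   forward pass with those parameters.  For example,
   `dataset_loss (train inputs targets epochs) inputs targets` would report
   the mean loss after training. *)
let dataset_loss (w1, b1, w2, b2) xs ys =
  let total =
    List.fold_left2
      (fun acc x y ->
        let a1 = vector_map relu (vector_add (mat_vec_mul w1 x) b1) in
        let a2 = softmax (vector_add (mat_vec_mul w2 a1) b2) in
        acc +. cross_entropy_loss a2 y)
      0.0 xs ys
  in
  total /. float_of_int (List.length xs)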

(* -----------------------------
   8. Example usage
----------------------------- *)

let inputs = [
  [1.0; 2.0];
  [0.5; -1.0];
  [0.0; 0.0]
]

let targets = [
  [1.0; 0.0];
  [0.0; 1.0];
  [1.0; 0.0]
]

let epochs = 1000

(* -----------------------------
   9. Applying the Trained Parameters
   - `forward_with_params` runs a forward pass with explicitly supplied
     weights and biases, so the parameters returned by `train` can be used
     when printing the predictions.
----------------------------- *)

(* Forward pass using explicitly supplied parameters, used to apply the
   parameters returned by `train`. *)
let forward_with_params w1_param b1_param w2_param b2_param input =
  let z1 = vector_add (mat_vec_mul w1_param input) b1_param in  (* hidden pre-activations *)
  let a1 = vector_map relu z1 in                                (* hidden activations *)
  let z2 = vector_add (mat_vec_mul w2_param a1) b2_param in     (* output pre-activations *)
  let a2 = softmax z2 in                                        (* output probabilities *)
  (a1, a2, z1, z2)                                              (* same tuple shape as `forward` *)

let () =
  (* Train the network and capture the trained parameters *)
  let w1_tr, b1_tr, w2_tr, b2_tr = train inputs targets epochs in
  (* Print each input with its prediction under the trained parameters and its target *)
  List.iter2 (fun inp targ ->
    let (_, pred, _, _) = forward_with_params w1_tr b1_tr w2_tr b2_tr inp in
    Printf.printf "Input: [%s] -> Predicted: [%s], Target: [%s]\n"
      (String.concat "; " (List.map string_of_float inp))
      (String.concat "; " (List.map string_of_float pred))
      (String.concat "; " (List.map string_of_float targ))
  ) inputs targets

2. CONCLUSION

OCaml version: The OCaml toplevel, version 5.3.0
Coq-LSP version: 0.2.3
PS C:\Users\User> ocaml C:\AI2025\lesson7.ml
Input: [1.; 2.] -> Predicted: [0.253394746034; 0.746605253966], Target: [1.; 0.]
Input: [0.5; -1.] -> Predicted: [0.559716521644; 0.440283478356], Target: [0.; 1.]
Input: [0.; 0.] -> Predicted: [0.541600918545; 0.458399081455], Target: [1.; 0.]
PS C:\Users\User>
