|
3 | 3 | require "matrix" |
4 | 4 |
|
5 | 5 | module IrtRuby |
6 | | - # A class representing the Three-Parameter model for Item Response Theory. |
| 6 | + # A class representing the Three-Parameter model (3PL) for Item Response Theory. |
| 7 | + # Incorporates: |
| 8 | + # - Adaptive learning rate |
| 9 | + # - Missing data handling |
| 10 | + # - Parameter clamping for discrimination, guessing |
| 11 | + # - Multiple convergence checks |
| 12 | + # - Separate gradient calculation & updates |
7 | 13 | class ThreeParameterModel |
8 | | - def initialize(data, max_iter: 1000, tolerance: 1e-6, learning_rate: 0.01) |
| 14 | + def initialize(data, max_iter: 1000, tolerance: 1e-6, param_tolerance: 1e-6, |
| 15 | + learning_rate: 0.01, decay_factor: 0.5) |
9 | 16 | @data = data |
10 | | - @abilities = Array.new(data.row_count) { rand } |
11 | | - @difficulties = Array.new(data.column_count) { rand } |
12 | | - @discriminations = Array.new(data.column_count) { rand } |
13 | | - @guessings = Array.new(data.column_count) { rand * 0.3 } |
14 | | - @max_iter = max_iter |
15 | | - @tolerance = tolerance |
16 | | - @learning_rate = learning_rate |
| 17 | + @data_array = data.to_a |
| 18 | + num_rows = @data_array.size |
| 19 | + num_cols = @data_array.first.size |
| 20 | + |
| 21 | + # Typical initialization for 3PL |
| 22 | + @abilities = Array.new(num_rows) { rand(-0.25..0.25) } |
| 23 | + @difficulties = Array.new(num_cols) { rand(-0.25..0.25) } |
| 24 | + @discriminations = Array.new(num_cols) { rand(0.5..1.5) } |
| 25 | + @guessings = Array.new(num_cols) { rand(0.0..0.3) } |
| 26 | + |
| 27 | + @max_iter = max_iter |
| 28 | + @tolerance = tolerance |
| 29 | + @param_tolerance = param_tolerance |
| 30 | + @learning_rate = learning_rate |
| 31 | + @decay_factor = decay_factor |
17 | 32 | end |
18 | 33 |
|
19 | | - # Sigmoid function to calculate probability |
20 | 34 | def sigmoid(x) |
21 | 35 | 1.0 / (1.0 + Math.exp(-x)) |
22 | 36 | end |
23 | 37 |
|
24 | | - # Probability function for the 3PL model |
| 38 | + # Probability for the 3PL model: c + (1-c)*sigmoid(a*(θ - b)) |
25 | 39 | def probability(theta, a, b, c) |
26 | | - c + (1 - c) * sigmoid(a * (theta - b)) |
| 40 | + c + (1.0 - c) * sigmoid(a * (theta - b)) |
27 | 41 | end |
28 | 42 |
|
29 | | - # Calculate the log-likelihood of the data given the current parameters |
30 | | - def likelihood |
31 | | - likelihood = 0 |
32 | | - @data.row_vectors.each_with_index do |row, i| |
33 | | - row.to_a.each_with_index do |response, j| |
34 | | - prob = probability(@abilities[i], @discriminations[j], @difficulties[j], @guessings[j]) |
35 | | - likelihood += response == 1 ? Math.log(prob) : Math.log(1 - prob) |
| 43 | + def log_likelihood |
| 44 | + ll = 0.0 |
| 45 | + @data_array.each_with_index do |row, i| |
| 46 | + row.each_with_index do |resp, j| |
| 47 | + next if resp.nil? |
| 48 | + |
| 49 | + prob = probability(@abilities[i], @discriminations[j], |
| 50 | + @difficulties[j], @guessings[j]) |
| 51 | + ll += if resp == 1 |
| 52 | + Math.log(prob + 1e-15) |
| 53 | + else |
| 54 | + Math.log((1 - prob) + 1e-15) |
| 55 | + end |
36 | 56 | end |
37 | 57 | end |
38 | | - likelihood |
| 58 | + ll |
39 | 59 | end |
40 | 60 |
|
41 | | - # Update parameters using gradient ascent |
42 | | - def update_parameters |
43 | | - last_likelihood = likelihood |
44 | | - @max_iter.times do |_iter| |
45 | | - @data.row_vectors.each_with_index do |row, i| |
46 | | - row.to_a.each_with_index do |response, j| |
47 | | - prob = probability(@abilities[i], @discriminations[j], @difficulties[j], @guessings[j]) |
48 | | - error = response - prob |
49 | | - @abilities[i] += @learning_rate * error * @discriminations[j] |
50 | | - @difficulties[j] -= @learning_rate * error * @discriminations[j] |
51 | | - @discriminations[j] += @learning_rate * error * (@abilities[i] - @difficulties[j]) |
52 | | - @guessings[j] += @learning_rate * error * (1 - prob) |
53 | | - @guessings[j] = [[@guessings[j], 0].max, 1].min # Keep guessings within [0, 1] |
54 | | - end |
| 61 | + def compute_gradient |
| 62 | + grad_abilities = Array.new(@abilities.size, 0.0) |
| 63 | + grad_difficulties = Array.new(@difficulties.size, 0.0) |
| 64 | + grad_discriminations = Array.new(@discriminations.size, 0.0) |
| 65 | + grad_guessings = Array.new(@guessings.size, 0.0) |
| 66 | + |
| 67 | + @data_array.each_with_index do |row, i| |
| 68 | + row.each_with_index do |resp, j| |
| 69 | + next if resp.nil? |
| 70 | + |
| 71 | + theta = @abilities[i] |
| 72 | + a = @discriminations[j] |
| 73 | + b = @difficulties[j] |
| 74 | + c = @guessings[j] |
| 75 | + |
| 76 | + prob = probability(theta, a, b, c) |
| 77 | + error = resp - prob |
| 78 | + |
| 79 | + grad_abilities[i] += error * a * (1 - c) |
| 80 | + grad_difficulties[j] -= error * a * (1 - c) |
| 81 | + grad_discriminations[j] += error * (theta - b) * (1 - c) |
| 82 | + |
| 83 | + grad_guessings[j] += error * 1.0 |
55 | 84 | end |
56 | | - current_likelihood = likelihood |
57 | | - break if (last_likelihood - current_likelihood).abs < @tolerance |
| 85 | + end |
58 | 86 |
|
59 | | - last_likelihood = current_likelihood |
| 87 | + [grad_abilities, grad_difficulties, grad_discriminations, grad_guessings] |
| 88 | + end |
| 89 | + |
| 90 | + def apply_gradient_update(ga, gd, gdisc, gc) |
| 91 | + old_abilities = @abilities.dup |
| 92 | + old_difficulties = @difficulties.dup |
| 93 | + old_discriminations = @discriminations.dup |
| 94 | + old_guessings = @guessings.dup |
| 95 | + |
| 96 | + @abilities.each_index do |i| |
| 97 | + @abilities[i] += @learning_rate * ga[i] |
| 98 | + end |
| 99 | + |
| 100 | + @difficulties.each_index do |j| |
| 101 | + @difficulties[j] += @learning_rate * gd[j] |
| 102 | + end |
| 103 | + |
| 104 | + @discriminations.each_index do |j| |
| 105 | + @discriminations[j] += @learning_rate * gdisc[j] |
| 106 | + @discriminations[j] = 0.01 if @discriminations[j] < 0.01 |
| 107 | + @discriminations[j] = 5.0 if @discriminations[j] > 5.0 |
60 | 108 | end |
| 109 | + |
| 110 | + @guessings.each_index do |j| |
| 111 | + @guessings[j] += @learning_rate * gc[j] |
| 112 | + @guessings[j] = 0.0 if @guessings[j] < 0.0 |
| 113 | + @guessings[j] = 0.35 if @guessings[j] > 0.35 |
| 114 | + end |
| 115 | + |
| 116 | + [old_abilities, old_difficulties, old_discriminations, old_guessings] |
| 117 | + end |
| 118 | + |
| 119 | + def average_param_update(old_a, old_d, old_disc, old_c) |
| 120 | + deltas = [] |
| 121 | + @abilities.each_with_index do |x, i| |
| 122 | + deltas << (x - old_a[i]).abs |
| 123 | + end |
| 124 | + @difficulties.each_with_index do |x, j| |
| 125 | + deltas << (x - old_d[j]).abs |
| 126 | + end |
| 127 | + @discriminations.each_with_index do |x, j| |
| 128 | + deltas << (x - old_disc[j]).abs |
| 129 | + end |
| 130 | + @guessings.each_with_index do |x, j| |
| 131 | + deltas << (x - old_c[j]).abs |
| 132 | + end |
| 133 | + deltas.sum / deltas.size |
61 | 134 | end |
62 | 135 |
|
63 | | - # Fit the model to the data |
64 | 136 | def fit |
65 | | - update_parameters |
| 137 | + prev_ll = log_likelihood |
| 138 | + |
| 139 | + @max_iter.times do |
| 140 | + ga, gd, gdisc, gc = compute_gradient |
| 141 | + old_a, old_d, old_disc, old_c = apply_gradient_update(ga, gd, gdisc, gc) |
| 142 | + |
| 143 | + curr_ll = log_likelihood |
| 144 | + param_delta = average_param_update(old_a, old_d, old_disc, old_c) |
| 145 | + |
| 146 | + if curr_ll < prev_ll |
| 147 | + @abilities = old_a |
| 148 | + @difficulties = old_d |
| 149 | + @discriminations = old_disc |
| 150 | + @guessings = old_c |
| 151 | + |
| 152 | + @learning_rate *= @decay_factor |
| 153 | + else |
| 154 | + ll_diff = (curr_ll - prev_ll).abs |
| 155 | + break if ll_diff < @tolerance && param_delta < @param_tolerance |
| 156 | + |
| 157 | + prev_ll = curr_ll |
| 158 | + end |
| 159 | + end |
| 160 | + |
66 | 161 | { |
67 | 162 | abilities: @abilities, |
68 | 163 | difficulties: @difficulties, |
|
0 commit comments