-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathex2_2PL.r
More file actions
107 lines (89 loc) · 4.46 KB
/
ex2_2PL.r
File metadata and controls
107 lines (89 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#######################################################################
# example file 2 - 2PL (unidimensional) IRT
#
# This example file loads a dichotomous response matrix
# and runs the collaborative filter for one model, e.g. 2PL
#
# One regularization parameter is specified lambda
# (provides a tunable control on parameter spreading)
#
# information-based errors are computed and, for 2PL
# parameters are rescaled under the assumption of unit normal
# distribution of abilities
#
# provided response matrix must either have NAs or -9 for
# omitted responses, or edit this in the read-in
#
# other parameters
# models: specify list of models (see example)
# mask_fraction: how much of response matrix to hold out from training
# numTries: how many times to random subsample with each model
#
# Yoav Bergner, 2012
#######################################################################
source("cfirt.r")
### filenames
datafile = 'cfirtdata.txt'
ICCfile = 'cfirtICC'
### model spec, see documentation for details
### this example is unidimensional 2PL
model = "2PL"
dimX <- c(2,0)
dimTheta <- c(1,1)
lambda = 1 ## regularization hyperparameter. Higher dimensional models usually need larger lambda.
#######################################################################
### read-in
#######################################################################
U <- as.matrix(read.table(datafile, row.names=1,header=TRUE)) ## use this if header row can be parsed by R
U[is.na(U)] <- -9 ## replace NAs by numeric value (-9) so that when multiplied by 0 get 0 not NA
numStudents = nrow(U)
numItems = ncol(U)
R <- (U != -9) + 0 # R matrix hold 1 for scored responses 0 if omitted
#######################################################################
### BEGIN parameter estimation
#######################################################################
### initialize guess using random normal distributed values for all parameters
X.start = rnorm(numItems*dimX[1])
Theta.start = rnorm(numStudents*dimTheta[1])
initial_parameters = c(X.start,Theta.start)
### this is where the regularize cost function (-loglikelihood) is minimized
fn <- function(param.vec) cfirtCost(param.vec, U, R, dimX, dimTheta, lambda)
gr <- function(param.vec) cfirtGrad(param.vec, U, R, dimX, dimTheta, lambda)
### L-BFGS-B is much faster, when it doesn't throw errors, BFGS takes 5-10 times as long but usually works, so
res <- tryCatch(optim(initial_parameters, fn, gr, method="L-BFGS-B", control=list(trace=1,maxit=500)),
error=function(e){
cat("** L-BFGS-B method failed. Trying BFGS...slow... \n")
optim(initial_parameters, fn, gr, method="BFGS", control=list(trace=1,maxit=500))})
if (res$convergence) cat("There were convergence issues. \n")
### just reshaping the vector back into parameter matrices.
param.matrices <- reshape.params(res$par, numStudents, numItems, dimX, dimTheta)
X <- param.matrices$X
Theta <- param.matrices$Theta
### errors are estimated using Fisher information
ddlogL <- cfirtDD(res$par, U, R, dimX, dimTheta) # for errors
error.est <- reshape.params(1/sqrt(ddlogL), numStudents, numItems, dimX, dimTheta)
X_se = error.est$X
Theta_se = error.est$Theta
### for unidimensional 2PL, rescaling (make skills unit normal) and converting to diff/discrim style
if (model == "2PL") {
## without errors
rescaled <- RescaleSI(Theta[,2],X[,1],X[,2],noerrs=TRUE)
resX <- cbind(rescaled$X1, rescaled$X2)
resTheta <- rescaled$Theta
ddstyle <- SI2DD(resX[,1],resX[,2],noerrs=TRUE)
itemparams <- data.frame(diffs = ddstyle$diffs, discrims = ddstyle$discrims)
# ## with errors, make each parameter a two-column matrix
# rescaled <- RescaleSI(cbind(Theta[,2],Theta_se),cbind(X[,1],X_se[,1]),cbind(X[,2],X_se[,2]))
# resX.w.err <- cbind(rescaled$X1, rescaled$X2)
# resTheta.w.err <- rescaled$Theta
#
# ddstyle <- SI2DD(cbind(resX.w.err[,1],resX.w.err[,2]),cbind(resX.w.err[,3],resX.w.err[,4]))
# itemparams.w.err <- data.frame(diffs = ddstyle$diffs[,1], diffs.se = ddstyle$diffs[,2], discrims = ddstyle$discrims[,1], discrims.se = ddstyle$discrims[,2])
## generate ICCs if so desired, see makeICCs.r for details
U[U == -9] <- NA # put NAs back in
ICCitems <- 1:ncol(U) # defaults to plotting ICCs for all items, otherwise change this
## binning choice: I like to base the binning on dividing the [0,1] interval and using the logit transformation
x = seq(0.04,0.96,by=0.04)
bins = log(x/(1-x))
chiSq <- makeICCs(U[,ICCitems], resTheta, ddstyle$diffs[ICCitems], ddstyle$discrims[ICCitems], prefix=ICCfile, bins=bins)
}