##########INSTRUCTION###############################
#You only need to specify your data folder and csv dataset name in the "INPUTS" section below
#The output dataset will be produced in the folder you specify
#Comment: This program calculates scale scores for each survey taker. The calculated scale scores are just
# additional analysis variables attached to your input dataset. If your survey has a complex sample
# design (e.g., you took a sample from your target population with stratification, clustering, etc.),
# you should apply your design specifications (strata, cluster, weights, etc.) as you would do when
# analyzing other survey variables.
####################################################
##########INPUTS###############################
#Specify your data folder and csv dataset name
folder <- "FOLDER PATH"
dataname <- "instructional staff data.csv"
##########SCALING PROGROM######################
#Option for decimal places
options(digits=10)
#Rounding function that rounds .5 up to the next integer
round2 = function(x, n) {
posneg = sign(x)
z = abs(x)*10^n
z = z + 0.5
z = trunc(z)
z = z/10^n
z*posneg
}
#Scale score calculation function
rasch <- function(scalelist=NULL){
#Stepvalues that are estimated from the national benchmarking. These are the values in Table A5 of the national benchmarking documentation
#(https://safesupportivelearning.ed.gov/sites/default/files/EDSCLS_Psychometric_Benchmarking_Technical_Report.pdf).
#These three sets of values are used to calculate the scale scores for any survey takers.
c0 <- c("IENGCLC2","IENGCLC3","IENGCLC4","IENGCLC6","IENGCLC7","IENGCLC8","IENGREL9","IENGREL10","IENGREL12","IENGREL14","IENGREL15",
"IENGPAR29","IENGPAR31","IENGPAR32","IENGPAR36","IENGPAR42","IENGPAR48","ISAFEMO52","ISAFEMO53","ISAFEMO54","ISAFEMO55",
"ISAFEMO56","ISAFEMO58","ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67","ISAFBUL68",
"ISAFBUL69","ISAFBUL71","ISAFBUL73","ISAFBUL79","ISAFBUL80","ISAFBUL81","ISAFBUL82","ISAFSUB86","ISAFSUB87","ISAFSUB88","ISAFSUB91",
"IENVPENV97","IENVPENV98","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103","IENVINS105","IENVINS107","IENVINS108",
"IENVINS110","IENVINS115","IENVINS116","IENVPHEA119","IENVPHEA120","IENVPHEA121","IENVPHEA122","IENVMEN123","IENVMEN125",
"IENVMEN126","IENVMEN128","IENVMEN137","IENVDIS129","IENVDIS130","IENVDIS134","IENVDIS134C","IENVDIS135","IENVDIS136")
c1 <- c(-2.23882,-2.1565,-2.61022,-2.72424,-1.78468,-2.18346,-3.02788,-3.22684,-4.12339,-2.32472,-2.97012,-1.64759,-1.47893,-1.83183,-1.11894,-2.28667,-2.70965,-1.42847,-0.92065,-0.92441,-1.62728,-1.32451,-2.51414,-1.06057,-1.90306,-1.86559,-3.0375,-2.14065,-0.55663,-0.98262,-0.77695,-2.52361,-2.61231,-2.28638,-2.09304,-2.17942,-2.01788,-2.00464,-2.09068,-2.06953,-2.41182,-2.00263,-2.09549,-1.75794,-1.55537,-1.61969,-1.31004,-1.14778,-1.54741,-2.46068,-2.87387,-3.11921,-2.09029,-2.57512,-2.25692,-3.02897,-2.43883,-2.07875,-2.19165,-2.21058,-2.17242,-1.99021,-2.30585,-2.91403,-1.13436,-1.39281,-1.23245,-2.30109)
c2 <- c(-0.79286,-1.23134,-0.48501,-1.52987,0.14887,-0.80117,-1.31162,-0.8068,-0.3855,0.08648,-1.66979,-0.18685,0.23174,0.95631,0.45601,0.09988,-1.61862,-1.18202,-0.08686,-0.10689,-0.51158,-1.37726,-1.22259,0.00142,-0.25214,-0.38786,-1.91849,-1.97713,0.62288,0.84634,1.43081,-0.48166,-0.79026,-1.89,-2.12925,-2.10602,-1.89899,0.08366,0.09402,-0.35578,0.22724,-1.31231,-1.38865,-0.64084,-1.02356,-0.74721,-0.46397,0.49282,0.38599,-1.08837,-1.38221,-2.31483,-1.01021,-0.17834,-0.28418,-0.80828,-0.39363,-1.12673,-0.13682,-0.19846,0.5032,-0.29119,-1.0817,-2.43138,0.16961,-0.02434,0.14534,-1.50784)
c3 <- c(1.63635,1.6155,3.37974,1.8249,2.95479,3.1319,2.68007,2.92047,3.8331,3.0916,2.16597,3.1684,3.2615,4.79256,3.49956,3.78357,1.54464,1.84011,2.70973,2.13721,1.98949,2.08055,2.14039,3.10718,2.78974,2.89179,1.35532,0.98397,2.86862,4.3873,4.28451,3.55645,2.1421,1.04757,1.02448,0.94933,0.96563,3.3934,3.48374,3.19388,3.39829,1.49996,1.69196,2.34516,2.3859,2.53191,2.63674,4.44998,4.13798,3.33738,2.09191,1.70813,2.27599,3.11342,3.19722,3.13805,3.23224,2.2716,3.24814,3.25992,3.93348,3.36141,2.16889,1.70719,2.46815,2.76214,2.74392,2.24373)
Rstepvalues <- as.data.frame(rbind(c1,c2,c3))
names(Rstepvalues) <- c0
#Items that are negatively valenced. They are reverse-coded below.
negative <- c("ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67","ISAFBUL68","ISAFBUL69","ISAFBUL79",
"ISAFBUL80","ISAFBUL81","ISAFBUL82","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103")
#Loop through the scale list
for (scalename in scalelist) {
#Stepvalues for the items of the current scale
stepvalues <- Rstepvalues[eval(as.name(scalename))]
items <- names(stepvalues)
#Only use the variables that belong to scales
tempdata <- indata[c0]
#Code negative values to missing
tempdata[tempdata < 0] <- NA
#Reverse code negatively valenced items
tempdata[negative] <- 5-tempdata[negative]
#Use only the variables of the current scale
tempdata <- tempdata[items]
#Recode values from the range of 1-4 to 0-3
tempdata <- tempdata[]-1
#The algorithm below is based on a Rasch partial credit model.
#See section 3.1 of the national benchmarking documentation referenced above for more details.
#Further reading: http://www.winsteps.com/a/Linacre-estimation-further-topics.pdf
tdata <- tempdata
tdata$counts = apply(tdata,1,function(x) sum(!is.na(x[])))
titems <- names(stepvalues)
tstepvalues <- stepvalues
tv1 <- paste("tv1", titems, sep="")
tv2 <- paste("tv2", titems, sep="")
tv3 <- paste("tv3", titems, sep="")
tdata[tv1] <- stepvalues[1,]
tdata[tv2] <- stepvalues[2,]
tdata[tv3] <- stepvalues[3,]
nm <- paste("nm", titems, sep="")
tdata[nm] <- tdata[titems]*0+1
tdata$anm <- rowSums(tdata[nm],na.rm=TRUE)
tdata[tv1] <- tdata[tv1]*tdata[nm]
tdata[tv2] <- tdata[tv2]*tdata[nm]
tdata[tv3] <- tdata[tv3]*tdata[nm]
tdata$rl <- tdata$anm*0
tdata$ru <- tdata$anm*3
tdata$ru[tdata$ru==0] <- 1
tdata$r <- rowSums(tdata[items],na.rm=TRUE)
tdata$r[tdata$r==tdata$rl] <- tdata$rl[tdata$r==tdata$rl]+0.3
tdata$r[tdata$r==tdata$ru] <- tdata$ru[tdata$r==tdata$ru]-0.3
d <- mean(colMeans(tstepvalues[],na.rm=TRUE))
tdata$t0 <- d + log((tdata$r-tdata$rl)/(tdata$ru-tdata$r))
tdata$t <- 0
tdata$t1 <- 1
iter <- 1
while(max(abs(tdata$t1-tdata$t),na.rm=TRUE)>0.0000001 & iter <=100) {
if (iter==1) {tdata$t <- tdata$t0
} else if (iter>1) {tdata$t <- tdata$t1}
iter <- iter+1
for (n in 1:ncol(tstepvalues)) {
jp <- paste("jp", titems[n], sep="")
j2p <- paste("j2p", titems[n], sep="")
dj <- paste("dj", titems[n], sep="")
tdata$p1 <- exp(1*tdata$t-tdata[,tv1[n]])/
(1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+
exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]]))
tdata$p2 <- exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])/
(1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+
exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]]))
tdata$p3 <- exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])/
(1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+
exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]]))
tdata$p1[is.na(tdata[,tv1[n]])] <- NA
tdata$p2[is.na(tdata[,tv1[n]])] <- NA
tdata$p3[is.na(tdata[,tv1[n]])] <- NA
tdata[jp] <- 1*tdata$p1+2*tdata$p2+3*tdata$p3
tdata[j2p] <- 1*1*tdata$p1+2*2*tdata$p2+3*3*tdata$p3
tdata[dj] <- tdata[j2p]-tdata[jp]*tdata[jp]
}
tdata$e <- rowSums(tdata[, grep("jp", names(tdata))],na.rm = TRUE)
tdata$v <- rowSums(tdata[, grep("dj", names(tdata))],na.rm = TRUE)
tdata$t1 <- tdata$t+(tdata$r-tdata$e)/pmax(2*tdata$v,1,na.rm = TRUE)
}
#Set to missing if the algorithm did not converge
tdata$t1[tdata$t1>30] <- NA
#Only calculate score scores for those valid responses to at least three items and more than half of the items
tdata$t1[tdata$counts<3 | tdata$counts<0.5*ncol(tempdata)] <- NA
##########SCALING FACTORS######################
#These factors are used to recale the scale scores so that the cut scores are 300 and 400 for all scales.
#This is a linear transformation so it does not affect the relative distance between two scores: The distance is inflated by a constant factor B.
#See section 3.3 the national benchmarking documentation referenced above for more details.
if (scalename == 'eng' ) {
A <- 316.291
B <- 29.053
}
if (scalename == 'clc' ) {
A <- 325.048
B <- 31.112
}
if (scalename == 'rel' ) {
A <- 322.095
B <- 26.592
}
if (scalename == 'par' ) {
A <- 298.823
B <- 30.178
}
if (scalename == 'saf' ) {
A <- 323.415
B <- 31.995
}
if (scalename == 'emo' ) {
A <- 327.561
B <- 33.799
}
if (scalename == 'psaf' ) {
A <- 324.906
B <- 32.092
}
if (scalename == 'bul' ) {
A <- 338.35
B <- 30.829
}
if (scalename == 'sub' ) {
A <- 299.599
B <- 29.819
}
if (scalename == 'env' ) {
A <- 320.62
B <- 29.264
}
if (scalename == 'penv' ) {
A <- 330.34
B <- 31.939
}
if (scalename == 'ins' ) {
A <- 324.968
B <- 25.403
}
if (scalename == 'phea') {
A <- 311.666
B <- 27.864
}
if (scalename == 'men' ) {
A <- 307.097
B <- 28.889
}
if (scalename == 'dis' ) {
A <- 328.255
B <- 30.631
}
#Rescale the scores based on the factors above
tdata$tscore <- round2(tdata$t1*B+A,0)
#Keep only the calculated scale scores
score_data <- tdata[c("tscore")]
#Rename the data using the scale name and save it to the workspace
colnames(score_data) <- paste(scalename, colnames(score_data), sep = "_")
assign(scalename,score_data,envir = .GlobalEnv)
}
}
##########SPECIFICATION AND OUTPUT######################
#Read in data
indata <- read.table(file=sprintf("%s\\%s", folder, dataname),header=TRUE,sep=",")
names(indata) <- toupper(names(indata))
#Item names for each scale
eng <- c("IENGCLC2","IENGCLC3","IENGCLC4","IENGCLC6","IENGCLC7","IENGCLC8","IENGREL9","IENGREL10","IENGREL12","IENGREL14","IENGREL15",
"IENGPAR29","IENGPAR31","IENGPAR32","IENGPAR36","IENGPAR42","IENGPAR48")
clc <- c("IENGCLC2","IENGCLC3","IENGCLC4","IENGCLC6","IENGCLC7","IENGCLC8")
rel <- c("IENGREL9","IENGREL10","IENGREL12","IENGREL14","IENGREL15")
par <- c("IENGPAR29","IENGPAR31","IENGPAR32","IENGPAR36","IENGPAR42","IENGPAR48")
saf <- c("ISAFEMO52","ISAFEMO53","ISAFEMO54","ISAFEMO55",
"ISAFEMO56","ISAFEMO58","ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67","ISAFBUL68",
"ISAFBUL69","ISAFBUL71","ISAFBUL73","ISAFBUL79","ISAFBUL80","ISAFBUL81","ISAFBUL82","ISAFSUB86","ISAFSUB87","ISAFSUB88","ISAFSUB91")
emo <- c("ISAFEMO52","ISAFEMO53","ISAFEMO54","ISAFEMO55","ISAFEMO56","ISAFEMO58")
psaf <- c("ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67")
bul <- c("ISAFBUL68","ISAFBUL69","ISAFBUL71","ISAFBUL73","ISAFBUL79","ISAFBUL80","ISAFBUL81","ISAFBUL82")
sub <- c("ISAFSUB86","ISAFSUB87","ISAFSUB88","ISAFSUB91")
env <- c("IENVPENV97","IENVPENV98","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103","IENVINS105","IENVINS107","IENVINS108",
"IENVINS110","IENVINS115","IENVINS116","IENVPHEA119","IENVPHEA120","IENVPHEA121","IENVPHEA122","IENVMEN123","IENVMEN125",
"IENVMEN126","IENVMEN128","IENVMEN137","IENVDIS129","IENVDIS130","IENVDIS134","IENVDIS134C","IENVDIS135","IENVDIS136")
penv <- c("IENVPENV97","IENVPENV98","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103")
ins <- c("IENVINS105","IENVINS107","IENVINS108","IENVINS110","IENVINS115","IENVINS116")
phea <- c("IENVPHEA119","IENVPHEA120","IENVPHEA121","IENVPHEA122")
men <- c("IENVMEN123","IENVMEN125","IENVMEN126","IENVMEN128","IENVMEN137")
dis <- c("IENVDIS129","IENVDIS130","IENVDIS134","IENVDIS134C","IENVDIS135","IENVDIS136")
#Run scaling program for each scale
rasch(scalelist=c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis"))
#Attach the scale score data to the input data
out <- cbind(indata,eng,clc,rel,par,saf,emo,psaf,bul,sub,env,penv,ins,phea,men,dis)
#Exclude cases if they did not provide any demographic information
for (v in c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis")) {
out[which((is.na(out$IDEMO138) | out$IDEMO138 == "" | out$IDEMO138 == -1) & (is.na(out$IDEMO139) | out$IDEMO139 == "" | out$IDEMO139 == -1) & (is.na(out$IDEMO140) | out$IDEMO140 == "" | out$IDEMO140 == -1)),paste(v,"_tscore",sep="")] <- NA
}
##########OUTPUT DATASET WITH SCALE SCORES###############
write.table(out, sprintf("%s\\scale_score_%s", folder, dataname), col.names=T,row.names=F,sep=",")
##########END OF PROGRAM#################################