##########INSTRUCTION############################### #You only need to specify your data folder and csv dataset name in the "INPUTS" section below #The output dataset will be produced in the folder you specify #Comment: This program calculates scale scores for each survey taker. The calculated scale scores are just # additional analysis variables attached to your input dataset. If your survey has a complex sample # design (e.g., you took a sample from your target population with stratification, clustering, etc.), # you should apply your design specifications (strata, cluster, weights, etc.) as you would do when # analyzing other survey variables. #################################################### ##########INPUTS############################### #Specify your data folder and csv dataset name folder <- "FOLDER PATH" dataname <- "student data.csv" ##########SCALING PROGROM###################### #Option for decimal places options(digits=10) #Rounding function that rounds .5 up to the next integer round2 = function(x, n) { posneg = sign(x) z = abs(x)*10^n z = z + 0.5 z = trunc(z) z = z/10^n z*posneg } #Scale score calculation function rasch <- function(scalelist=NULL){ #Stepvalues that are estimated from the national benchmarking. These are the values in Table A4 of the national benchmarking documentation #(https://safesupportivelearning.ed.gov/sites/default/files/EDSCLS_Psychometric_Benchmarking_Technical_Report.pdf). #These three sets of values are used to calculate the scale scores for any survey takers. c0 <- c("SENGCLC1","SENGCLC2","SENGCLC3","SENGCLC4","SENGCLC7","SENGREL9","SENGREL11","SENGREL12","SENGREL14","SENGREL153","SENGREL17","SENGREL20","SENGREL21","SENGREL29", "SENGPAR44","SENGPAR45","SENGPAR46","SENGPAR47","SENGPAR48","SSAFEMO49","SSAFEMO52","SSAFEMO53","SSAFEMO54","SSAFEMO56","SSAFEMO57", "SSAFPSAF60","SSAFPSAF63","SSAFPSAF65","SSAFPSAF67","SSAFPSAF68","SSAFPSAF69","SSAFPSAF71","SSAFBUL74","SSAFBUL75","SSAFBUL76","SSAFBUL77B","SSAFBUL73","SSAFBUL83", "SSAFSUB88","SSAFSUB91","SSAFSUB92","SSAFSUB93","SSAFSUB94","SENVPENV100","SENVPENV102","SENVPENV105","SENVPENV106","SENVPENV107", "SENVINS111","SENVINS113","SENVINS114","SENVINS115","SENVINS121","SENVMEN130","SENVMEN132","SENVMEN133","SENVMEN134","SENVMEN137", "SENVDIS142","SENVDIS143","SENVDIS146","SENVDIS147","SENVDIS147C") c1 <- c(-0.88758,-1.21688,-1.70151,-1.13062,-1.59396,-0.9249,-1.6756,-1.37546,-1.19418,-1.35606,-1.32961,-0.77372,-1.21581,-1.45891,-1.02078,-1.15759,-0.83518,-1.57761,-1.54687,-1.19732,-0.79951,-1.15283,-0.652,-1.1754,-1.21302,-1.40832,-1.53905,-1.71611,-0.94029,-0.58,-1.03096,-0.72259,-0.98061,-1.18313,-0.94225,-1.00375,-0.83535,-0.47988,-1.2565,-1.28382,-1.4722,-1.09243,-1.04787,-0.22565,-0.83049,-1.56286,-1.58736,-1.27968,-1.47932,-1.53159,-1.49103,-1.57155,-2.2499,-1.45449,-1.49858,-1.42755,-0.20715,-0.59274,-1.81967,-1.40034,-1.56048,-1.03239,-1.03612) c2 <- c(-0.21689,-0.27319,-1.0471,-0.11206,-1.06613,0.07381,-0.63246,-0.30377,-1.18031,-1.27296,-0.53273,0.42434,-0.11198,-1.67465,-0.38939,-0.3564,0.19951,-1.54482,-1.21379,-0.08038,0.6924,0.25698,-0.98285,-0.86814,-1.09519,-1.56709,-1.81347,-0.96873,0.35623,0.38108,0.19308,0.15162,-0.46344,-0.71079,-0.13648,-0.29701,0.09477,0.53213,-0.55163,-0.41416,-0.71855,-0.02012,0.09659,0.46013,0.48755,-0.9234,-0.77854,0.15582,-0.62922,-0.99408,-0.29674,-1.21026,-2.46508,-1.00195,-1.06141,-0.88773,1.33389,0.61958,-1.84193,-0.55586,-0.52414,-0.47264,-0.48429) c3 <- c(1.51974,1.64389,1.90232,1.70962,1.54944,2.41102,2.09221,2.13021,1.69008,1.58491,2.16515,2.91955,2.90247,1.27887,1.40529,1.2542,2.39614,0.97705,1.67163,3.05184,2.64666,2.86166,1.31697,1.59058,1.57176,1.39777,0.00153,0.16629,1.60171,1.49101,1.89466,2.2059,1.23622,1.13569,1.18006,1.47382,2.16486,2.06854,0.49417,0.54856,0.34286,0.61828,0.57692,2.66983,2.48407,2.10358,2.00146,2.34459,1.70855,1.92133,2.11189,1.09009,0.32927,1.46992,1.52817,1.52252,2.7488,2.81182,1.31956,1.72246,1.92085,1.27339,1.82711) Rstepvalues <- as.data.frame(rbind(c1,c2,c3)) names(Rstepvalues) <- c0 #Items that are negatively valenced. They are reverse-coded below. negative <- c("SSAFPSAF63", "SSAFPSAF65", "SSAFPSAF67", "SSAFPSAF68", "SSAFPSAF69", "SSAFPSAF71", "SSAFBUL74", "SSAFBUL75", "SSAFBUL76", "SSAFBUL77B", "SSAFBUL73", "SSAFBUL83","SSAFSUB88","SSAFSUB91","SSAFSUB92","SSAFSUB93","SSAFSUB94") #Loop through the scale list for (scalename in scalelist) { #Stepvalues for the items of the current scale stepvalues <- Rstepvalues[eval(as.name(scalename))] items <- names(stepvalues) #Only use the variables that belong to scales tempdata <- indata[c0] #Code negative values to missing tempdata[tempdata < 0] <- NA #Reverse code negatively valenced items tempdata[negative] <- 5-tempdata[negative] #Use only the variables of the current scale tempdata <- tempdata[items] #Recode values from the range of 1-4 to 0-3 tempdata <- tempdata[]-1 #The algorithm below is based on a Rasch partial credit model. #See section 3.1 of the national benchmarking documentation referenced above for more details. #Further reading: http://www.winsteps.com/a/Linacre-estimation-further-topics.pdf tdata <- tempdata tdata$counts = apply(tdata,1,function(x) sum(!is.na(x[]))) titems <- names(stepvalues) tstepvalues <- stepvalues tv1 <- paste("tv1", titems, sep="") tv2 <- paste("tv2", titems, sep="") tv3 <- paste("tv3", titems, sep="") tdata[tv1] <- stepvalues[1,] tdata[tv2] <- stepvalues[2,] tdata[tv3] <- stepvalues[3,] nm <- paste("nm", titems, sep="") tdata[nm] <- tdata[titems]*0+1 tdata$anm <- rowSums(tdata[nm],na.rm=TRUE) tdata[tv1] <- tdata[tv1]*tdata[nm] tdata[tv2] <- tdata[tv2]*tdata[nm] tdata[tv3] <- tdata[tv3]*tdata[nm] tdata$rl <- tdata$anm*0 tdata$ru <- tdata$anm*3 tdata$ru[tdata$ru==0] <- 1 tdata$r <- rowSums(tdata[items],na.rm=TRUE) tdata$r[tdata$r==tdata$rl] <- tdata$rl[tdata$r==tdata$rl]+0.3 tdata$r[tdata$r==tdata$ru] <- tdata$ru[tdata$r==tdata$ru]-0.3 d <- mean(colMeans(tstepvalues[],na.rm=TRUE)) tdata$t0 <- d + log((tdata$r-tdata$rl)/(tdata$ru-tdata$r)) tdata$t <- 0 tdata$t1 <- 1 iter <- 1 while(max(abs(tdata$t1-tdata$t),na.rm=TRUE)>0.0000001 & iter <=100) { if (iter==1) {tdata$t <- tdata$t0 } else if (iter>1) {tdata$t <- tdata$t1} iter <- iter+1 for (n in 1:ncol(tstepvalues)) { jp <- paste("jp", titems[n], sep="") j2p <- paste("j2p", titems[n], sep="") dj <- paste("dj", titems[n], sep="") tdata$p1 <- exp(1*tdata$t-tdata[,tv1[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p2 <- exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p3 <- exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p1[is.na(tdata[,tv1[n]])] <- NA tdata$p2[is.na(tdata[,tv1[n]])] <- NA tdata$p3[is.na(tdata[,tv1[n]])] <- NA tdata[jp] <- 1*tdata$p1+2*tdata$p2+3*tdata$p3 tdata[j2p] <- 1*1*tdata$p1+2*2*tdata$p2+3*3*tdata$p3 tdata[dj] <- tdata[j2p]-tdata[jp]*tdata[jp] } tdata$e <- rowSums(tdata[, grep("jp", names(tdata))],na.rm = TRUE) tdata$v <- rowSums(tdata[, grep("dj", names(tdata))],na.rm = TRUE) tdata$t1 <- tdata$t+(tdata$r-tdata$e)/pmax(2*tdata$v,1,na.rm = TRUE) } #Set to missing if the algorithm did not converge tdata$t1[tdata$t1>30] <- NA #Only calculate score scores for those valid responses to at least three items and more than half of the items tdata$t1[tdata$counts<3 | tdata$counts<0.5*ncol(tempdata)] <- NA ##########SCALING FACTORS###################### #These factors are used to recale the scale scores so that the cut scores are 300 and 400 for all scales. #This is a linear transformation so it does not affect the relative distance between two scores: The distance is inflated by a constant factor B. #See section 3.3 the national benchmarking documentation referenced above for more details. if (scalename == 'eng' ) { A <- 325.859 B <- 40.557 } if (scalename == 'clc' ) { A <- 325.697 B <- 44.614 } if (scalename == 'rel' ) { A <- 322.89 B <- 36.517 } if (scalename == 'par' ) { A <- 332.549 B <- 44.683 } if (scalename == 'saf' ) { A <- 320.161 B <- 60.027 } if (scalename == 'emo' ) { A <- 315.079 B <- 39.663 } if (scalename == 'psaf' ) { A <- 328.929 B <- 59.402 } if (scalename == 'bul' ) { A <- 310.828 B <- 58.872 } if (scalename == 'sub' ) { A <- 338.85 B <- 119.933 } if (scalename == 'env' ) { A <- 326.967 B <- 40.341 } if (scalename == 'penv' ) { A <- 307.241 B <- 40.106 } if (scalename == 'ins' ) { A <- 344.491 B <- 38.992 } if (scalename == 'men' ) { A <- 318.995 B <- 41.928 } if (scalename == 'dis' ) { A <- 333.135 B <- 41.56 } #Rescale the scores based on the factors above tdata$tscore <- round2(tdata$t1*B+A,0) #Keep only the calculated scale scores score_data <- tdata[c("tscore")] #Rename the data using the scale name and save it to the workspace colnames(score_data) <- paste(scalename, colnames(score_data), sep = "_") assign(scalename,score_data,envir = .GlobalEnv) } } ##########SPECIFICATION AND OUTPUT###################### #Read in data indata <- read.table(file=sprintf("%s\\%s", folder, dataname),header=TRUE,sep=",") names(indata) <- toupper(names(indata)) #Item names for each scale eng <- c("SENGCLC1","SENGCLC2","SENGCLC3","SENGCLC4","SENGCLC7","SENGREL9","SENGREL11","SENGREL12","SENGREL14","SENGREL153","SENGREL17","SENGREL20","SENGREL21","SENGREL29", "SENGPAR44","SENGPAR45","SENGPAR46","SENGPAR47","SENGPAR48") clc <- c("SENGCLC1", "SENGCLC2", "SENGCLC3", "SENGCLC4", "SENGCLC7") rel <- c("SENGREL9","SENGREL11","SENGREL12","SENGREL14","SENGREL153","SENGREL17","SENGREL20","SENGREL21","SENGREL29") par <- c("SENGPAR44","SENGPAR45","SENGPAR46","SENGPAR47","SENGPAR48") saf <- c("SSAFEMO49","SSAFEMO52","SSAFEMO53","SSAFEMO54","SSAFEMO56","SSAFEMO57","SSAFPSAF60","SSAFPSAF63","SSAFPSAF65","SSAFPSAF67","SSAFPSAF68","SSAFPSAF69","SSAFPSAF71", "SSAFBUL74","SSAFBUL75","SSAFBUL76","SSAFBUL77B","SSAFBUL73","SSAFBUL83","SSAFSUB88","SSAFSUB91","SSAFSUB92","SSAFSUB93","SSAFSUB94") emo <- c("SSAFEMO49", "SSAFEMO52", "SSAFEMO53", "SSAFEMO54", "SSAFEMO56", "SSAFEMO57") psaf <- c("SSAFPSAF60", "SSAFPSAF63", "SSAFPSAF65", "SSAFPSAF67", "SSAFPSAF68", "SSAFPSAF69", "SSAFPSAF71") bul <- c("SSAFBUL74","SSAFBUL75","SSAFBUL76","SSAFBUL77B","SSAFBUL73","SSAFBUL83") sub <- c("SSAFSUB88","SSAFSUB91","SSAFSUB92","SSAFSUB93","SSAFSUB94") env <- c("SENVPENV100","SENVPENV102","SENVPENV105","SENVPENV106","SENVPENV107","SENVINS111","SENVINS113","SENVINS114","SENVINS115","SENVINS121","SENVMEN130","SENVMEN132", "SENVMEN133","SENVMEN134","SENVMEN137","SENVDIS142","SENVDIS143","SENVDIS146","SENVDIS147","SENVDIS147C") penv <- c("SENVPENV100","SENVPENV102","SENVPENV105","SENVPENV106","SENVPENV107") ins <- c("SENVINS111","SENVINS113","SENVINS114","SENVINS115","SENVINS121") men <- c("SENVMEN130", "SENVMEN132", "SENVMEN133", "SENVMEN134", "SENVMEN137") dis <- c("SENVDIS142","SENVDIS143","SENVDIS146","SENVDIS147","SENVDIS147C") #Run scaling program for each scale rasch(scalelist=c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","men","dis")) #Attach the scale score data to the input data out <- cbind(indata,eng,clc,rel,par,saf,emo,psaf,bul,sub,env,penv,ins,men,dis) #Exclude cases if they did not provide any demographic information for (v in c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis")) { out[which((is.na(out$SDEMO148) | out$SDEMO148 == "" | out$SDEMO148 == -1) & (is.na(out$SDEMO149) | out$SDEMO149 == "" | out$SDEMO149 == -1) & (is.na(out$SDEMO150) | out$SDEMO150 == "" | out$SDEMO150 == -1) & (is.na(out$SDEMO151) | out$SDEMO151 == "" | out$SDEMO151 == -1)),paste(v,"_tscore",sep="")] <- NA } ##########OUTPUT DATASET WITH SCALE SCORES############### write.table(out, sprintf("%s\\scale_score_%s", folder, dataname), col.names=T,row.names=F,sep=",") ##########END OF PROGRAM#################################