##########INSTRUCTION############################### #You only need to specify your data folder and csv dataset name in the "INPUTS" section below #The output dataset will be produced in the folder you specify #Comment: This program calculates scale scores for each survey taker. The calculated scale scores are just # additional analysis variables attached to your input dataset. If your survey has a complex sample # design (e.g., you took a sample from your target population with stratification, clustering, etc.), # you should apply your design specifications (strata, cluster, weights, etc.) as you would do when # analyzing other survey variables. #################################################### ##########INPUTS############################### #Specify your data folder and csv dataset name folder <- "FOLDER PATH" dataname <- "noninstructional staff data.csv" ##########SCALING PROGROM###################### #Option for decimal places options(digits=10) #Rounding function that rounds .5 up to the next integer round2 = function(x, n) { posneg = sign(x) z = abs(x)*10^n z = z + 0.5 z = trunc(z) z = z/10^n z*posneg } #Scale score calculation function rasch <- function(scalelist=NULL){ #Stepvalues that are estimated from the national benchmarking. These are the values in Table A6 of the national benchmarking documentation #(https://safesupportivelearning.ed.gov/sites/default/files/EDSCLS_Psychometric_Benchmarking_Technical_Report.pdf). #These three sets of values are used to calculate the scale scores for any survey takers. c0 <- c("NENGCLC2","NENGCLC3","NENGCLC4","NENGCLC6","NENGCLC7","NENGCLC8","NENGREL16","NENGREL17","NENGREL18","NENGREL24","NENGREL25","NENGREL30", "NENGPAR34","NENGPAR37","NENGPAR38","NENGPAR44","NENGPAR47","NSAFEMO51","NSAFEMO52","NSAFEMO53","NSAFEMO54","NSAFEMO55","NSAFEMO148", "NSAFPSAF57","NSAFPSAF58","NSAFPSAF59","NSAFPSAF61","NSAFPSAF63","NSAFPSAF64","NSAFBUL65","NSAFBUL66","NSAFBUL70","NSAFBUL76","NSAFBUL77", "NSAFBUL78","NSAFBUL79","NSAFSUB83","NSAFSUB84","NSAFSUB85","NSAFSUB87","NSAFSUB88","NENVPENV97","NENVPENV98","NENVPENV99","NENVPENV100", "NENVPENV102","NENVPENV103","NENVINS109","NENVINS110","NENVINS111","NENVINS140","NENVPHEA115","NENVPHEA117","NENVPHEA118", "NENVPHEA119","NENVMEN122","NENVMEN125","NENVMEN126","NENVMEN127","NENVDIS130","NENVDIS131","NENVDIS132","NENVDIS134","NENVDIS134C", "NENVDIS135","NENVDIS136","NENVDIS137") c1 <- c(-2.41269,-2.90456,-2.54538,-2.44946,-2.37193,-2.71602,-2.51626,-3.51999,-2.47435,-3.24542,-2.48313,-3.09321,-1.81246,-2.66216,-2.32825,-3.02065,-3.21406,-1.0321,-0.96298,-0.91493,-0.99467,-1.74572,-2.7771,-0.90158,-2.04704,-1.87806,-3.11085,-1.92372,-0.52093,-1.08067,-0.96739,-1.99696,-1.42134,-1.39773,-1.99819,-0.85593,-1.4652,-1.7502,-1.95507,-1.37448,-1.48793,-1.31365,-2.04206,-1.80633,-1.83406,-1.66769,-2.25032,-2.80502,-3.03108,-1.45675,-3.53932,-2.20742,-2.31603,-2.86615,-2.70978,-1.49341,-2.46824,-1.7135,-2.22951,-2.26218,-2.98917,-2.50782,-0.79484,-0.72395,-3.00002,-0.93667,-1.65967) c2 <- c(-0.62783,-0.93949,-0.35921,-1.91379,-0.49578,-0.90214,-1.33325,-0.57221,-0.64719,-1.08817,-0.03373,-0.94521,0.2645,0.20785,0.96649,-0.16654,-1.66755,-1.26082,-0.37968,-0.43057,-0.8671,-1.55043,-0.80514,0.26985,-0.5574,-0.67698,-1.76912,-1.89623,0.55635,0.66543,1.10514,-0.93873,-2.01817,-2.26507,-2.08219,-2.28479,-0.30073,-0.21784,-0.61555,-0.59265,-0.14093,-0.75334,-1.53466,-1.47464,-0.83077,-1.64181,-1.1453,-1.10456,-0.14643,-0.85192,-1.21007,-0.51886,-0.76917,-1.32258,-0.54017,-0.79424,0.31508,-0.05805,-0.99588,-1.08258,-2.15103,-1.84725,0.24373,-0.00727,-0.59234,-0.10022,-1.39865) c3 <- c(2.01573,2.21546,3.94359,2.20042,3.0729,3.1253,3.44854,3.47261,3.50871,2.71541,3.31846,3.72407,3.79969,3.80338,5.37724,4.01239,2.26929,1.70307,2.01959,1.7704,1.72993,2.12222,2.22456,3.05953,2.66121,2.70934,1.21185,1.02633,2.64525,4.41386,3.90358,2.38629,1.17343,1.03809,0.92795,0.98601,2.99407,3.24186,3.06233,3.20019,3.40319,2.46686,2.51556,2.54883,2.84167,2.19677,2.43437,2.80337,3.53325,2.6866,2.43721,3.27185,2.94548,3.29168,3.54043,3.55629,4.24422,4.06839,2.78672,2.67435,2.0923,2.35542,2.72603,3.01571,3.61033,3.33175,2.96215) Rstepvalues <- as.data.frame(rbind(c1,c2,c3)) names(Rstepvalues) <- c0 #Items that are negatively valenced. They are reverse-coded below. negative <- c("NSAFPSAF57","NSAFPSAF58","NSAFPSAF59","NSAFPSAF61","NSAFPSAF63","NSAFPSAF64","NSAFBUL65","NSAFBUL66","NSAFBUL76","NSAFBUL77", "NSAFBUL78","NSAFBUL79","NENVPENV97","NENVPENV98","NENVPENV99","NENVPENV100") #Loop through the scale list for (scalename in scalelist) { #Stepvalues for the items of the current scale stepvalues <- Rstepvalues[eval(as.name(scalename))] items <- names(stepvalues) #Only use the variables that belong to scales tempdata <- indata[c0] #Code negative values to missing tempdata[tempdata < 0] <- NA #Reverse code negatively valenced items tempdata[negative] <- 5-tempdata[negative] #Use only the variables of the current scale tempdata <- tempdata[items] #Recode values from the range of 1-4 to 0-3 tempdata <- tempdata[]-1 #The algorithm below is based on a Rasch partial credit model. #See section 3.1 of the national benchmarking documentation referenced above for more details. #Further reading: http://www.winsteps.com/a/Linacre-estimation-further-topics.pdf tdata <- tempdata tdata$counts = apply(tdata,1,function(x) sum(!is.na(x[]))) titems <- names(stepvalues) tstepvalues <- stepvalues tv1 <- paste("tv1", titems, sep="") tv2 <- paste("tv2", titems, sep="") tv3 <- paste("tv3", titems, sep="") tdata[tv1] <- stepvalues[1,] tdata[tv2] <- stepvalues[2,] tdata[tv3] <- stepvalues[3,] nm <- paste("nm", titems, sep="") tdata[nm] <- tdata[titems]*0+1 tdata$anm <- rowSums(tdata[nm],na.rm=TRUE) tdata[tv1] <- tdata[tv1]*tdata[nm] tdata[tv2] <- tdata[tv2]*tdata[nm] tdata[tv3] <- tdata[tv3]*tdata[nm] tdata$rl <- tdata$anm*0 tdata$ru <- tdata$anm*3 tdata$ru[tdata$ru==0] <- 1 tdata$r <- rowSums(tdata[items],na.rm=TRUE) tdata$r[tdata$r==tdata$rl] <- tdata$rl[tdata$r==tdata$rl]+0.3 tdata$r[tdata$r==tdata$ru] <- tdata$ru[tdata$r==tdata$ru]-0.3 d <- mean(colMeans(tstepvalues[],na.rm=TRUE)) tdata$t0 <- d + log((tdata$r-tdata$rl)/(tdata$ru-tdata$r)) tdata$t <- 0 tdata$t1 <- 1 iter <- 1 while(max(abs(tdata$t1-tdata$t),na.rm=TRUE)>0.0000001 & iter <=100) { if (iter==1) {tdata$t <- tdata$t0 } else if (iter>1) {tdata$t <- tdata$t1} iter <- iter+1 for (n in 1:ncol(tstepvalues)) { jp <- paste("jp", titems[n], sep="") j2p <- paste("j2p", titems[n], sep="") dj <- paste("dj", titems[n], sep="") tdata$p1 <- exp(1*tdata$t-tdata[,tv1[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p2 <- exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p3 <- exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p1[is.na(tdata[,tv1[n]])] <- NA tdata$p2[is.na(tdata[,tv1[n]])] <- NA tdata$p3[is.na(tdata[,tv1[n]])] <- NA tdata[jp] <- 1*tdata$p1+2*tdata$p2+3*tdata$p3 tdata[j2p] <- 1*1*tdata$p1+2*2*tdata$p2+3*3*tdata$p3 tdata[dj] <- tdata[j2p]-tdata[jp]*tdata[jp] } tdata$e <- rowSums(tdata[, grep("jp", names(tdata))],na.rm = TRUE) tdata$v <- rowSums(tdata[, grep("dj", names(tdata))],na.rm = TRUE) tdata$t1 <- tdata$t+(tdata$r-tdata$e)/pmax(2*tdata$v,1,na.rm = TRUE) } #Set to missing if the algorithm did not converge tdata$t1[tdata$t1>30] <- NA #Only calculate score scores for those valid responses to at least three items and more than half of the items tdata$t1[tdata$counts<3 | tdata$counts<0.5*ncol(tempdata)] <- NA ##########SCALING FACTORS###################### #These factors are used to recale the scale scores so that the cut scores are 300 and 400 for all scales. #This is a linear transformation so it does not affect the relative distance between two scores: The distance is inflated by a constant factor B. #See section 3.3 the national benchmarking documentation referenced above for more details. if (scalename == 'eng' ) { A <- 316.027 B <- 25.661 } if (scalename == 'clc' ) { A <- 324.047 B <- 27.686 } if (scalename == 'rel' ) { A <- 318.803 B <- 24.114 } if (scalename == 'par' ) { A <- 300.916 B <- 25.772 } if (scalename == 'saf' ) { A <- 328.465 B <- 31.578 } if (scalename == 'emo' ) { A <- 332.304 B <- 35.115 } if (scalename == 'psaf' ) { A <- 327.846 B <- 32.555 } if (scalename == 'bul' ) { A <- 342.271 B <- 32.014 } if (scalename == 'sub' ) { A <- 310.583 B <- 28.119 } if (scalename == 'env' ) { A <- 323.636 B <- 25.919 } if (scalename == 'penv' ) { A <- 333.383 B <- 26.649 } if (scalename == 'ins' ) { A <- 322.626 B <- 27.086 } if (scalename == 'phea') { A <- 319.552 B <- 24.661 } if (scalename == 'men' ) { A <- 309.507 B <- 24.669 } if (scalename == 'dis' ) { A <- 326.977 B <- 25.694 } #Rescale the scores based on the factors above tdata$tscore <- round2(tdata$t1*B+A,0) #Keep only the calculated scale scores score_data <- tdata[c("tscore")] #Rename the data using the scale name and save it to the workspace colnames(score_data) <- paste(scalename, colnames(score_data), sep = "_") assign(scalename,score_data,envir = .GlobalEnv) } } ##########SPECIFICATION AND OUTPUT###################### #Read in data indata <- read.table(file=sprintf("%s\\%s", folder, dataname),header=TRUE,sep=",") names(indata) <- toupper(names(indata)) #Item names for each scale eng <- c("NENGCLC2","NENGCLC3","NENGCLC4","NENGCLC6","NENGCLC7","NENGCLC8","NENGREL16","NENGREL17","NENGREL18","NENGREL24","NENGREL25","NENGREL30", "NENGPAR34","NENGPAR37","NENGPAR38","NENGPAR44","NENGPAR47") clc <- c("NENGCLC2","NENGCLC3","NENGCLC4","NENGCLC6","NENGCLC7","NENGCLC8") rel <- c("NENGREL16","NENGREL17","NENGREL18","NENGREL24","NENGREL25","NENGREL30") par <- c("NENGPAR34","NENGPAR37","NENGPAR38","NENGPAR44","NENGPAR47") saf <- c("NSAFEMO51","NSAFEMO52","NSAFEMO53","NSAFEMO54","NSAFEMO55","NSAFEMO148", "NSAFPSAF57","NSAFPSAF58","NSAFPSAF59","NSAFPSAF61","NSAFPSAF63","NSAFPSAF64","NSAFBUL65","NSAFBUL66","NSAFBUL70","NSAFBUL76","NSAFBUL77", "NSAFBUL78","NSAFBUL79","NSAFSUB83","NSAFSUB84","NSAFSUB85","NSAFSUB87","NSAFSUB88") emo <- c("NSAFEMO51","NSAFEMO52","NSAFEMO53","NSAFEMO54","NSAFEMO55","NSAFEMO148") psaf <- c("NSAFPSAF57","NSAFPSAF58","NSAFPSAF59","NSAFPSAF61","NSAFPSAF63","NSAFPSAF64") bul <- c("NSAFBUL65","NSAFBUL66","NSAFBUL70","NSAFBUL76","NSAFBUL77","NSAFBUL78","NSAFBUL79") sub <- c("NSAFSUB83","NSAFSUB84","NSAFSUB85","NSAFSUB87","NSAFSUB88") env <- c("NENVPENV97","NENVPENV98","NENVPENV99","NENVPENV100", "NENVPENV102","NENVPENV103","NENVINS109","NENVINS110","NENVINS111","NENVINS140","NENVPHEA115","NENVPHEA117","NENVPHEA118", "NENVPHEA119","NENVMEN122","NENVMEN125","NENVMEN126","NENVMEN127","NENVDIS130","NENVDIS131","NENVDIS132","NENVDIS134","NENVDIS134C", "NENVDIS135","NENVDIS136","NENVDIS137") penv <- c("NENVPENV97","NENVPENV98","NENVPENV99","NENVPENV100","NENVPENV102","NENVPENV103") ins <- c("NENVINS109","NENVINS110","NENVINS111","NENVINS140") phea <- c("NENVPHEA115","NENVPHEA117","NENVPHEA118","NENVPHEA119") men <- c("NENVMEN122","NENVMEN125","NENVMEN126","NENVMEN127") dis <- c("NENVDIS130","NENVDIS131","NENVDIS132","NENVDIS134","NENVDIS134C","NENVDIS135","NENVDIS136","NENVDIS137") #Run scaling program for each scale rasch(scalelist=c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis")) #Attach the scale score data to the input data out <- cbind(indata,eng,clc,rel,par,saf,emo,psaf,bul,sub,env,penv,ins,phea,men,dis) #Exclude cases if they did not provide any demographic information for (v in c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis")) { out[which((is.na(out$NDEMO142) | out$NDEMO142 == "" | out$NDEMO142 == -1) & (is.na(out$NDEMO143) | out$NDEMO143 == "" | out$NDEMO143 == -1) & (is.na(out$NDEMO144) | out$NDEMO144 == "" | out$NDEMO144 == -1)),paste(v,"_tscore",sep="")] <- NA } ##########OUTPUT DATASET WITH SCALE SCORES############### write.table(out, sprintf("%s\\scale_score_%s", folder, dataname), col.names=T,row.names=F,sep=",") ##########END OF PROGRAM#################################