##########INSTRUCTION############################### #you only need to specify your data folder and csv dataset name in the "INPUTS" section below #the output dataset will be produced in the folder you specify #################################################### ##########INPUTS############################### #specify your data folder and csv dataset name folder <- "FOLDER PATH" dataname <- "instructional staff data.csv" ##########SCALING PROGROM###################### options(digits=10) round2 = function(x, n) { posneg = sign(x) z = abs(x)*10^n z = z + 0.5 z = trunc(z) z = z/10^n z*posneg } rasch <- function(scalelist=NULL){ #stepvalues c0 <- c("IENGCLC2","IENGCLC3","IENGCLC4","IENGCLC6","IENGCLC7","IENGCLC8","IENGREL9","IENGREL10","IENGREL12","IENGREL14","IENGREL15", "IENGPAR29","IENGPAR31","IENGPAR32","IENGPAR36","IENGPAR42","IENGPAR48","ISAFEMO52","ISAFEMO53","ISAFEMO54","ISAFEMO55", "ISAFEMO56","ISAFEMO58","ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67","ISAFBUL68", "ISAFBUL69","ISAFBUL71","ISAFBUL73","ISAFBUL79","ISAFBUL80","ISAFBUL81","ISAFBUL82","ISAFSUB86","ISAFSUB87","ISAFSUB88","ISAFSUB91", "IENVPENV97","IENVPENV98","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103","IENVINS105","IENVINS107","IENVINS108", "IENVINS110","IENVINS115","IENVINS116","IENVPHEA119","IENVPHEA120","IENVPHEA121","IENVPHEA122","IENVMEN123","IENVMEN125", "IENVMEN126","IENVMEN128","IENVMEN137","IENVDIS129","IENVDIS130","IENVDIS134","IENVDIS134C","IENVDIS135","IENVDIS136") c1 <- c(-2.23882,-2.1565,-2.61022,-2.72424,-1.78468,-2.18346,-3.02788,-3.22684,-4.12339,-2.32472,-2.97012,-1.64759,-1.47893,-1.83183,-1.11894,-2.28667,-2.70965,-1.42847,-0.92065,-0.92441,-1.62728,-1.32451,-2.51414,-1.06057,-1.90306,-1.86559,-3.0375,-2.14065,-0.55663,-0.98262,-0.77695,-2.52361,-2.61231,-2.28638,-2.09304,-2.17942,-2.01788,-2.00464,-2.09068,-2.06953,-2.41182,-2.00263,-2.09549,-1.75794,-1.55537,-1.61969,-1.31004,-1.14778,-1.54741,-2.46068,-2.87387,-3.11921,-2.09029,-2.57512,-2.25692,-3.02897,-2.43883,-2.07875,-2.19165,-2.21058,-2.17242,-1.99021,-2.30585,-2.91403,-1.13436,-1.39281,-1.23245,-2.30109) c2 <- c(-0.79286,-1.23134,-0.48501,-1.52987,0.14887,-0.80117,-1.31162,-0.8068,-0.3855,0.08648,-1.66979,-0.18685,0.23174,0.95631,0.45601,0.09988,-1.61862,-1.18202,-0.08686,-0.10689,-0.51158,-1.37726,-1.22259,0.00142,-0.25214,-0.38786,-1.91849,-1.97713,0.62288,0.84634,1.43081,-0.48166,-0.79026,-1.89,-2.12925,-2.10602,-1.89899,0.08366,0.09402,-0.35578,0.22724,-1.31231,-1.38865,-0.64084,-1.02356,-0.74721,-0.46397,0.49282,0.38599,-1.08837,-1.38221,-2.31483,-1.01021,-0.17834,-0.28418,-0.80828,-0.39363,-1.12673,-0.13682,-0.19846,0.5032,-0.29119,-1.0817,-2.43138,0.16961,-0.02434,0.14534,-1.50784) c3 <- c(1.63635,1.6155,3.37974,1.8249,2.95479,3.1319,2.68007,2.92047,3.8331,3.0916,2.16597,3.1684,3.2615,4.79256,3.49956,3.78357,1.54464,1.84011,2.70973,2.13721,1.98949,2.08055,2.14039,3.10718,2.78974,2.89179,1.35532,0.98397,2.86862,4.3873,4.28451,3.55645,2.1421,1.04757,1.02448,0.94933,0.96563,3.3934,3.48374,3.19388,3.39829,1.49996,1.69196,2.34516,2.3859,2.53191,2.63674,4.44998,4.13798,3.33738,2.09191,1.70813,2.27599,3.11342,3.19722,3.13805,3.23224,2.2716,3.24814,3.25992,3.93348,3.36141,2.16889,1.70719,2.46815,2.76214,2.74392,2.24373) Rstepvalues <- as.data.frame(rbind(c1,c2,c3)) names(Rstepvalues) <- c0 #items that are negatively valenced negative <- c("ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67","ISAFBUL68","ISAFBUL69","ISAFBUL79", "ISAFBUL80","ISAFBUL81","ISAFBUL82","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103") for (scalename in scalelist) { stepvalues <- Rstepvalues[eval(as.name(scalename))] items <- names(stepvalues) tempdata <- indata[c0] tempdata[tempdata < 0] <- NA tempdata[negative] <- 5-tempdata[negative] tempdata <- tempdata[items] tempdata <- tempdata[]-1 tdata <- tempdata tdata$counts = apply(tdata,1,function(x) sum(!is.na(x[]))) titems <- names(stepvalues) tstepvalues <- stepvalues tv1 <- paste("tv1", titems, sep="") tv2 <- paste("tv2", titems, sep="") tv3 <- paste("tv3", titems, sep="") tdata[tv1] <- stepvalues[1,] tdata[tv2] <- stepvalues[2,] tdata[tv3] <- stepvalues[3,] nm <- paste("nm", titems, sep="") tdata[nm] <- tdata[titems]*0+1 tdata$anm <- rowSums(tdata[nm],na.rm=TRUE) tdata[tv1] <- tdata[tv1]*tdata[nm] tdata[tv2] <- tdata[tv2]*tdata[nm] tdata[tv3] <- tdata[tv3]*tdata[nm] tdata$rl <- tdata$anm*0 tdata$ru <- tdata$anm*3 tdata$ru[tdata$ru==0] <- 1 tdata$r <- rowSums(tdata[items],na.rm=TRUE) tdata$r[tdata$r==tdata$rl] <- tdata$rl[tdata$r==tdata$rl]+0.3 tdata$r[tdata$r==tdata$ru] <- tdata$ru[tdata$r==tdata$ru]-0.3 d <- mean(colMeans(tstepvalues[],na.rm=TRUE)) tdata$t0 <- d + log((tdata$r-tdata$rl)/(tdata$ru-tdata$r)) tdata$t <- 0 tdata$t1 <- 1 iter <- 1 while(max(abs(tdata$t1-tdata$t),na.rm=TRUE)>0.0000001 & iter <=100) { if (iter==1) {tdata$t <- tdata$t0 } else if (iter>1) {tdata$t <- tdata$t1} iter <- iter+1 for (n in 1:ncol(tstepvalues)) { jp <- paste("jp", titems[n], sep="") j2p <- paste("j2p", titems[n], sep="") dj <- paste("dj", titems[n], sep="") tdata$p1 <- exp(1*tdata$t-tdata[,tv1[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p2 <- exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p3 <- exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])/ (1+exp(1*tdata$t-tdata[,tv1[n]])+exp(2*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]])+ exp(3*tdata$t-tdata[,tv1[n]]-tdata[,tv2[n]]-tdata[,tv3[n]])) tdata$p1[is.na(tdata[,tv1[n]])] <- NA tdata$p2[is.na(tdata[,tv1[n]])] <- NA tdata$p3[is.na(tdata[,tv1[n]])] <- NA tdata[jp] <- 1*tdata$p1+2*tdata$p2+3*tdata$p3 tdata[j2p] <- 1*1*tdata$p1+2*2*tdata$p2+3*3*tdata$p3 tdata[dj] <- tdata[j2p]-tdata[jp]*tdata[jp] } tdata$e <- rowSums(tdata[, grep("jp", names(tdata))],na.rm = TRUE) tdata$v <- rowSums(tdata[, grep("dj", names(tdata))],na.rm = TRUE) tdata$t1 <- tdata$t+(tdata$r-tdata$e)/pmax(2*tdata$v,1,na.rm = TRUE) } tdata$t1[tdata$t1>30] <- NA tdata$t1[tdata$counts<3 | tdata$counts<0.5*ncol(tempdata)] <- NA ##########SCALING FACTORS###################### if (scalename == 'eng' ) { A <- 316.291 B <- 29.053 } if (scalename == 'clc' ) { A <- 325.048 B <- 31.112 } if (scalename == 'rel' ) { A <- 322.095 B <- 26.592 } if (scalename == 'par' ) { A <- 298.823 B <- 30.178 } if (scalename == 'saf' ) { A <- 323.415 B <- 31.995 } if (scalename == 'emo' ) { A <- 327.561 B <- 33.799 } if (scalename == 'psaf' ) { A <- 324.906 B <- 32.092 } if (scalename == 'bul' ) { A <- 338.35 B <- 30.829 } if (scalename == 'sub' ) { A <- 299.599 B <- 29.819 } if (scalename == 'env' ) { A <- 320.62 B <- 29.264 } if (scalename == 'penv' ) { A <- 330.34 B <- 31.939 } if (scalename == 'ins' ) { A <- 324.968 B <- 25.403 } if (scalename == 'phea') { A <- 311.666 B <- 27.864 } if (scalename == 'men' ) { A <- 307.097 B <- 28.889 } if (scalename == 'dis' ) { A <- 328.255 B <- 30.631 } tdata$tscore <- round2(tdata$t1*B+A,0) score_data <- tdata[c("tscore")] colnames(score_data) <- paste(scalename, colnames(score_data), sep = "_") assign(scalename,score_data,envir = .GlobalEnv) } } ##########SPECIFICATION AND OUTPUT###################### #read in data indata <- read.table(file=sprintf("%s\\%s", folder, dataname),header=TRUE,sep=",") names(indata) <- toupper(names(indata)) #item names for each scale eng <- c("IENGCLC2","IENGCLC3","IENGCLC4","IENGCLC6","IENGCLC7","IENGCLC8","IENGREL9","IENGREL10","IENGREL12","IENGREL14","IENGREL15", "IENGPAR29","IENGPAR31","IENGPAR32","IENGPAR36","IENGPAR42","IENGPAR48") clc <- c("IENGCLC2","IENGCLC3","IENGCLC4","IENGCLC6","IENGCLC7","IENGCLC8") rel <- c("IENGREL9","IENGREL10","IENGREL12","IENGREL14","IENGREL15") par <- c("IENGPAR29","IENGPAR31","IENGPAR32","IENGPAR36","IENGPAR42","IENGPAR48") saf <- c("ISAFEMO52","ISAFEMO53","ISAFEMO54","ISAFEMO55", "ISAFEMO56","ISAFEMO58","ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67","ISAFBUL68", "ISAFBUL69","ISAFBUL71","ISAFBUL73","ISAFBUL79","ISAFBUL80","ISAFBUL81","ISAFBUL82","ISAFSUB86","ISAFSUB87","ISAFSUB88","ISAFSUB91") emo <- c("ISAFEMO52","ISAFEMO53","ISAFEMO54","ISAFEMO55","ISAFEMO56","ISAFEMO58") psaf <- c("ISAFPSAF60","ISAFPSAF61","ISAFPSAF62","ISAFPSAF64","ISAFPSAF66","ISAFPSAF67") bul <- c("ISAFBUL68","ISAFBUL69","ISAFBUL71","ISAFBUL73","ISAFBUL79","ISAFBUL80","ISAFBUL81","ISAFBUL82") sub <- c("ISAFSUB86","ISAFSUB87","ISAFSUB88","ISAFSUB91") env <- c("IENVPENV97","IENVPENV98","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103","IENVINS105","IENVINS107","IENVINS108", "IENVINS110","IENVINS115","IENVINS116","IENVPHEA119","IENVPHEA120","IENVPHEA121","IENVPHEA122","IENVMEN123","IENVMEN125", "IENVMEN126","IENVMEN128","IENVMEN137","IENVDIS129","IENVDIS130","IENVDIS134","IENVDIS134C","IENVDIS135","IENVDIS136") penv <- c("IENVPENV97","IENVPENV98","IENVPENV100","IENVPENV101","IENVPENV102","IENVPENV103") ins <- c("IENVINS105","IENVINS107","IENVINS108","IENVINS110","IENVINS115","IENVINS116") phea <- c("IENVPHEA119","IENVPHEA120","IENVPHEA121","IENVPHEA122") men <- c("IENVMEN123","IENVMEN125","IENVMEN126","IENVMEN128","IENVMEN137") dis <- c("IENVDIS129","IENVDIS130","IENVDIS134","IENVDIS134C","IENVDIS135","IENVDIS136") #run scaling program for each scale rasch(scalelist=c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis")) #combine scale score data out <- cbind(indata,eng,clc,rel,par,saf,emo,psaf,bul,sub,env,penv,ins,phea,men,dis) #exclude cases if they did not provide any demographic information for (v in c("eng","clc","rel","par","saf","emo","psaf","bul","sub","env","penv","ins","phea","men","dis")) { out[which((is.na(out$IDEMO138) | out$IDEMO138 == "" | out$IDEMO138 == -1) & (is.na(out$IDEMO139) | out$IDEMO139 == "" | out$IDEMO139 == -1) & (is.na(out$IDEMO140) | out$IDEMO140 == "" | out$IDEMO140 == -1)),paste(v,"_tscore",sep="")] <- NA } ##########OUTPUT DATASET WITH SCALE SCORES############### write.table(out, sprintf("%s\\scale_score_%s", folder, dataname), col.names=T,row.names=F,sep=",") ##########END OF PROGRAM#################################