######################## # This is the fourth script in the R bites series # We use aov and lm for data analysis # Beatriz Gallo Cordoba # CYPEP # Monash University ######################### #Working directory setwd("~/Google Drive/RBites") #Library library(stargazer) library(ggplot2) #install.packages("stargazer") # Data---- dta<-read.csv("PISA2015AUS.csv") # BELONG: Belonging # MOTIVAT: Achievement motivation # ANXTEST:Test anxiety # ESCS: Index of economic, social and cultural status (WLE) # ST004D01T: Gender; 1 Female 2 Male #fixing data dta$gender<-factor(dta$ST004D01T,labels = c("Female","Male")) table(dta$gender,dta$ST004D01T) dta$BELONG[dta$BELONG==99]<-NA dta$MOTIVAT[dta$MOTIVAT==99]<-NA dta$qescs<-cut(dta$ESCS,breaks = quantile(dta$ESCS,na.rm=TRUE), labels=paste0("Q",1:4)) table(dta$qescs) # Summary in nice format stargazer(dta,type = "text") head(dta) #ANOVA x<-aov(BELONG~qescs,dta) x summary(x) attributes(x) x$effects #Regression X<-lm(BELONG~qescs,dta) X coef(X) summary(X) #Nice tables stargazer(X,type="text") stargazer(X,type="html",out = "reg1table.html") #Predict prX<-predict(X,dta,se.fit = TRUE) dprX<-data.frame(y=prX$fit,se=prX$se.fit,x=dta$qescs) #Confidence intervals dprX$min<-dprX$y-dprX$se*qt(0.025,prX$df, lower.tail = FALSE) dprX$max<-dprX$y+dprX$se*qt(0.025,prX$df, lower.tail = FALSE) #Graph head(dprX) ggplot(dprX,aes(x=x,y=y,ymin=min,ymax=max))+ geom_errorbar()+ geom_point()+ ylim(min(dprX$min),max(dprX$max)) #Alternative: continuous version X<-lm(BELONG~ESCS,dta) X coef(X) summary(X) #Nice tables stargazer(X,type="text") stargazer(X,type="html",out = "reg1table.html") #Predict prX<-predict(X,dta,se.fit = TRUE) dprX<-data.frame(y=prX$fit,se=prX$se.fit,x=dta$ESCS) #Confidence intervals dprX$min<-dprX$y-dprX$se*qt(0.025,prX$df, lower.tail = FALSE) dprX$max<-dprX$y+dprX$se*qt(0.025,prX$df, lower.tail = FALSE) #Graph head(dprX) ggplot(dprX,aes(x=x,y=y,ymin=min,ymax=max))+ geom_ribbon(alpha=0.5)+ geom_line() #Alternative: continuous version with controls XC<-lm(BELONG~ESCS + gender,dta) XC coef(XC) summary(XC) #Nice tables stargazer(X,XC,type="text") stargazer(X,XC,type="html",out = "reg1table.html") #Predict prXC<-predict(XC,dta,se.fit = TRUE) dprXC<-data.frame(y=prXC$fit,se=prXC$se.fit,x=dta$ESCS,Gender=dta$gender) #Confidence intervals dprXC$min<-dprXC$y-dprXC$se*qt(0.025,prXC$df, lower.tail = FALSE) dprXC$max<-dprXC$y+dprXC$se*qt(0.025,prXC$df, lower.tail = FALSE) #Graph head(dprXC) ggplot(dprXC,aes(x=x,y=y,ymin=min,ymax=max,fill=Gender,colour=Gender))+ geom_ribbon(alpha=0.5)+ geom_line()+ ylim(min(dprXC$min),max(dprXC$max))