##install packages ggplot2, doBy, gridExtr library(ggplot2) #background info see http://docs.ggplot2.org/current/ library(doBy) library(gridExtra) Sys.setlocale("LC_TIME","English") #------------------------------------------------------------ #--------------Step I: data loading and first look -------- #------------------------------------------------------------ #Download ATM.txt and import via RStudio or use direct link A<-read.table("http://www.trutschnig.net/Datensatz.txt",head=TRUE) summary(A) head(A) str(A) A$ymd<-as.Date(A$ymd) plot(A$ymd,A$sum_out,type="l") #------------------------------------------------------------ #--------------Step 2: plot the timeseries ---------------- #------------------------------------------------------------ # a look at the first year A$year<-as.numeric(substr(A$ymd,1,4)) A7<-subset(A,A$year==2007) p <- ggplot(data=A7,aes(x=ymd,y=sum_out)) p <- p + geom_line() p <- p + geom_point() p # a look at the first year p <- ggplot(data=A,aes(x=ymd,y=sum_out)) p <- p + geom_line() p <- p + geom_point() p <- p + facet_wrap(~year,scales = "free_x",nrow=3) p #show more capabilities of ggplot2 (no need to understand all of it right now) http://docs.ggplot2.org/current/ #improvement: better grid, holidays in colour, smoother A$monthday<-as.numeric(substr(A$ymd,9,10)) #day of month a<-max(A$sum_out,na.rm=TRUE) B<-subset(A,A$monthday==1) #first days per month A$month<-as.numeric(substr(A$ymd,6,7)) H<-subset(A,A$holiday==1) #holidays V<-subset(A,A$holiday==0.5) p <- ggplot(data=A,aes(x=ymd,y=sum_out)) p <- p + geom_line() p <- p + geom_point() p <- p + geom_point(data=H,colour="red",size=3) p <- p + geom_point(data=V,colour="blue",size=3) p <- p + facet_wrap(~year,scales = "free_x",nrow=3) p <- p + scale_y_continuous(limits=c(0,a),name="withdrawn") p <- p + theme(panel.grid.minor.x = element_blank()) p <- p + theme_bw() p <- p + scale_x_date(breaks=B$ymd) p <- p + stat_smooth(se=FALSE) p #weekdays in different colours p <- ggplot(data=A,aes(x=ymd,y=sum_out)) p <- p + geom_line() p <- p + geom_point(aes(x=ymd,y=sum_out,color=factor(nr_weekday)),size=3) #p <- p + scale_colour_manual(values=c(rep("black",6),"red"),name="Wochentag") #p <- p + scale_colour_manual(values=cols(7),name="Wochentag") p <- p + facet_wrap(~year,scales = "free_x",nrow=3) p <- p + scale_y_continuous(limits=c(0,a),name="Geldmenge") p <- p + theme(panel.grid.minor.x = element_blank()) p <- p + theme_bw() p <- p + scale_x_date(breaks=B$ymd) p <- p + stat_smooth(se=FALSE) p #------------------------------------------------------------ #--------------Step 3: yearly histo and boxplots -------- #------------------------------------------------------------ #histo p <- ggplot(data=A,aes(x=sum_out)) p <- p + geom_histogram() p <- p + facet_wrap(~year,nrow=3) p <- p + theme_bw() p #to do: find out how to get a nicer colour for the histogram, see, e.g. http://docs.ggplot2.org/current/ p <- ggplot(data=A,aes(x=sum_out)) p <- p + geom_histogram(fill="gray30",colour="gray") p <- p + facet_wrap(~year,nrow=3) p <- p + theme_bw() p #boxplots p <- ggplot(data=A,aes(x=factor(year),y=sum_out)) p <- p + geom_boxplot() p <- p + theme_bw() p #to do: add all samples per year in the boxplot - use geom_jitter p <- ggplot(data=A,aes(x=factor(year),y=sum_out)) p <- p + geom_boxplot() p <- p + geom_jitter() p <- p + theme_bw() p #------------------------------------------------------------ #--------------Step 4: daily and monthly boxplots -------- #------------------------------------------------------------ #monthly boxplot p <- ggplot(data=A,aes(x=factor(month),y=sum_out)) p <- p + geom_boxplot(fill="green") p <- p + facet_wrap(~year) p <- p + theme_bw() p #reorder p <- ggplot(data=A,aes(x=factor(month),y=sum_out,fill=factor(year))) p <- p + geom_boxplot() p <- p + theme_bw() p #weekday p <- ggplot(data=A,aes(x=factor(nr_weekday),y=sum_out,fill=factor(year))) p <- p + geom_boxplot() p <- p + theme_bw() p #weekday reordered p <- ggplot(data=A,aes(x=factor(nr_weekday),y=sum_out,fill=factor(nr_weekday))) p <- p + geom_boxplot() p <- p + facet_wrap(~year) p <- p + scale_y_continuous(limits=c(0,a)) p <- p + theme_bw() p #------------------------------------------------------------ #--------------Step 5: include holiday info -------- #------------------------------------------------------------ #weekday reordered - omit outliers and plot holiday info instead V<-subset(A,A$holiday==0.5) p <- ggplot(data=A,aes(x=factor(nr_weekday),y=sum_out,fill=factor(nr_weekday))) p <- p + geom_boxplot(outlier.size=0) p <- p + facet_wrap(~year) p <- p + scale_y_continuous(limits=c(0,a)) p <- p + xlab("weekday") p <- p + labs(title = "Boxplot per weekday and year") p <- p + scale_fill_discrete(name = "Weekday") p <- p + geom_point(data=H,colour="red",size=3) p <- p + geom_point(data=V,colour="blue",size=3) p <- p + theme_bw() p #weekday reordered violin plot (adjust = 0.5) p <- ggplot(data=A,aes(x=factor(nr_weekday),y=sum_out,fill=factor(nr_weekday))) p <- p + geom_violin(adjust=0.5) p <- p + stat_summary(fun.y = median,fun.ymin = median, fun.ymax = median) p <- p + facet_wrap(~year) p <- p + scale_y_continuous(limits=c(0,a),name="Weekday") p <- p + theme_bw() p #monthday p <- ggplot(data=A,aes(x=factor(monthday),y=sum_out)) p <- p + geom_boxplot(fill="green") p <- p + facet_wrap(~year) p <- p + scale_y_continuous(limits=c(0,a)) p <- p + theme_bw() p #heatmap for weekdays A$yw<-format(A$ymd,"%Y-%W") farbe<-rainbow(100,start=.40,end=.17) p <- ggplot(A, aes(yw,nr_weekday)) p <- p + geom_tile(aes(fill = sum_out),colour = "white") p <- p + scale_fill_gradientn(colours=farbe,name="count") #p <- p + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) p <- p + theme_bw() p <- p + theme(axis.text.x=element_text(size=9,angle=30)) p <- p + labs(title = paste("Sum_out per day and week","\n","",sep="")) p <- p + facet_wrap(~year,nrow=3,scales="free") print(p) A$monthday<-as.numeric(substr(A$ymd,9,10)) A$ym<-substr(A$ymd,1,7) p <- ggplot(A, aes(ym,monthday)) p <- p + geom_tile(aes(fill = sum_out),colour = "white") p <- p + scale_fill_gradientn(colours=farbe,name="count") #p <- p + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) p <- p + theme_bw() p <- p + theme(axis.text.x=element_text(size=9,angle=30)) p <- p + labs(title = paste("Sum_out per day and week","\n","",sep="")) print(p) #------------------------------------------------------------ #--------------Step 6: save plots -------- #------------------------------------------------------------ #last two plots in one pdf p <- ggplot(data=A,aes(x=factor(nr_weekday),y=sum_out,fill=factor(nr_weekday))) p <- p + geom_violin(adjust=0.5) p <- p + theme(legend.position="none") p <- p + stat_summary(fun.y = median,fun.ymin = median, fun.ymax = median) p <- p + facet_wrap(~year) p <- p + scale_y_continuous(limits=c(0,a),name="outgoing") p <- p + scale_x_discrete(name="weekday") p <- p + theme_bw() p1 <- p #monthday p <- ggplot(data=A,aes(x=factor(monthday),y=sum_out)) p <- p + geom_boxplot(fill="green") p <- p + facet_wrap(~year) p <- p + scale_y_continuous(limits=c(0,a)) p <- p + scale_x_discrete(name="monthday") p <- p + theme_bw() p2 <- p library(gridExtra) pdf(file="plots.pdf",width=20,height=12) grid.arrange(p1, p2, nrow=2) dev.off()