 ## R Programming Task on Statistical Analysis for Effective Decision Making

• 8th Oct, 2021
• 3:39 PM

library(ggplot2)
library(dplyr)
library(tidyverse)

# Question 6

# NOTE(review): `Austin` must already exist as a data frame at this point; this
# script contains no read step -- TODO confirm how the dataset is loaded.
# Inspect the structure of the Austin data frame.
str(Austin)

# (b)

# delay = arr_del15 / arr_flights: the proportion of delayed flights that the
# remarks below refer to.
Austin$delay <- Austin$arr_del15 / Austin$arr_flights

# Delay proportion over time, one coloured line (with points) per carrier.
ggplot(data = Austin, aes(x = time, y = delay, group = carrier)) +
  geom_line(aes(color = carrier)) +
  geom_point(aes(color = carrier))
# American (AA) shows the highest proportion of delayed flights, particularly
# after 2015.

# Mean delay proportion per carrier. The original referred to an undefined
# object `AUS`; the data frame used throughout this script is `Austin`.
tapply(Austin$delay, Austin$carrier, mean)
# AA has the highest proportion of delayed flights, followed by UA and DL, in
# that order.

# (c)

# Keep only the rows with year >= 2014 (January 2014 onwards) in a separate
# data frame. which() drops rows whose comparison is NA.
Austin_2014 <- Austin[which(Austin$year >= 2014), ]
str(Austin_2014)

# Boxplot of each airline's proportion of delayed flights, January 2014 onwards.
# carrier is mapped to x so that every airline gets its own box; the original
# mapped x = time with group = carrier, which does not place the airlines at
# separate positions on the x axis.
ggplot(data = Austin_2014, aes(x = carrier, y = delay)) +
  geom_boxplot(aes(color = carrier))

# (d)

# Extract each operator's delay proportions (2014 onwards) as a plain vector.
# The column is selected by name rather than by position: the original used
# column index 24, which silently picks the wrong column if the column order
# changes and returns a one-column tibble (not a vector) for tibble input.
# NOTE(review): assumes column 24 was `delay`, as the original comment
# ("flights' delay data") indicates -- confirm against the dataset.
AA <- Austin_2014$delay[which(Austin_2014$carrier == "AA")]
DL <- Austin_2014$delay[which(Austin_2014$carrier == "DL")]
UA <- Austin_2014$delay[which(Austin_2014$carrier == "UA")]

# The choice between the pooled and the Welch t-test depends on whether the two
# samples share a common variance, so each pair is checked with an F test first.

var.test(x = AA, y = DL)
# p-value = 0.09612: at the 5% significance level we fail to reject the null
# hypothesis that the variances are equal.
var.test(x = DL, y = UA)
# p-value = 0.01792: at the 5% significance level we reject the null hypothesis
# that the variances are equal.
var.test(x = UA, y = AA)
# p-value = 0.4747: at the 5% significance level we fail to reject the null
# hypothesis that the variances are equal.

# Two-sample t-tests on the delay proportions. var.equal follows the outcome of
# the variance tests above: pooled variance where equality was not rejected,
# Welch's correction where it was. TRUE/FALSE are spelled out because T and F
# are ordinary variables that can be reassigned.

t.test(UA, DL, var.equal = FALSE)
# p-value = 2.155e-06: at the 5% significance level, the delays of UA and DL
# are statistically different.

t.test(AA, UA, var.equal = TRUE)
# p-value = 0.2215: at the 5% significance level, the delays of UA and AA are
# not statistically different.

t.test(AA, DL, var.equal = TRUE)
# p-value = 6.345e-10: at the 5% significance level, the delays of AA and DL
# are statistically different.