## R Programming Task on Statistical Analysis for Effective Decision Making

# - 8th Oct, 2021
# - 15:39 PM

library(ggplot2)

library(dplyr)

library(tidyverse)

#Question 6

# Load Austin_Texas.csv (read from the working directory) into the
# `Austin` data frame

Austin <- read.csv(file = "Austin_Texas.csv")

# Print the structure (columns and their types) of the loaded data

str(object = Austin)

#(b)

# delay = arr_del15 / arr_flights, used below as the proportion of delayed
# flights for each row of the data

Austin$delay <- Austin$arr_del15 / Austin$arr_flights

# Delay proportion against time: one coloured line (with points) per carrier

ggplot(
  data = Austin,
  aes(x = time, y = delay, group = carrier, color = carrier)
) +
  geom_line() +
  geom_point()

# The proportion of delayed flights is highest for American (AA),
# particularly after 2015

# Mean delay proportion per carrier. The data frame is `Austin`; the
# previous reference to `AUS` pointed at an object that is never created.

tapply(Austin$delay, Austin$carrier, mean)

# AA has the highest proportion of delayed flights, followed by UA and DL,
# in that order.

#(c)

# Subset the flight data from January 2014 onwards (year >= 2014) into a
# separate data frame; which() drops rows whose year is NA

Austin_2014 <- Austin[which(Austin$year >= 2014), ]

str(Austin_2014)

# Boxplot of each airline's proportion of delayed flights, January 2014 and
# beyond. carrier is mapped to the x-axis so that every airline gets its own
# box; mapping time to x while grouping by carrier does not separate the
# boxes by airline.

ggplot(data = Austin_2014, aes(x = carrier, y = delay, color = carrier)) +
  geom_boxplot()

#(d)

# Extract each carrier's delay proportions (2014 onwards) into separate
# vectors. The column is selected by name ("delay") rather than by a
# hard-coded position, so the code keeps working if the CSV column layout
# changes.

AA <- Austin_2014[which(Austin_2014$carrier == "AA"), "delay"]

DL <- Austin_2014[which(Austin_2014$carrier == "DL"), "delay"]

UA <- Austin_2014[which(Austin_2014$carrier == "UA"), "delay"]

# Before running the t-tests, check for each pair of carriers whether the
# variances can be treated as equal (F test)

var.test(AA, DL)

# P-value = 0.09612: at the 5% significance level we fail to reject the null
# hypothesis that the variances are equal

var.test(DL, UA)

# P-value = 0.01792: at the 5% significance level we reject the null
# hypothesis that the variances are equal

var.test(UA, AA)

# P-value = 0.4747: at the 5% significance level we fail to reject the null
# hypothesis that the variances are equal

# The t-tests can now be run, setting var.equal for each pair according to
# the outcome of its variance test above

t.test(UA, DL, var.equal = FALSE)

# P-value = 2.155e-06: at the 5% significance level, the delays of UA and DL
# are statistically different

t.test(AA, UA, var.equal = TRUE)

# P-value = 0.2215: at the 5% significance level, the delays of UA and AA
# are not statistically different

t.test(AA, DL, var.equal = TRUE)

# P-value = 6.345e-10: at the 5% significance level, the delays of AA and DL
# are statistically different