2+2 result1 <- 2 + 2 result1 result2 <- 4 + 4 ls() result1 <- result1 + 10 result1 weight <- 70 height <- 1.75 bmi <- weight/height^2 bmi setwd("C:/data") getwd() data <- read.csv(file = 'Recruits.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(data) tail(data) dim(data) str(data) #calculate BMI and add this to the dataframe data$bmi <- data$weight/(data$height/100)^2 # note correct height to m from cm head(data) #accessing specific elements of the dataframe data[6,] data[6,11] #subset data by age >= 25 years as a new dataframe data_gt25y <- data[data$age >= 25,] head(data_gt25y) min(data_gt25y$age) #subset on comfort data$comfort[data$comfort >= 4] #Introducing NA as missing median(data$comfort) median(data$comfort, na.rm=T) #same the data frame. Used a different name to the original data file #so to not overwrite the original data write.csv(data, file="data.csv", na = "NA", row.names=F) #read the updated data back in setwd("C:/data") data <- read.csv(file = 'data.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(data) #checking it loaded and looks good unique(data$gender) data$genderf[data$gender == "1"] <- "Male" data$genderf[data$gender == "2"] <- "Female" data$genderf <- as.factor(data$genderf) #Make sure that categories align with the name (Categoryf) Factor_summary <- with(data, table(gender, genderf)) Factor_summary #Here is another way, but be careful, its not so clear whats happening data$gender_f <- as.factor(data$gender) levels(data$gender_f) <- c("Male","Female") Factor_summary <- with(data, table(gender, gender_f)) Factor_summary unique(data$injured) data$injuredf[data$injured == "0"] <- "Yes" data$injuredf[data$injured == "1"] <- "No" data$injuredf <- as.factor(data$injuredf) Factor_summary <- with(data, table(injured, injuredf)) # This is just a temporary object to check the factors Factor_summary #So I let it be over written each time unique(data$histinj) data$histinjf[data$histinj == "0"] <- "Yes" data$histinjf[data$histinj == "1"] <- "No" data$histinjf <- as.factor(data$histinjf) Factor_summary <- with(data, table(histinj, histinjf)) Factor_summary unique(data$smokes) data$smokesf[data$smokes == "0"] <- "Yes" data$smokesf[data$smokes == "1"] <- "No" data$smokesf <- as.factor(data$smokesf) Factor_summary <- with(data, table(smokes, smokesf)) Factor_summary unique(data$boots) data$bootsf[data$boots == "0"] <- "General Purpose" data$bootsf[data$boots == "1"] <- "Taipan" data$bootsf[data$boots == "2"] <- "Other" data$bootsf <- as.factor(data$bootsf) Factor_summary <- with(data, table(boots, bootsf)) Factor_summary unique(data$comfort) data$comfortf[data$comfort == "1"] <- "Extremely comfortable" data$comfortf[data$comfort == "2"] <- "Comfortable" data$comfortf[data$comfort == "3"] <- "OK" data$comfortf[data$comfort == "4"] <- "Uncomfortable" data$comfortf[data$comfort == "5"] <- "Extremely uncomfortable" data$comfortf <- as.factor(data$comfortf) Factor_summary <- with(data, table(comfort, comfortf)) Factor_summary Summary_stats_basic <- summary(data) Summary_stats_basic Summary_stats_bmi_basic <- summary(data$bmi) Summary_stats_bmi_basic #load doBy packagy library(doBy) Summary_stats_boots <- summaryBy(comfortf~bootsf+genderf, FUN=c(length, median, mean, sd, min, max),data=data) Summary_stats_boots write.csv(Summary_stats_boots, file="Summary_stats_boots.csv", na = "NA", row.names=F) Summary_stats_bmi <- summaryBy(bmi~genderf+smokesf, FUN=c(length, mean, median, sd, min, max),data=data) Summary_stats_bmi write.csv(Summary_stats_bmi, file="Summary_stats_bmi.csv", na = "NA", row.names=F) #save the data updated frame. Used a different name to the original data file write.csv(data, file="data.csv", na = "NA", row.names=F) plot(bmi~age, data=data) plot(bmi~age, data=data, col="blue") dev.copy(png, 'bmi_v_age.png') dev.off() plot(bmi~gender, data=data) plot(bmi~genderf, data=data) dev.copy(png, 'bmi_v_genderf.png') dev.off() hist(data$bmi) #Blue Histogram with 6 bins hist(data$bmi, breaks=5, col="blue") hist(data$bmi, breaks=5, col="blue", xlab="Body mass index (kg/m^2)", main="Histogram of BMI") #Obtain the density data dens_bmi <- density(data$bmi) plot(dens_bmi) plot(dens_bmi, main="Density distribution of BMI", col="blue") dens_bmi #Normality test shapiro.test(data$bmi) #T-test t.test(bmi ~ gender, var.equal = FALSE, conf.level = 0.95, data=data) t.test(bmi ~ gender, var.equal = TRUE, conf.level = 0.95, data=data) #Equality of variance tests var.test(bmi ~ gender, data = data) fligner.test(bmi ~ gender, data=data) #One Way ANOVA anova_bmi_comf <- aov(bmi ~ comfortf, data = data) anova_bmi_comf summary(anova_bmi_comf) #gives the basic ANOVA output #Kruskal-Wallis kruskal_bmi_comf <- kruskal.test(bmi ~ comfortf, data = data) kruskal_bmi_comf #Regression fit_comfort_age_boots_gender <- lm(comfort ~ age + genderf + bootsf, data = data) summary(fit_comfort_age_boots_gender) coefficients(fit_comfort_age_boots_gender) # model coefficients confint(fit_comfort_age_boots_gender, level=0.95) # Confidence Intervals for model parameters, here set to 95% fitted(fit_comfort_age_boots_gender) # predicted values residuals(fit_comfort_age_boots_gender) # residuals anova(fit_comfort_age_boots_gender) # anova table vcov(fit_comfort_age_boots_gender) # covariance matrix for model parameters influence(fit_comfort_age_boots_gender) # regression diagnostics #Regression diagnostic plots: layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page plot(fit_comfort_age_boots_gender) #Test of normality of residuals fit_res <- residuals(fit_comfort_age_boots_gender) # object containing the residuals shapiro.test(fit_res) #Comparision of models - anova fit_vo2max_age <- lm(vo2max ~ age, data = data) summary(fit_vo2max_age) fit_vo2max_age_gender <- lm(vo2max ~ age + gender, data = data) summary(fit_vo2max_age_gender) anova(fit_vo2max_age_gender, fit_vo2max_age) #Comparision of models - anova library(lmtest) #load lmtest package lrtest(fit_vo2max_age_gender, fit_vo2max_age) #Correlation cor.test(data$bmi, data$age, method = "pearson") #Confidence intervals CI <- 0.975 #95% CI mean <- mean(data$bmi, na.rm=T) stdev <- sd(data$bmi, na.rm=T) n <- length(data$bmi) error <- qt(0.975,df=n-1)*stdev/sqrt(n) lowerCI <- mean-error upperCI <- mean+error mean stdev lowerCI upperCI #Combining datasets #read the updated data back in setwd("C:/data") data <- read.csv(file = 'data.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(data) dim(data) #cbind ID and eye colour data to recruuits data IDdata <- read.csv(file = 'IDdata.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(IDdata) str(IDdata) unique(IDdata$eye) plot(ID ~ eye, data=IDdata) data <- cbind(IDdata,data) head(data) unique(data$eye) # checking that there are still only 3 values. Yes I am paranoid data$eyef[data$eye == "1"] <- "Blue" data$eyef[data$eye == "2"] <- "Brown" data$eyef[data$eye == "3"] <- "Other" data$eyef <- as.factor(data$eyef) Factor_summary <- with(data, table(eye, eyef)) Factor_summary head(data) plot(ID ~ eyef, data=data) #plot just for fun! write.csv(data, file="data_eye_ID.csv", na = "NA", row.names=F) # saving the new updated data #rbind the two halves of the original recuits data data1_100 <- read.csv(file = 'Recruits1_100.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) data101_205 <- read.csv(file = 'Recruits101_205.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(data1_100) head(data101_205) #make sure the headings match data1_205 <- rbind(data1_100, data101_205) dim(data1_100) dim(data101_205) dim(data1_205) dim(data) # check lengths match /add up as expected to the 205 of the original data head(data) #Merge data conc_data <- read.csv(file = 'concentration_data.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(conc_data) hr_data <- read.csv(file = 'effect_data.csv', header = TRUE, sep = ',', na.strings=c("",".","NA")) head(hr_data) #Plot the data and colour by subject ID plot(conc~time, data=conc_data, col=SID) plot(hr~time, data=hr_data, col=SID) #merge the dataframes all_data <- merge(conc_data, hr_data, all = TRUE) head(all_data) plot(conc~time, data=all_data, col=SID) plot(hr~time, data=all_data, col=SID)