9.4 Exercise Solutions
Exercise 9.4 Exercise 1 Solution
# Load the Titanic data set. Empty strings are read as NA so that missing
# text fields are counted as missing values; strings stay character vectors.
data <- read.csv(
  "./Data/titanic3.csv",
  stringsAsFactors = FALSE,
  na.strings = ""
)
# Print one message per column of `df` that contains missing values.
#
# Args:
#   df: a data frame (or matrix) to inspect.
#
# Returns:
#   NULL, invisibly. The function is called for its printed output: one line
#   per column that has at least one NA, giving the column name and NA count.
n_nan_print <- function(df) {
  # seq_len() keeps the loop empty when `df` has no columns (1:0 would not).
  for (i in seq_len(ncol(df))) {
    n_missing <- sum(is.na(df[, i]))
    if (n_missing > 0) {
      # Take the name from `df` itself, not from a global object.
      print(paste(
        "The column", colnames(df)[i], "have", n_missing, "missing values"
      ))
    }
  }
  invisible(NULL)
}
# Print the number of missing values for each column of `data` that has any.
n_nan_print(data)
## [1] "The column age have 263 missing values"
## [1] "The column fare have 1 missing values"
## [1] "The column cabin have 1014 missing values"
## [1] "The column embarked have 2 missing values"
## [1] "The column boat have 823 missing values"
## [1] "The column body have 1188 missing values"
## [1] "The column home.dest have 564 missing values"
Exercise 9.5 Exercise 2 Solution
# Remove sparse columns, then remove every row that still contains an NA.
#
# Args:
#   df:  a data frame.
#   per: threshold in [0, 1]; a column is kept only if its share of missing
#        values is strictly below `per` (default 0.8).
#
# Returns:
#   A data frame with the retained columns and only complete rows, as
#   produced by na.omit().
remove_mv <- function(df, per = 0.8) {
  # max(..., 1L) avoids 0 / 0 for a zero-row input; all columns are then kept.
  na_share <- colSums(is.na(df)) / max(nrow(df), 1L)
  # drop = FALSE keeps a data frame even when a single column survives.
  df_new <- df[, na_share < per, drop = FALSE]
  na.omit(df_new)
}
# Drop columns with 40% or more missing values, then incomplete rows, and
# show the dimensions before and after the clean-up.
z <- remove_mv(data, 0.4)
c(dim(data), dim(z))
## [1] 1309 14 1043 10
Exercise 9.6 Exercise 3 Solution
# Work on a copy so the original `data` stays untouched.
dataImputed <- data
# Missing body numbers become 0.
dataImputed$body[is.na(dataImputed$body)] <- 0
# Missing home/destination entries get an explicit "Unknown" label.
dataImputed$home.dest[is.na(dataImputed$home.dest)] <- "Unknown"
# Missing ages are replaced by the rounded mean of the observed ages.
dataImputed$age[is.na(dataImputed$age)] <- round(mean(data$age, na.rm = TRUE))
# NOTE(review): datatable() is presumably DT::datatable; the library() call
# is not visible in this excerpt - confirm the package is loaded.
datatable(dataImputed)