# File-viewer metadata captured with this listing: 142 lines, 12 KiB, R
library(tidyverse)
library(caret)

# Read the raw dropout data set from its fixed location on disk.
dropout <- read.csv("C:/Users/Mark/Desktop/Grad School/PDAT630/dropout.csv")

# Keep the raw import untouched; all cleaning happens on `dropout_clean`.
dropout_clean <- dropout

# Force the name of the first column.
# NOTE(review): presumably the header of column 1 is read with a mangled name
# (e.g. a byte-order-mark prefix) -- confirm against the CSV.
colnames(dropout_clean)[1] <- "Marital.status"

## Combining under-represented factor levels into buckets of at least 10

# Frequency table per categorical column, to see how many rows each code has.
table(dropout_clean$Marital.status)
table(dropout_clean$Application.mode)
table(dropout_clean$Previous.qualification)
table(dropout_clean$Mother.s.qualification)
table(dropout_clean$Father.s.qualification)
table(dropout_clean$Mother.s.occupation)
table(dropout_clean$Father.s.occupation)

# Collapse marital-status codes 3 and 6 into one bucket coded 7; every other
# value is left as it is.
# `%in%` replaces the chained `==` / `|` tests. It returns FALSE (never NA) for
# a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Marital.status = ifelse(
    Marital.status %in% c("3", "6"),
    7,
    Marital.status
  )
)

# Collapse application-mode codes 2, 5, 10, 11 and 18 into one bucket coded 19;
# every other value is left as it is.
# `%in%` replaces the chained `==` / `|` tests. It returns FALSE (never NA) for
# a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Application.mode = ifelse(
    Application.mode %in% c("2", "5", "10", "11", "18"),
    19,
    Application.mode
  )
)

# Collapse previous-qualification codes 4, 5, 8, 10, 11, 13 and 17 into one
# bucket coded 18; every other value is left as it is.
# `%in%` replaces the chained `==` / `|` tests. It returns FALSE (never NA) for
# a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Previous.qualification = ifelse(
    Previous.qualification %in% c("4", "5", "8", "10", "11", "13", "17"),
    18,
    Previous.qualification
  )
)

# Collapse the listed mother's-qualification codes into one bucket coded 35;
# every other value is left as it is.
# `%in%` replaces the 19-way chain of `==` / `|` tests. It returns FALSE (never
# NA) for a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Mother.s.qualification = ifelse(
    Mother.s.qualification %in% c(
      "6", "7", "8", "9", "11", "12", "14", "15", "16", "17",
      "18", "20", "21", "24", "25", "26", "27", "28", "29"
    ),
    35,
    Mother.s.qualification
  )
)

# Collapse the listed father's-qualification codes into one bucket coded 35;
# every other value is left as it is.
# `%in%` replaces the 22-way chain of `==` / `|` tests. It returns FALSE (never
# NA) for a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Father.s.qualification = ifelse(
    Father.s.qualification %in% c(
      "6", "7", "8", "11", "12", "13", "15", "16", "17", "18", "19",
      "20", "21", "22", "23", "25", "26", "30", "31", "32", "33", "34"
    ),
    35,
    Father.s.qualification
  )
)

# Collapse the listed mother's-occupation codes into one bucket coded 47;
# every other value is left as it is.
# `%in%` replaces the 18-way chain of `==` / `|` tests. It returns FALSE (never
# NA) for a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Mother.s.occupation = ifelse(
    Mother.s.occupation %in% c(
      "11", "14", "15", "16", "17", "18", "19", "20", "21",
      "22", "23", "24", "25", "26", "27", "28", "30", "31"
    ),
    47,
    Mother.s.occupation
  )
)

# Collapse the listed father's-occupation codes (14 through 43, plus 45 and 46)
# into one bucket coded 47; every other value, including 44, is left as it is.
# `%in%` replaces the 32-way chain of `==` / `|` tests. It returns FALSE (never
# NA) for a missing value, so an NA row keeps its NA exactly as before.
dropout_clean <- mutate(
  dropout_clean,
  Father.s.occupation = ifelse(
    Father.s.occupation %in% c(
      "14", "15", "16", "17", "18", "19", "20", "21", "22", "23",
      "24", "25", "26", "27", "28", "29", "30", "31", "32", "33",
      "34", "35", "36", "37", "38", "39", "40", "41", "42", "43",
      "45", "46"
    ),
    47,
    Father.s.occupation
  )
)

#factor_cols=c("Marital.status","Scholarship.holder","Tuition.fees.up.to.date","Gender","Displaced","Daytime.evening.attendance","Application.mode", "Course","Previous.qualification","Mother.s.qualification","Father.s.qualification","Mother.s.occupation","Father.s.occupation","International","Target")
#dropout_clean[factor_cols] <- lapply(dropout_clean[factor_cols], factor)
#sapply(dropout_clean, table)

# Keep only the columns listed below, in this order; all other columns are
# dropped from `dropout_clean`.
# NOTE(review): Application.mode is recoded earlier in this script but is not
# in this list, so it does not reach the output -- confirm that is intended.
dropout_clean <- subset(
  dropout_clean,
  select = c(
    Gender, Displaced, Tuition.fees.up.to.date,
    Scholarship.holder, Marital.status, Daytime.evening.attendance, Course,
    Previous.qualification, Mother.s.qualification, Father.s.qualification,
    Mother.s.occupation, Father.s.occupation, Age.at.enrollment,
    International, Target
  )
)

#str(dropout_clean)

# Write the cleaned data next to the raw file; row.names = FALSE keeps R's row
# index out of the CSV.
write.csv(dropout_clean, "C:/Users/Mark/Desktop/Grad School/PDAT630/dropout_clean.csv", row.names = FALSE)