forked from laurencehendry/Codes-for-Jambo-Bukoba
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClean school data.R
84 lines (65 loc) · 4.07 KB
/
Clean school data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
########### Clean School Data
library(stringr)
library(plyr)
library(dplyr)
library(zoo)
# Import the raw export without a header row, so columns are named V1, V2, ...
# `quote = ""` switches off quote handling: quote characters in the file are
# read as ordinary text and are stripped together with the other residue below.
School <- read.csv("C:/Users/Christopher/Google Drive/Data Animals/Jambo Bukoba/Data/pesptz.org (books delivered)/Data_raw.csv", stringsAsFactors = FALSE, header = FALSE, sep=";", quote = "") # Import with no header
########### Get rid of symbols
# Literal fragments removed from every cell, in this order.
# (Named `html_residue` rather than `list` so base::list() is not shadowed.)
html_residue <- c(">", "<", "td", "/", ",", "style=", "text-align:right", '"')
for (fragment in html_residue) {
  # `School[] <-` replaces the columns in place, so School stays a data.frame
  # of character columns whatever its number of rows (no detour through a
  # matrix). `fixed = TRUE` matches each fragment literally, not as a regex.
  School[] <- lapply(School, gsub, pattern = fragment, replacement = "", fixed = TRUE)
}
########### Get rid of last character in V4
# Drop the final character of every element of `x`.
#   x: a character vector (anything else is coerced with as.character()).
# Returns a character vector of the same length; empty strings stay empty
# and NA stays NA. The value is returned visibly, not as the invisible
# result of an assignment.
remove_charachter <- function(x) {
  x <- as.character(x)
  substr(x, 1L, nchar(x) - 1L)
}
# Strip the trailing character from every V4 value in one vectorised call.
# Calling the helper on the whole column (instead of lapply() per element)
# keeps V4 an ordinary character column rather than a list column.
School$V4 <- remove_charachter(School$V4)
########### Drop every row whose V1 is exactly "1"
# which() discards comparisons that evaluate to NA, so rows with a missing
# V1 are removed as well.
rows_to_keep <- which(School$V1 != "1")
School <- School[rows_to_keep, , drop = FALSE]
########### Derive the class of every row from the marker rows in V1
# A row whose V1 is a Roman numeral I..VII followed by one space ("I ",
# "II ", ...) gets that numeral as its class; every other row starts as NA.
# (Presumably these marker rows head the figures of one class - confirm
# against the raw export.)
class_labels <- c("I", "II", "III", "IV", "V", "VI", "VII")
School$class <- class_labels[match(School$V1, paste0(class_labels, " "))]
# Carry the most recent class forward onto the rows below it.
# na.rm = FALSE keeps any NAs that precede the first marker row; with the
# default (TRUE) they are dropped, the result is shorter than the table and
# the assignment fails.
School$class <- na.locf(School$class, na.rm = FALSE)
# Keep only rows that have a value in V2 (rows with a missing V2 go too).
School <- subset(School, V2 != "")
########### Flag the row on which each school starts
# Rows of class "I" are tagged "newschool"; all other rows get NA.
starts_school <- School$class == "I"
School$school <- ifelse(starts_school, "newschool", NA)
########### Assign a running row number, used as the merge key later
# Derived from the current number of rows instead of a hard-coded 1:6153, so
# the script keeps working when the input file grows or shrinks.
School$rownumber <- seq_len(nrow(School))
########### One row per school: keep only the rows flagged "newschool"
# Having a single row per school makes it easy to attach the school names.
# which() skips rows whose flag is NA.
Schoolsubset <- School[which(School$school == "newschool"), , drop = FALSE]
############ Clean school names
# Import the raw list of school names without a header row (single column V1
# is the only one used below).
Schoolnames <- read.csv("C:/Users/Christopher/Google Drive/Data Animals/Jambo Bukoba/Data/pesptz.org (books delivered)/Listofschools_raw.csv", stringsAsFactors = FALSE, header = FALSE, sep=";", quote = "") # Import with no header
# Rows that consist only of one of these leftovers (or are missing) are not
# school names and are dropped before the separator is stripped.
leftovers_before <- c('"', ' Leave","', '(0)","')
is_name_row <- !is.na(Schoolnames$V1) & !(Schoolnames$V1 %in% leftovers_before)
Schoolnames <- Schoolnames[is_name_row, , drop = FALSE]
# Remove the '","' separator wherever it occurs inside a name.
Schoolnames$V1 <- str_replace_all(Schoolnames$V1, '","', "")
########### Get rid of rows
# After stripping, drop the remaining '(0)"' rows and rows that are now empty.
leftovers_after <- c('(0)"', '')
Schoolnames <- Schoolnames[!(Schoolnames$V1 %in% leftovers_after), , drop = FALSE]
########### Put each school's first row next to its cleaned name
# The pairing is purely positional: row i of Schoolsubset is matched with
# row i of Schoolnames, so both must list the schools in the same order.
Combined <- data.frame(Schoolsubset, Schoolnames) # Combine
########## Merge the names back onto the full table
# Full outer join on the running row number; columns present in both inputs
# come back with .x (from School) and .y (from Combined) suffixes.
Complete <- merge(x = School, y = Combined, by = "rownumber", all.x = TRUE, all.y = TRUE)
######### Drop the columns that are now duplicated
# V4.x may be a list column at this point; flatten it and convert to numeric.
Complete$V4.x <- as.numeric(unlist(Complete$V4.x))
# Keep only the columns needed for the final table. Building the frame from
# `Complete$...` expressions yields column names of the form "Complete.V1.x".
Complete <- data.frame(
  Complete$V1.x,
  Complete$V2.x,
  Complete$V3.x,
  Complete$V4.x,
  Complete$class.x,
  Complete$V1.1
)
######### Fill missing values for schoolnames
# Only the first row of each school carries a name; carry it forward onto
# the rows below. na.rm = FALSE keeps the vector the same length as the
# table even if the very first row has no name (the default would drop
# leading NAs and make the assignment fail).
Complete$Complete.V1.1 <- na.locf(Complete$Complete.V1.1, na.rm = FALSE)
#### Give the remaining columns their final names
# dplyr's rename() takes new_name = old_name pairs. The call is qualified
# because plyr, which is also attached, exports a rename() with a different
# interface.
Complete <- dplyr::rename(
  Complete,
  pupils = Complete.V1.x,
  books_delivered = Complete.V2.x,
  books_planned = Complete.V3.x,
  Percentage_of_delivery = Complete.V4.x,
  class = Complete.class.x,
  schoolname = Complete.V1.1
)
# Write the cleaned table as a semicolon-separated file without row names.
final_csv_path <- "C:/Users/Christopher/Google Drive/Data Animals/Jambo Bukoba/Data/pesptz.org (books delivered)/Schooldata_final.csv"
write.table(x = Complete, file = final_csv_path, sep = ";", row.names = FALSE)