-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetchData.r
121 lines (97 loc) · 4.04 KB
/
fetchData.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
### This script screens the data set and gives an overview of what the data looks like. Construct database
# DFList. A list of df from each csv file
# Universe. The stock universe where each has same length
# U_train. A list of elements from universe, starting from 20140102 to 20151231
# U_test, A list of elements from universe, starting from 20160104 to 20161231
#
require(xts)
rm(list=ls())
# set wd
homedir<- "/Users/Eric/Documents/IFUND_comp"
datadir<- "/Users/Eric/Documents/IFUND_comp/data"
setwd(datadir)
# # read csv
fileNames<- list.files(pattern = "*.csv")
DFList<- list()
for (x in fileNames) {
eval(parse(text= paste( "DFList$'", strsplit(x, split='_')[[1]][1], "'<- read.csv(x, header=T, col.names= c('DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME', 'AMOUNT') )", sep="")))
}
setwd(homedir)
save(DFList,file = 'DFList.RData')
# load DFList
setwd(homedir)
load('DFList.RData')
DFList<- lapply(DFList, function(x) xts(x[,c("OPEN", "HIGH", "LOW", "CLOSE", "VOLUME", "AMOUNT")], order.by = as.Date(as.character(x$DATE))))
tmp<- lapply(DFList, function(x) dim(x)[1])
tmp<- unlist(tmp)> 50
DFList<- DFList[tmp]
ret_nperiod<- function(df, type='cc', N=1){ #df must be xts obj
tmp<-df
for (ty in type){
for( n in N){
if(ty=='co'){ # close to open, intraday ret
colName<- paste('RET.CO.', n, sep="")
a<-(tmp$CLOSE- tmp$OPEN)/ tmp$OPEN
eval(parse(text= paste("tmp$",colName, "<- a", sep="")))
}else if (ty=='oc'){ # open to close overnigth ret
colName<- paste("RET.OC.",n, sep="")
a<- (tmp$OPEN- lag(tmp$CLOSE, k=n))/ lag(tmp$CLOSE, k=n)
eval(parse(text= paste("tmp$",colName, "<- a", sep="")))
}else if (ty=='ho'){ # high to open intraday
colName<- paste("RET.HO.",n, sep="")
a<- (tmp$HIGH- tmp$OPEN)/ tmp$OPEN
eval(parse(text= paste("tmp$",colName, "<- a", sep="")))
}else if(ty=='lo'){ # low to open intra day
colName<- paste("RET.LO.", n, sep="")
a<- (tmp$LOW- tmp$OPEN)/ tmp$OPEN
eval(parse(text= paste("tmp$",colName, "<- a", sep="")))
}else if(ty== 'hl'){ # high to low intra day
colName<- paste("RET.HL.", n, sep="")
a<- (tmp$HIGH- tmp$LOW)/ tmp$OPEN
eval(parse(text= paste("tmp$",colName, "<- a", sep="")))
}
else { ## close to close, daily ret
colName<- paste("RET.CC.",n, sep="")
a<- (tmp$CLOSE- lag(tmp$CLOSE, k=n))/ lag(tmp$CLOSE, k=n)
eval(parse(text= paste("tmp$",colName, "<- a", sep="")))
}
}
}
return(tmp)
}
DFList<- lapply(DFList, function(x) ret_nperiod(df = x, type = "cc", N = c(1,3,7,20)))
DFList<- lapply(DFList, function(x) ret_nperiod(df=x, type = c("co", "oc","hl", "ho","lo")))
WHOLE<- DFList
save(WHOLE, file = "WHOLE.RData")
# DFList contains entites with shorter history (new stocks). Remove them to uniform the length. Save the Universe
stdLen<- 733
tmp<- sapply(names(DFList), function (x) eval(parse(text= paste("dim(DFList$'", x, "')[1]== stdLen ", sep=""))))
Universe<- DFList[tmp]
d<- names(DFList[[1]])
e<- grepl("RET.CC.*", d)
f<- grepl("RET.CO.*", d)
g<- grepl("RET.OC.*", d)
h<- data.frame( e=e, f=f, g=g)
h<- apply(h, 1, any )
indx_col<- d[h]
INDX_EQW<- list()
for ( c in indx_col){
tmp<- lapply(Universe, function(x) x[, c])
tmp<- as.data.frame(tmp)
tmp<- apply(tmp, 1, mean)
INDX_EQW[[c]]<- tmp
}
INDX_EQW$AMOUNT<- apply( as.data.frame( lapply(Universe, function(x) x$AMOUNT)), 1, function(x) sum(x, na.rm = T))
INDX_EQW<- xts(as.data.frame(INDX_EQW) , order.by = as.Date(as.character(index(Universe[[1]]))))
save(Universe, file= 'Universe.RData')
save(INDX_EQW, file= 'INDX_EQW.RData')
# Save U_train and U_test. train tasks 20140102- 20151231. test takes the else
split_flag= 489
U_train<- Universe
U_train<- lapply(U_train, function(x) x[1:split_flag,])
U_test<- Universe
U_test<- lapply(U_test, function(x) x[(split_flag+1): dim(x)[1], ])
save(U_train, file = 'U_train.RData')
save(U_test, file = 'U_test.RData')
INDX_EQW_train<- INDX_EQW[1:split_flag]
INDX_EQW_test<- INDX_EQW[(split_flag+1) : dim(INDX_EQW)[1]]