-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLoadData.R
66 lines (61 loc) · 2.09 KB
/
LoadData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
## -----------------------------------------------------------------
## load data
## -----------------------------------------------------------------
## Read a tab-separated file into a data.frame.
## The first line supplies the column names and the first column supplies
## the row names; names are kept exactly as written (check.names=FALSE).
## A path ending in "gz" is read through a gzfile() connection.
readTab <- function(file) {
  isGzipped <- grepl("gz$", file)
  src <- if (isGzipped) gzfile(file) else file
  read.table(src, header=TRUE, sep="\t", row.names=1, check.names=FALSE)
}
## Expression tables to load, keyed by dataset name (relative paths).
## The bottomly dataset is currently disabled:
##   bottomly = "rnaseq/bottomly/bottomly_count_table.tsv.gz"
xFiles <- c(
  shen       = "rnaseq/shen2012/19-tissues-expr.tsv.gz",
  patel      = "rnaseq/GSE57872/GSE57872_DataMatrixMapped.tsv.gz",
  montastier = "pcr/GSE60946/GSE60946-raw.tsv.gz",
  hess       = "microarray/Hess/HessTrainingData.tsv.gz"
)
## one data.frame per dataset; the list keeps the names and order of xFiles
xs <- lapply(xFiles, readTab)
## The data files store genes in rows and samples in columns; for this
## class we work with the opposite layout (samples in rows, genes in
## columns), so every table is transposed here.
xs <- lapply(xs, function(genesBySamples) {
  ## check.names=FALSE keeps the original identifiers as column names
  data.frame(t(genesBySamples), check.names=FALSE)
})
## Sample annotation tables to load, keyed by dataset name.
## There is no entry for shen here; its annotation is built separately.
## The bottomly dataset is currently disabled:
##   bottomly = "rnaseq/bottomly/bottomly_annot.tsv"
annotFiles <- c(
  patel      = "rnaseq/GSE57872/GSE57872_MappedSampleAnnotation.tsv",
  montastier = "pcr/GSE60946/GSE60946-annot.tsv",
  hess       = "microarray/Hess/HessTrainingAnnotation.tsv"
)
## one annotation data.frame per dataset, named like annotFiles
annots <- lapply(annotFiles, readTab)
## Build the shen annotation from the sample names, since no annotation
## file is loaded for it. The tissue label is the sample name with every
## run of digits removed, together with a hyphen and everything after it.
annots$shen <- data.frame(
  Tissue = gsub('\\d*(-.*)?', '', rownames(xs$shen)),
  row.names = rownames(xs$shen)
)
## Map each tissue to a body system by looking it up BY NAME.
## as.character() matters: if Tissue is a factor (the data.frame() default
## before R 4.0), `[` would index by the factor's integer codes instead of
## its labels. Tissues missing from the table below get NA.
annots$shen$System <- c(
  'boneMarrow' = 'lymphatic',
  'brain'      = 'nervous',
  'cerebellum' = 'nervous',
  'cortex'     = 'nervous',
  'heart'      = 'circulatory',
  'intestine'  = 'digestive/excretory',
  'kidney'     = 'digestive/excretory',
  'limb'       = 'other',
  'liver'      = 'digestive/excretory',
  'lung'       = 'respiratory',
  'mef'        = 'other',
  'mESC'       = 'other',
  'olfactory'  = 'nervous',
  'placenta'   = 'other',
  'spleen'     = 'lymphatic',
  'testes'     = 'other',
  'thymus'     = 'lymphatic'
)[as.character(annots$shen$Tissue)]
## logical flag: TRUE for samples whose system is 'nervous'
annots$shen$Nervous <- (annots$shen$System == 'nervous')
## put the annotations in the same dataset order as the expression tables
annots <- annots[c('shen', 'patel', 'montastier', 'hess')]
## Check that each data object (xs) is aligned with its annotation object
## (annots): the row names must be the same, in the same order.
## identical() is used rather than all(a == b) because == recycles the
## shorter vector, so tables of different sizes could be reported as aligned.
## The result is a logical vector with one entry per dataset.
mapply(
  FUN = function(x, annot) {
    identical(rownames(x), rownames(annot))
  },
  xs,
  annots
)