-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathris.R
65 lines (44 loc) · 2.22 KB
/
ris.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
library(dplyr)
library(stringr)
# RCT records ----
# Read the RIS export as a single-column data frame. read.delim() takes the
# file's first line as the header, which is where the `TY....JOUR` column name
# comes from (presumably a leading `TY  - JOUR` line — TODO confirm).
ris_covid <- read.delim(
  "data/covid-rcts-iloveevidence.ris",
  quote = "",
  fill = FALSE
)

# RIS DOI tag. Anchored at the start of the line so that text which merely
# contains "DO - " is not mistaken for a DOI row, and tolerant of one or more
# whitespace characters around the hyphen.
doi_tag <- "^DO\\s+-\\s+"

# Keep only the DOI rows and strip the tag, leaving a one-column data frame
# named `doi`.
dois <- ris_covid %>%
  transmute(doi = as.character(TY....JOUR)) %>%
  filter(str_detect(doi, doi_tag)) %>%
  mutate(doi = str_remove(doi, doi_tag))

# NOTE(review): `opendata` is not created anywhere in this script; it has to
# exist in the session already and carry a `doi` column — confirm its origin.
data1 <- merge(opendata, dois, by = "doi")
# All papers ----
# Read the LitCovid export and name its column `pmid`.
ris_covid_all <- read.delim(
  "data/06102022.litcovid.export.tsv",
  quote = "",
  fill = FALSE
)
names(ris_covid_all) <- "pmid"

# Keep only the rows whose value parses as a number. The NAs that as.numeric()
# produces for every other row are the filter itself, so the accompanying
# coercion warning is muted for this one expression.
is_numeric_id <- !is.na(suppressWarnings(as.numeric(ris_covid_all$pmid)))
pmids <- as.data.frame(ris_covid_all[is_numeric_id, ])
names(pmids) <- "pmid"

# NOTE(review): this rebinds `ris_covid_all` to a different export, and nothing
# later in this script reads it — confirm whether it is still needed.
ris_covid_all <- read.csv(
  "data/litcovid.export.all.tsv",
  sep = "\t",
  header = TRUE
)
# Europe PMC searches ----

# Run one Europe PMC search restricted to a first-publication date window.
#
# `from` and `to` are "YYYY-MM-DD" strings inserted into the FIRST_PDATE range
# of the query; the remaining filters (OPEN_ACCESS:y, SRC:"MED" and the three
# LANG codes) are the same for every window. Returns whatever
# europepmc::epmc_search() returns for output = "parsed".
#
# The function is namespace-qualified because this script never attaches the
# europepmc package.
search_epmc_window <- function(from, to) {
  query <- sprintf(
    paste0(
      "(FIRST_PDATE:[%s TO %s]) AND (OPEN_ACCESS:y) AND (SRC:\"MED\") ",
      "AND (LANG:\"eng\" OR LANG:\"en\" OR LANG:\"us\")"
    ),
    from, to
  )
  europepmc::epmc_search(
    query = query,
    limit = 2000000,
    output = "parsed",
    verbose = FALSE
  )
}

# Consecutive windows covering 2020-01-01 through 2022-06-09.
db_1 <- search_epmc_window("2020-01-01", "2020-06-30")
db_2 <- search_epmc_window("2020-07-01", "2020-12-31")
db_3 <- search_epmc_window("2021-01-01", "2021-06-30")
db_4 <- search_epmc_window("2021-07-01", "2021-12-31")
db_5 <- search_epmc_window("2022-01-01", "2022-06-09")
# Combine and export ----

# Inner-join one Europe PMC result set with the PMID list on `pmid`, keeping
# only the records whose PMID appears in `pmids`.
merge_with_pmids <- function(db) {
  merge(db, pmids, by = "pmid")
}

# Every search window needs its own merge: rbind() below uses all five
# results, so each of data1..data5 must be defined before it runs.
data1 <- merge_with_pmids(db_1)
data2 <- merge_with_pmids(db_2)
data3 <- merge_with_pmids(db_3)
data4 <- merge_with_pmids(db_4)
data5 <- merge_with_pmids(db_5)

# Stack the matched records from all windows and write them to disk.
db_all <- rbind(data1, data2, data3, data4, data5)
write.csv(db_all, "data/db_all.csv")