-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathread_large_data_set.R
35 lines (27 loc) · 1.08 KB
/
read_large_data_set.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Use regular readr -------------------------------------------------------
# The data set is available for download at
# https://www.uspto.gov/ip-policy/economic-research/research-datasets/artificial-intelligence-patent-dataset
library(readr)

# Parse only the first 1000 rows so the file can be inspected without
# loading all of it.
ai_model_predictions <- read_tsv(
  file = "ai_model_predictions.tsv",
  n_max = 1000
)
head(ai_model_predictions)
# Use bigmemory package ---------------------------------------------------
# Column names of the readr sample read above. Not used further in this
# section; kept for interactive inspection.
col_names <- colnames(ai_model_predictions)
library(bigmemory)

# State the storage type explicitly rather than letting read.big.matrix()
# infer it from the first line of data.
data_ai <- read.big.matrix(
  "ai_model_predictions.tsv",
  sep = "\t",
  header = TRUE,
  type = "double"
)
dim(data_ai)
summary(data_ai)
# Some NA values are generated here -- presumably from fields that cannot be
# stored as numbers in a single-type big.matrix (TODO confirm which columns).
head(data_ai)

# rm() only drops the binding; call gc() as well so the memory held by the
# matrix can actually be released, as the data.table section does.
rm(data_ai)
gc()
# Use data.table ---------------------------------------------------------
library(data.table)

# fread() loads the complete file and is the faster reader.
data_ai_2 <- fread(
  "ai_model_predictions.tsv",
  sep = "\t"
)
dim(data_ai_2) # 13244037 31
summary(data_ai_2)
head(data_ai_2)

# Frequency count of the values in the analysis_phase column.
table(data_ai_2[["analysis_phase"]])

# Drop the table and run the garbage collector to free the memory.
rm(list = "data_ai_2")
gc()