-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinto_sql_dev_3.R
69 lines (54 loc) · 1.95 KB
/
into_sql_dev_3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(RSQLite)
# Connect to the SQLite database
db <- dbConnect(SQLite(), dbname = "chrom3_1_c.db", timeout = 10000)
# Create table if it doesn't exist
dbExecute(db, "CREATE TABLE IF NOT EXISTS chrom_data (identifier INTEGER PRIMARY KEY, info TEXT)")
# Function to handle bulk inserts
bulk_insert <- function(data) {
# Convert list to data frame
params <- do.call(rbind, data)
# Prepare the SQL insert statement
sql <- "INSERT OR IGNORE INTO chrom_data (identifier, info) VALUES (?, ?)"
stmt <- dbSendQuery(db, sql)
# Bind parameters and execute row by row
for (i in 1:nrow(params)) {
dbBind(stmt, params[i, , drop = FALSE])
}
# Clear the statement
dbClearResult(stmt)
}
# Open the file connection
con <- file("data/input/annotations/chrom3_1_c.trn", "r")
# Initialize variables for bulk insert
data_buffer <- list()
buffer_size <- 10000 # Adjust based on performance and system memory
batch_counter <- 0
maxChars <- 61
# Begin a transaction
dbBegin(db)
# Read the file line by line
while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
id <- as.integer(substr(line, 1, 10)) # Convert identifier to integer
info <- trimws(substr(line, 12, maxChars)) # Trim leading/trailing spaces
data_buffer[[length(data_buffer) + 1]] <- list(id, info)
# When buffer reaches the specified size, do a bulk insert
if (length(data_buffer) >= buffer_size) {
bulk_insert(data_buffer)
data_buffer <- list() # Reset buffer after insertion
batch_counter <- batch_counter + 1
if (batch_counter >= 10) {
dbCommit(db) # Commit after every 10 batches
dbBegin(db) # Begin a new transaction
batch_counter <- 0
}
}
}
# Insert any remaining data in the buffer
if (length(data_buffer) > 0) {
bulk_insert(data_buffer)
}
# Commit any remaining transaction
dbCommit(db)
# Close file and finalize database transaction
close(con)
dbDisconnect(db)