-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathadd_divergent_flag.R
34 lines (24 loc) · 1.06 KB
/
add_divergent_flag.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
library(tidyverse)
library(dplyr)
library(tidyr)
library(magrittr)
library(valr)
library(fuzzyjoin)
library(Bioconductor)
# Add a DIVERGENT flag to the variant flat file ----
#
# Reads the divergent-region BED file, merges overlapping regions, left-joins
# the merged regions onto the variant flat file by chromosome and position,
# and writes the annotated table back out as a tab-separated file.

# Directory that holds both input files and receives the output file.
flat_file_dir <- "~/projects/b1059/projects/Ryan/csq/flat_file"

# Load divergent regions (columns are named chrom, start, end, strain).
divergent <- data.table::fread(
  file.path(flat_file_dir, "lee2020.divergent_regions_strain.bed"),
  col.names = c("chrom", "start", "end", "strain")
)

# Condense overlapping regions (a portion of a region may be shared by more
# than one strain) so each position matches at most one merged interval, then
# add the marker column that identifies a divergent region.
condensed <- divergent %>%
  valr::bed_merge() %>%
  dplyr::mutate(DIVERGENT = "D")

# Read in the flat file.
data <- data.table::fread(
  file.path(
    flat_file_dir,
    "WI.20210121.hard-filter.isotype.bcsq.20210401.pre.flatfile-gene-impact.tsv"
  )
)

# Join the data: if a position falls within a divergent region, the DIVERGENT
# tag is added; rows without a match are kept (left join) with DIVERGENT = NA.
# NOTE(review): genome_join() treats [start, end] as a closed interval, while
# BED files are conventionally 0-based half-open. If this BED follows that
# convention, POS == start is flagged although it lies just before the
# region -- confirm the coordinate convention of the input file.
# NOTE(review): genome_join() needs the IRanges package to be installed.
join <- fuzzyjoin::genome_join(
  data,
  condensed,
  by = c(
    "CHROM" = "chrom",
    "POS" = "start",
    "POS" = "end"
  ),
  mode = "left"
) %>%
  # Drop the interval columns brought in from the region table.
  dplyr::select(-chrom, -start, -end)

# Write tab-separated output: fwrite() defaults to sep = ",", which would put
# comma-separated content into a file named *.tsv.
data.table::fwrite(
  join,
  file.path(
    flat_file_dir,
    "WI.20210121.hard-filter.isotype.bcsq.20210401.flatfile.tsv"
  ),
  sep = "\t"
)