Skip to content

Commit

Permalink
add chr_step, length2total
Browse files Browse the repository at this point in the history
  • Loading branch information
mschubert committed Aug 25, 2022
1 parent e06285a commit 6edd518
Showing 1 changed file with 33 additions and 0 deletions.
33 changes: 33 additions & 0 deletions seq/chr_lengths.r
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.genome = import('./genome')$genome
`%>%` = magrittr::`%>%`

#' Get chromosome lengths from assembly ID
#'
Expand All @@ -22,3 +23,35 @@ chr_lengths.default = function(assembly, chrs=NULL) {
chr_lengths = GenomeInfoDb::seqlengths(assembly)
chr_lengths[names(chr_lengths) %in% chrs]
}

#' Generate evenly spaced positions along each chromosome
#'
#' For every chromosome, positions run from 1 to the chromosome length. The
#' number of positions is the chromosome length divided by `step` (rounded),
#' but never fewer than `min_n`.
#'
#' @param step      Approximate distance between positions in bases
#' @param assembly  Assembly identifier, passed on to `chr_lengths()`
#' @param chrs      Chromosomes to include, passed on to `chr_lengths()`
#' @param min_n     Minimum number of positions per chromosome
#' @return  Rowwise data.frame with fields: chr, steps [list of numeric]
chr_step = function(step=1e7, assembly="GRCh38", chrs=NULL, min_n=7) {
    # named vector of lengths -> long table with one row per chromosome
    lens = chr_lengths(assembly=assembly, chrs=chrs)
    per_chr = dplyr::select(stack(lens), chr=ind, length=values)

    # rowwise so that `length` is a scalar when building each sequence
    by_row = dplyr::rowwise(per_chr)
    with_steps = dplyr::mutate(
        by_row,
        steps = list(seq(1, length, length.out=max(min_n, round(length/step))))
    )

    # the length column was only needed to construct the steps
    dplyr::select(with_steps, -length)
}

#' Convert chromosome positions to total positions
#'
#' Adds to each position the summed length of all chromosomes that precede
#' its chromosome, in the order in which `chr_lengths()` returns them.
#'
#' @param chr  Vector of chromosomes, matched by name against `chr_lengths()`
#' @param pos  Vector of chromosome positions
#' @param ...  Arguments passed on to `chr_lengths()` (e.g. assembly, chrs)
#' @return     Numeric vector with absolute chromosome positions; NA where a
#'             chromosome is not among the names returned by `chr_lengths()`
chr2total_pos = function(chr, pos, ...) {
    # chr_lengths() gives a named vector (not a data.frame), so the offsets
    # are computed on the vector directly instead of with dplyr verbs
    lens = chr_lengths(...)

    # convert to double first to avoid integer overflow in the cumulative sum
    lens_num = as.numeric(lens)

    # offset of a chromosome = total length of all chromosomes before it
    offset = cumsum(lens_num) - lens_num
    names(offset) = names(lens)

    # look up by name so that factor or numeric `chr` input matches as well
    pos + unname(offset[as.character(chr)])
}

0 comments on commit 6edd518

Please sign in to comment.