Skip to content
This repository has been archived by the owner on Dec 19, 2023. It is now read-only.

Commit

Permalink
generate bigBED, store in bigbed_files
Browse files Browse the repository at this point in the history
  • Loading branch information
xuebingjie1990 committed Dec 18, 2020
1 parent 2daafc0 commit 8665def
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 8 deletions.
41 changes: 40 additions & 1 deletion bedmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pypiper
import os
import sys
import tempfile
import pandas as pd
# import pyBigWig
import gzip
import shutil
Expand All @@ -19,7 +21,8 @@
parser.add_argument("-r", "--rfg-config", help="file path to the genome config file", type=str)
parser.add_argument("-o", "--output-file", help="path to the output BED files", type=str)
parser.add_argument("-s", "--sample-name", help="name of the sample used to systematically build the output name", type=str)

parser.add_argument("--output-bigbed", help="path to the output bigBED files", type=str)
parser.add_argument("--chrom-size", help="a full path to the chrom.sizes required for the bedtobigbed conversion", type=str)

# add pypiper args to make pipeline looper compatible
parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "looper"],
Expand Down Expand Up @@ -142,6 +145,42 @@ def main():
cmd = [cmd]
cmd.append(gzip_cmd)
pm.run(cmd, target=args.output_file)

bedfile_name = os.path.split(args.output_file)[1]
fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0]
# Produce bigBed (bigNarrowPeak) file from peak file
bigNarrowPeak = os.path.join(args.output_bigbed, fileid + ".bigBed")
temp = tempfile.NamedTemporaryFile(dir=args.output_bigbed, delete=False)
print ("test bigbed saving path: ", bigNarrowPeak)
print ("test chrom.sizes path: ", args.chrom_size)
if not os.path.exists(bigNarrowPeak):
df = pd.read_csv(args.output_file, sep='\t', header=None,
names=("V1","V2","V3","V4","V5","V6",
"V7","V8","V9","V10")).sort_values(by=["V1","V2"])
df.to_csv(temp.name, sep='\t', header=False, index=False)
pm.clean_add(temp.name)
print ("BED: \n", df)
as_file = os.path.join(args.output_bigbed, "bigNarrowPeak.as")
cmd = ("echo 'table bigNarrowPeak\n" +
"\"BED6+4 Peaks of signal enrichment based on pooled, normalized (interpreted) data.\"\n" +
"(\n" +
" string chrom; \"Reference sequence chromosome or scaffold\"\n" +
" uint chromStart; \"Start position in chromosome\"\n" +
" uint chromEnd; \"End position in chromosome\"\n" +
" string name; \"Name given to a region (preferably unique). Use . if no name is assigned\"\n" +
" uint score; \"Indicates how dark the peak will be displayed in the browser (0-1000) \"\n" +
" char[1] strand; \"+ or - or . for unknown\"\n" +
" float signalValue; \"Measurement of average enrichment for the region\"\n" +
" float pValue; \"Statistical significance of signal value (-log10). Set to -1 if not used.\"\n" +
" float qValue; \"Statistical significance with multiple-test correction applied (FDR -log10). Set to -1 if not used.\"\n" +
" int peak; \"Point-source called for this peak; 0-based offset from chromStart. Set to -1 if no point-source called.\"\n" +
")' > " + as_file)
pm.run(cmd, as_file, clean=True)

cmd = ("bedToBigBed -as=" + as_file + " -type=bed6+4 " +
temp.name + " " + args.chrom_size + " " + bigNarrowPeak)
pm.run(cmd, bigNarrowPeak, nofail=True)

pm.stop_pipeline()


Expand Down
10 changes: 6 additions & 4 deletions pep_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@ properties:
output_file_path:
type: string
description: "absolute path the file to the output BED file (derived attribute)"
output_bigbed_path:
type: string
description: "absolute path the file to the output bigBED file (derived attribute)"
genome:
type: string
description: "organism genome code"
enum: ["hg18", "hg19", "hg38", "mm9", "mm10"]
narrowpeak:
type: integer
minimum: 0
maximum: 1
description: "binary number indicating whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
type: boolean
description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
format:
type: string
description: "file format"
Expand All @@ -49,6 +50,7 @@ properties:
required:
- input_file_path
- output_file_path
- output_bigbed_path
- genome
- narrowpeak
- sample_name
Expand Down
9 changes: 6 additions & 3 deletions pipeline_interface.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
pipeline_name: BEDMAKER
pipeline_type: sample
path: bedmaker.py
input_schema: http://schema.databio.org/pipelines/bedmaker.yaml
var_templates:
path: "{looper.piface_dir}/bedmaker.py"
input_schema: "/home/bx2ur/Documents/GitHubRepo/bedmaker/pep_schema.yaml"

This comment has been minimized.

Copy link
@stolarczyk

stolarczyk Dec 21, 2020

Member

This local file path would work only on your machine. You can edit the schema file and then specify its URL here instead: http://schema.databio.org/pipelines/bedmaker.yaml

This comment has been minimized.

Copy link
@xuebingjie1990

xuebingjie1990 Dec 21, 2020

Author Member

I also made some changes to bedfiles_schema.yaml. Can I just push to the dev branch, or should I create a new trackHub branch?

This comment has been minimized.

Copy link
@stolarczyk

stolarczyk via email Dec 21, 2020

Member
command_template: >
{pipeline.path}
{pipeline.var_templates.path}
--input-file {sample.input_file_path}
--output-file {sample.output_file_path}
--output-bigbed {sample.output_bigbed_path}
--narrowpeak {sample.narrowpeak}
--input-type {sample.format}
--genome {sample.genome}
--sample-name {sample.sample_name}
{% if sample.rfg_config is defined %} --rfg-config {sample.rfg_config} {% endif %}
{% if sample.chrom_size is defined %} --chrom-size {sample.chrom_size} {% endif %}
compute:
size_dependent_variables: resources-sample.tsv

Expand Down

0 comments on commit 8665def

Please sign in to comment.