Skip to content

Commit

Permalink
include everything from dk repo
Browse files Browse the repository at this point in the history
  • Loading branch information
David Koppstein committed Feb 27, 2023
1 parent 79f1c5c commit b0b4d78
Show file tree
Hide file tree
Showing 13 changed files with 269 additions and 41 deletions.
38 changes: 19 additions & 19 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,41 +14,42 @@ reads_per_batch: 50000
# reads longer than this are filtered out (they can cause bwa-sw to crash)
max_read_length: 100000

# base resolution was formerly 1000
matrix_resolutions:
base: 1000
base: 10000
zoomify:
- 1000
- 2000
- 5000
# - 1000
# - 2000
# - 5000
- 10000
- 25000
- 50000
- 20000
- 40000
- 100000
- 250000
- 500000
- 1000000
- 2500000
- 5000000
- 10000000
# - 2500000
# - 5000000
# - 10000000
software:
juicer:
tools_url: "https://s3.amazonaws.com/hicfiles.tc4ga.com/public/juicer/juicer_tools_1.19.02.jar"
bwa:
cli_opts: "bwasw -b 5 -q 2 -r 1 -T 15 -z 10"
threads: 10
threads: 64
pore_c:
create_alignment_table:
threads: 1
threads: 32
to_cooler:
threads: 10
threads: 32
to_unsorted_pairs:
threads: 1
threads: 32
sort_pairs_file:
threads: 10
threads: 32
to_salsa_bed:
threads: 4
threads: 32
sort:
threads: 10
threads: 64
memory_per_thread: "4G"
f5c:
run_mode: cpu # gpu
Expand All @@ -57,16 +58,15 @@ software:
settings:
gpu:
binary: "f5c_x86_64_linux_cuda"
threads: 8
threads: 16
gpus: 1
cli_opts: "--cuda-dev-id 0 --cuda-mem-frac 0.7"
cpu:
binary: "f5c_x86_64_linux"
threads: 20
threads: 32
gpus: 0
cli_opts: ""

# mapping_query: ""
mapping_query: ""
pore_c_version: 'rel'

3 changes: 2 additions & 1 deletion config/phased_vcfs.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
# refgenome_id: The reference genome the phased variants were called on, must match an entry in references.tsv
# biospecimen: The sample the phased variants come from, must match corresponding entry in basecalls.tsv
# vcf_path: Path to a tabix-indexed VCF file with phased variants.
#GIAB GRCh38 GM12878 .test/resources/GM12878.phased.conf.vcf.gz
phase_set_id refgenome_id biospecimen vcf_path
GIAB GRCh38 GM12878 .test/resources/GM12878.phased.conf.vcf.gz
129S1CAST GRCm38 9sCa /data/akhtar/Mouse2019AlleleSpecific2/projects/pore-c/resources/snp_genome/129S1_CAST.snp.hicpro_reformatted.vcf.gz
4 changes: 2 additions & 2 deletions config/references.tsv
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# references.tsv - One entry per genome assembly (e.g. reference genome, draft genome assembly or scaffold).
# refgenome_id: A unique id to identify the reference/assembly/scaffold you're mapping against
# refgenome_path: The path of the source fasta file. If a relative path is given, it is resolved relative to the snakemake workdir
#draft1 .test/resources/GRCh38.fasta.gz
refgenome_id refgenome_path
GRCh38 .test/resources/GRCh38.fasta.gz
draft1 .test/resources/GRCh38.fasta.gz
GRCm38 /data/repository/organisms/GRCm38_ensembl/genome_fasta/genome.fa
196 changes: 190 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,193 @@
name: pore-c-snakemake
channels:
- conda-forge
- bioconda
- defaults
- anaconda
- conda-forge
- bioconda
- defaults
dependencies:
- pandas ==1.0.5
- python-box ==4.2.3
- snakemake ==5.19.3
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- abseil-cpp=20211102.0=h27087fc_1
- aioeasywebdav=2.4.0=pyha770c72_0
- aiohttp=3.8.1=py310h5764c6d_1
- aiosignal=1.2.0=pyhd8ed1ab_0
- amply=0.1.5=pyhd8ed1ab_0
- appdirs=1.4.4=pyh9f0ad1d_0
- async-timeout=4.0.2=pyhd8ed1ab_0
- attmap=0.13.2=pyhd8ed1ab_0
- attrs=22.1.0=pyh71513ae_1
- backports=1.0=py_2
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
- bcrypt=3.2.2=py310h5764c6d_0
- boto3=1.24.46=pyhd8ed1ab_0
- botocore=1.27.46=pyhd8ed1ab_0
- brotlipy=0.7.0=py310h5764c6d_1004
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2022.6.15=ha878542_0
- cachetools=5.0.0=pyhd8ed1ab_0
- certifi=2022.6.15=py310hff52083_0
- cffi=1.15.1=py310h255011f_0
- charset-normalizer=2.1.0=pyhd8ed1ab_0
- cni=1.0.1=ha975731_1
- cni-plugins=1.0.1=ha8f183a_0
- coin-or-cbc=2.10.8=h3786ebc_0
- coin-or-cgl=0.60.6=h6f57e76_1
- coin-or-clp=1.17.7=hc56784d_1
- coin-or-osi=0.108.7=h2720bb7_1
- coin-or-utils=2.11.6=h202d8b1_1
- coincbc=2.10.8=0_metapackage
- commonmark=0.9.1=py_0
- configargparse=1.5.3=pyhd8ed1ab_0
- connection_pool=0.0.3=pyhd3deb0d_0
- cryptography=37.0.4=py310h597c629_0
- dataclasses=0.8=pyhc8e2a94_3
- datrie=0.8.2=py310h6acc77f_3
- decorator=5.1.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- docutils=0.19=py310hff52083_0
- dpath=2.0.6=py310hff52083_1
- drmaa=0.7.9=py310h06a4308_0
- dropbox=11.33.0=pyhd8ed1ab_0
- filechunkio=1.8=py_2
- filelock=3.7.1=pyhd8ed1ab_0
- frozenlist=1.3.1=py310h5764c6d_0
- ftputil=5.0.4=pyhd8ed1ab_0
- future=0.18.2=py310hff52083_5
- gitdb=4.0.9=pyhd8ed1ab_0
- gitpython=3.1.27=pyhd8ed1ab_0
- google-api-core=2.8.2=pyhd8ed1ab_0
- google-api-python-client=2.55.0=pyhd8ed1ab_0
- google-auth=2.10.0=pyh6c4a22f_0
- google-auth-httplib2=0.1.0=pyhd8ed1ab_1
- google-cloud-core=2.3.2=pyhd8ed1ab_0
- google-cloud-storage=2.4.0=pyh6c4a22f_0
- google-crc32c=1.1.2=py310he8fe98e_3
- google-resumable-media=2.3.3=pyhd8ed1ab_0
- googleapis-common-protos=1.56.4=py310hff52083_0
- grpc-cpp=1.48.0=hbd84cd8_0
- grpcio=1.48.0=py310ha0b7d45_0
- httplib2=0.20.4=pyhd8ed1ab_0
- icu=70.1=h27087fc_0
- idna=3.3=pyhd8ed1ab_0
- importlib-metadata=4.11.4=py310hff52083_0
- importlib_resources=5.9.0=pyhd8ed1ab_0
- iniconfig=1.1.1=pyh9f0ad1d_0
- jinja2=3.1.2=pyhd8ed1ab_1
- jmespath=1.0.1=pyhd8ed1ab_0
- jq=1.6=h36c2ea0_1000
- jsonschema=4.9.1=pyhd8ed1ab_0
- jupyter_core=4.11.1=py310hff52083_0
- ld_impl_linux-64=2.36.1=hea4e1c9_2
- libarchive=3.5.2=hb890918_3
- libblas=3.9.0=15_linux64_openblas
- libcblas=3.9.0=15_linux64_openblas
- libcrc32c=1.1.2=h9c3ff4c_0
- libffi=3.4.2=h7f98852_5
- libgcc-ng=12.1.0=h8d9b700_16
- libgfortran-ng=12.1.0=h69a702a_16
- libgfortran5=12.1.0=hdcd56e2_16
- libgomp=12.1.0=h8d9b700_16
- libiconv=1.16=h516909a_0
- liblapack=3.9.0=15_linux64_openblas
- liblapacke=3.9.0=15_linux64_openblas
- libnsl=2.0.0=h7f98852_0
- libopenblas=0.3.20=pthreads_h78a6416_1
- libprotobuf=3.20.1=h6239696_0
- libseccomp=2.4.4=h7f98852_1
- libsodium=1.0.18=h36c2ea0_1
- libstdcxx-ng=12.1.0=ha89aaad_16
- libuuid=2.32.1=h7f98852_1000
- libxml2=2.9.14=h22db469_3
- libzlib=1.2.12=h166bdaf_2
- logmuse=0.2.6=pyh8c360ce_0
- lz4-c=1.9.3=h9c3ff4c_1
- lzo=2.10=h516909a_1000
- markupsafe=2.1.1=py310h5764c6d_1
- multidict=6.0.2=py310h5764c6d_1
- nbformat=5.4.0=pyhd8ed1ab_0
- ncurses=6.3=h27087fc_1
- numpy=1.23.1=py310h53a5b5f_0
- oauth2client=4.1.3=py_0
- oniguruma=6.9.8=h166bdaf_0
- openssl=1.1.1q=h166bdaf_0
- packaging=21.3=pyhd8ed1ab_0
- pandas=1.4.3=py310h769672d_0
- paramiko=2.11.0=pyhd8ed1ab_0
- peppy=0.32.0=pyhd8ed1ab_1
- pip=22.2.2=pyhd8ed1ab_0
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0
- plac=1.3.5=pyhd8ed1ab_0
- pluggy=1.0.0=py310hff52083_3
- ply=3.11=py_1
- prettytable=3.3.0=pyhd8ed1ab_0
- protobuf=3.20.1=py310hd8f1fbe_0
- psutil=5.9.1=py310h5764c6d_0
- pulp=2.6.0=py310hff52083_1
- py=1.11.0=pyh6c4a22f_0
- pyasn1=0.4.8=py_0
- pyasn1-modules=0.2.7=py_0
- pycparser=2.21=pyhd8ed1ab_0
- pygments=2.12.0=pyhd8ed1ab_0
- pynacl=1.5.0=py310h5764c6d_1
- pyopenssl=22.0.0=pyhd8ed1ab_0
- pyparsing=3.0.9=pyhd8ed1ab_0
- pyrsistent=0.18.1=py310h5764c6d_1
- pysftp=0.2.9=py_1
- pysocks=1.7.1=py310hff52083_5
- pytest=7.1.2=py310hff52083_0
- python=3.10.5=h582c2e5_0_cpython
- python-box=6.0.2=py310h5764c6d_3
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-fastjsonschema=2.16.1=pyhd8ed1ab_0
- python-irodsclient=1.1.4=pyhd8ed1ab_0
- python_abi=3.10=2_cp310
- pytz=2022.1=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_0
- pyyaml=6.0=py310h5764c6d_4
- ratelimiter=1.2.0=py_1002
- re2=2022.06.01=h27087fc_0
- readline=8.1.2=h0f457ee_0
- requests=2.28.1=pyhd8ed1ab_0
- retry=0.9.2=py_0
- rich=12.5.1=pyhd8ed1ab_0
- rsa=4.9=pyhd8ed1ab_0
- ruamel.yaml=0.17.21=py310h5764c6d_1
- ruamel.yaml.clib=0.2.6=py310h5764c6d_1
- s3transfer=0.6.0=pyhd8ed1ab_0
- setuptools=63.4.2=py310hff52083_0
- setuptools-scm=7.0.5=pyhd8ed1ab_0
- singularity=3.8.6=h9c2343c_0
- six=1.16.0=pyh6c4a22f_0
- slacker=0.14.0=py_0
- smart_open=6.0.0=pyhd8ed1ab_0
- smmap=3.0.5=pyh44b312d_0
- snakemake=7.12.0=hdfd78af_0
- snakemake-minimal=7.12.0=pyhdfd78af_0
- sqlite=3.39.2=h4ff8645_0
- squashfs-tools=4.4=hd0129a2_3
- stone=3.3.1=pyhd8ed1ab_0
- stopit=1.1.2=py_0
- tabulate=0.8.10=pyhd8ed1ab_0
- tk=8.6.12=h27826a3_0
- toml=0.10.2=pyhd8ed1ab_0
- tomli=2.0.1=pyhd8ed1ab_0
- toposort=1.7=pyhd8ed1ab_0
- traitlets=5.3.0=pyhd8ed1ab_0
- typing-extensions=4.3.0=hd8ed1ab_0
- typing_extensions=4.3.0=pyha770c72_0
- tzdata=2022a=h191b570_0
- ubiquerg=0.6.2=pyhd8ed1ab_0
- uritemplate=4.1.1=pyhd8ed1ab_0
- urllib3=1.26.11=pyhd8ed1ab_0
- veracitools=0.1.3=py_0
- wcwidth=0.2.5=pyh9f0ad1d_2
- wheel=0.37.1=pyhd8ed1ab_0
- wrapt=1.14.1=py310h5764c6d_0
- xz=5.2.5=h516909a_1
- yaml=0.2.5=h7f98852_2
- yarl=1.7.2=py310h5764c6d_2
- yte=1.5.1=py310hff52083_0
- zipp=3.8.1=pyhd8ed1ab_0
- zlib=1.2.12=h166bdaf_2
- zstd=1.5.2=h8a70e8d_3
2 changes: 1 addition & 1 deletion envs/cooler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- cooler==0.8.11
- cooler
3 changes: 1 addition & 2 deletions envs/pore_c.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@ channels:
dependencies:
- pore-c==0.4.0
- python==3.8
- aws-sdk-cpp=1.8.186=h9ad65fb_2

- pysam==0.19.1
2 changes: 1 addition & 1 deletion envs/whatshap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ channels:
- bioconda
- defaults
dependencies:
- whatshap ==1.0
- whatshap=1.4
14 changes: 12 additions & 2 deletions rules/exports.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,21 @@ rule to_cooler:
fragments=paths.virtual_digest.fragments,
params:
prefix=to_prefix(paths.matrix.cool, 1),
cooler_resolution=config["matrix_resolutions"]["base"],
log:
to_log(paths.matrix.cool),
benchmark:
to_benchmark(paths.matrix.cool)
threads: config["software"]["pore_c"]["to_cooler"]["threads"]
resources:
mem_mb=32000
conda:
PORE_C_CONDA_FILE
shell:
"pore_c {DASK_SETTINGS} --dask-num-workers {threads} "
" contacts export {input.contacts} cooler {params.prefix} --fragment-table {input.fragments} --chromsizes {input.chromsizes} 2>{log} "
" contacts export {input.contacts} cooler {params.prefix} "
"--cooler-resolution {params.cooler_resolution} "
"--fragment-table {input.fragments} --chromsizes {input.chromsizes} 2>{log} "


rule to_haplotyped_cooler:
Expand All @@ -28,6 +33,7 @@ rule to_haplotyped_cooler:
fragments=paths.virtual_digest.fragments,
params:
prefix=to_prefix(paths.matrix.haplotyped_cools, 2),
cooler_resolution=config["matrix_resolutions"]["base"],
log:
to_log(paths.matrix.haplotyped_cools),
benchmark:
Expand All @@ -37,7 +43,9 @@ rule to_haplotyped_cooler:
PORE_C_CONDA_FILE
shell:
"pore_c {DASK_SETTINGS} --dask-num-workers {threads} "
" contacts export {input.contacts} cooler {params.prefix} --by-haplotype --fragment-table {input.fragments} --chromsizes {input.chromsizes} 2>{log} "
" contacts export {input.contacts} cooler {params.prefix} "
"--cooler-resolution {params.cooler_resolution} "
"--by-haplotype --fragment-table {input.fragments} --chromsizes {input.chromsizes} 2>{log} "


rule create_mcool_file:
Expand All @@ -53,6 +61,8 @@ rule create_mcool_file:
to_log(paths.matrix.mcool),
conda:
"../envs/cooler.yml"
resources:
mem_mb=16000
threads: 1
shell:
"cooler zoomify -n {threads} -r {params.resolutions} -o {output} {input} 2>{log}"
Expand Down
Loading

0 comments on commit b0b4d78

Please sign in to comment.