# config.yaml

## NOTE THE FOLLOWING:
##    1) All elements in the configuration file must be filled in with no empty
##       variables/details
##    2) Paths in this configuration file must be absolute or relative to the
##       Snakefile location
##    3) The configfile is ordered according to the order in the workflow:
##       quality control, trimming, aligning, sorting and statistical analysis
##    4) Take some time to read and understand all the parameters in this
##       config file to be able to use it correctly

###############################################################################
###############                GENERAL PARAMETERS                ###############
###############################################################################

# Output location and metadata file. Paths are absolute or relative to the
# Snakefile location.
OUTPUT: 'output/'
METADATA: 'example/metadata.txt'
# Presumably true for paired-end data and false for single-end data; see the
# file naming rules in the QUALITY CONTROL section.
IS_PAIRED: true
# Container image reference (quoted because the value contains colons).
DOCKER_IMAGE: 'docker://continuumio/miniconda3:4.7.12'

###############################################################################
###############                CONDITIONAL RULES                ###############
###############################################################################

# Boolean switches for the conditional rules of the workflow.
CONVERSION_CHECK: false
RUN_TRIMMING: true
RUN_BISMARK: true
RUN_READ_SORTING: true
RUN_DMR_ANALYSIS: true
RUN_DOWNSTREAM: false

###############################################################################
###############                  SPECIAL MODES                  ###############
###############################################################################

# Use the following parameters if your data includes only diploids or only
# polyploids and/or you would like to compare two different conditions within
# the same ploidy level. Please remember to modify the metadata file
# accordingly.
POLYPLOID_ONLY: false
DIPLOID_ONLY: false

###############################################################################
###############                 CONVERSION_CHECK                ###############
###############################################################################

# Path to the folder containing the genome used as the control for bisulfite
# conversion efficiency. For plants this is usually the chloroplast genome,
# while for other organisms the lambda genome is usually used.
CONTROL_GENOME: '/path/to/genome/folder/'

###############################################################################
###############                 QUALITY CONTROL                 ###############
###############################################################################

# Path to the folder containing the gzipped raw fastq data
RAW_DATA: 'example/Raw_data'

# File naming rules for the raw data.
# Paired-end data must be named:
#   <SAMPLE-NAME>_<PAIR_1>.<RAW_DATA_EXTENSION>.gz
#   <SAMPLE-NAME>_<PAIR_2>.<RAW_DATA_EXTENSION>.gz
# With the values set below this gives <SAMPLE-NAME>_R1.fastq.gz and
# <SAMPLE-NAME>_R2.fastq.gz
# Single-end data must be named:
#   <SAMPLE-NAME>.<RAW_DATA_EXTENSION>.gz
PAIR_1: 'R1'
PAIR_2: 'R2'
RAW_DATA_EXTENSION: 'fastq'

###############################################################################
###############                    TRIMMING                     ###############
###############################################################################

# Set to true to perform trimming on the 5' side only
TRIM_5_ONLY: false

# Number of bases to trim from the 5' side, for R1 and R2
CLIP_5_R1: 1
CLIP_5_R2: 1

# Set to true to perform trimming on the 3' side only
TRIM_3_ONLY: false

# Number of bases to trim from the 3' side, for R1 and R2
CLIP_3_R1: 1
CLIP_3_R2: 1

###############################################################################
###############                    ALIGNMENT                    ###############
###############################################################################

# Folders containing the parental genome assemblies
GENOME_PARENT_1: 'example/genomes/parent1/'
GENOME_PARENT_2: 'example/genomes/parent2/'

# Set to true if the genomes provided above are unfinished (i.e. they have
# thousands of scaffolds)
UNFINISHED_GENOME: false

###############################################################################
##############                    READ SORTING                    ##############
###############################################################################


# Set to true to use a PHRED score of 64; false keeps the default of 33
PHRED_SCORE_64: false

# Paths to the genome assembly files of the two parents
ASSEMBLY_PARENT_1: 'example/genomes/parent1/parent1.fa'
ASSEMBLY_PARENT_2: 'example/genomes/parent2/parent2.fa'


###############################################################################
##############                    DMR ANALYSIS                    ##############
###############################################################################

# Set to true to run the DMR analysis for the CG context only
ONLY_CG_CONTEXT: false

###############################################################################
##############                 DOWNSTREAM ANALYSIS               ##############
###############################################################################

# Paths to the annotation files of the two parents
ANNOTATION_PARENT_1: 'example/genomes/parent1/anno1.gff'
ANNOTATION_PARENT_2: 'example/genomes/parent2/anno2.gff'

# The 9th column (attributes) of each annotation file should contain a geneID
# string such as ID=AT123456. Specify the geneID string for each parent.
# Common strings: ID, geneID, gene
GENE_ID_PARENT_1: 'ID'
GENE_ID_PARENT_2: 'ID'