diff --git a/cell-seek b/cell-seek
index 25f30cf..7a3479e 100755
--- a/cell-seek
+++ b/cell-seek
@@ -39,7 +39,7 @@ import argparse # potential python3 3rd party package, added in python/3.5
# Local imports
from src import version
-from src.run import init, setup, bind, dryrun, runner
+from src.run import init, setup, bind, dryrun, runner, finalcheck
from src.shells import bash
from src.utils import (
Colors,
@@ -139,8 +139,16 @@ def run(sub_args):
# Dryrun pipeline
dryrun_output = dryrun(outdir = sub_args.output) # python3 returns byte-string representation
print("\nDry-running {} pipeline:\n{}".format(_name, dryrun_output.decode("utf-8")))
+ for arg_check in ['rename', 'libraries']:
+ if config['options'][arg_check] != 'None':
+ finalcheck(config=config, flag=arg_check)
sys.exit(0)
+ # Step 4b. Perform final check of input files
+ for arg_check in ['rename', 'libraries']:
+ if config['options'][arg_check] != 'None':
+ finalcheck(config=config, flag=arg_check)
+
# Step 5. Orchestrate pipeline execution,
# run pipeline in locally on a compute node
# for debugging purposes or submit the master
@@ -542,14 +550,21 @@ def parsed_arguments(name, description):
--rename RENAME
Rename sample file. A CSV file containing the name of the FASTQ
file and the new name of the sample. Only the samples listed in
- the CSV files will be ran. This flag is currently only applicable
- when dealing with GEX projects.
+ the CSV files will be run. This flag is only applicable when
+ dealing with GEX, VDJ, or ATAC projects. Renaming samples in the
+ other pipelines can be achieved via the libraries file.
Here is an example rename.csv file:
- FASTQ,Sample
+ FASTQ,Name
original_name1,new_name1
original_name2,new_name1
original_name3,new_name2
original_name4,new_name3
+ In this example, new_name1 has FASTQ files with two different
+ names. With this input, both sets of FASTQ files will be used
+ when processing the sample as new_name1. To keep the original
+ name of a sample, list the same value in both columns. Any
+ FASTQ file that does not have the name original_name1,
+ original_name2, original_name3, or original_name4 will not be run.
Example: --rename rename.csv
{3}{4}Orchestration options:{5}
diff --git a/config/genome.json b/config/genome.json
index 4d1e370..62c67fe 100644
--- a/config/genome.json
+++ b/config/genome.json
@@ -22,7 +22,7 @@
"mm2024": {
"gex_transcriptome": "/data/OpenOmics/references/cell-seek/mouse/refdata-gex-GRCm39-2024-A",
"cite_transcriptome": "/data/OpenOmics/references/cell-seek/mouse/refdata-gex-GRCm39-2024-A",
- "vdj_ref": "/data/NCBR/references/cellranger_references/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.1.0/"
+ "vdj_ref": "/data/NCBR/references/cellranger_references/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0/"
}
}
}
diff --git a/docs/usage/run.md b/docs/usage/run.md
index d1e25e9..b4e107c 100644
--- a/docs/usage/run.md
+++ b/docs/usage/run.md
@@ -17,6 +17,7 @@ $ cell-seek run [--help] \
[--aggregate {{mapped, none}}] [--exclude-introns] \
[--library LIBRARIES] [--features FEATURES] \
[--filter FILTER] [--metadata METADATA] [--create-bam] \
+ [--rename RENAME] \
--input INPUT [INPUT ...] \
--output OUTPUT \
--pipeline {gex, ...} \
@@ -161,6 +162,31 @@ Each of the following arguments are optional, and do not need to be provided.
>
> ***Example:*** `--metadata metadata.csv`
+---
+ `--rename RENAME`
+> **Rename sample file.**
+> *type: file*
+>
+> Rename sample file. A CSV file containing the name of the FASTQ file and the new name of the sample. Only the samples listed in the CSV files will be run.
+>
+> *Here is an example rename.csv file:*
+> ```
+> FASTQ,Name
+> original_name1,new_name1
+> original_name2,new_name2
+> original_name3,new_name3
+> original_name3-2,new_name3
+> original_name4,original_name4
+> ```
+>
+> *Where:*
+>
+> - *FASTQ:* The name that is used in the FASTQ file
+> - *Name:* Unique sample ID that is the sample name used for Cell Ranger count.
+>
+> In this example, new_name3 has FASTQ files with two different names. With this input, both sets of FASTQ files will be used when processing the sample as new_name3. original_name4 will not be renamed. Any FASTQ file that does not have the name original_name1, original_name2, original_name3, original_name3-2, or original_name4 will not be run.
+>
+> ***Example:*** `--rename rename.csv`
### 2.2 VDJ
@@ -199,7 +225,7 @@ Each of the following arguments are required. Failure to provide a required argu
> **Reference genome.**
> *type: string*
>
-> This option defines the reference genome of the samples. cell-seek does comes bundled with prebuilt reference files for human and mouse samples, e.g. hg38 or mm10. Since there is no 2024 release VDJ reference, if hg2024 or mm2024 is selected the VDJ reference CR 7.1 release will be used.
+> This option defines the reference genome of the samples. cell-seek comes bundled with prebuilt reference files for human and mouse samples, e.g. hg38 or mm10. Since there is no 2024 release VDJ reference, if hg2024 or mm2024 is selected the VDJ reference CR 7.1 release will be used for human, and CR 7.0 release will be used for mouse.
>
> A custom reference genome can also be provided via a json file. Additional information for creating this json file can be found in [cell-seek genome
](../genome).
>
@@ -218,7 +244,30 @@ Each of the following arguments are required. Failure to provide a required argu
#### 2.2.2 Analysis Options
-The VDJ pipeline currently does not have any applicable analysis flags.
+ `--rename RENAME`
+> **Rename sample file.**
+> *type: file*
+>
+> Rename sample file. A CSV file containing the name of the FASTQ file and the new name of the sample. Only the samples listed in the CSV files will be run.
+>
+> *Here is an example rename.csv file:*
+> ```
+> FASTQ,Name
+> original_name1,new_name1
+> original_name2,new_name2
+> original_name3,new_name3
+> original_name3-2,new_name3
+> original_name4,original_name4
+> ```
+>
+> *Where:*
+>
+> - *FASTQ:* The name that is used in the FASTQ file
+> - *Name:* Unique sample ID that is the sample name used for Cell Ranger count.
+>
+> In this example, new_name3 has FASTQ files with two different names. With this input, both sets of FASTQ files will be used when processing the sample as new_name3. original_name4 will not be renamed. Any FASTQ file that does not have the name original_name1, original_name2, original_name3, original_name3-2, or original_name4 will not be run.
+>
+> ***Example:*** `--rename rename.csv`
### 2.3 CITE
@@ -393,7 +442,7 @@ Each of the following arguments are required. Failure to provide a required argu
> **Reference genome.**
> *type: string*
>
-> This option defines the reference genome of the samples. cell-seek does comes bundled with prebuilt reference files for human and mouse samples, The options hg38 or mm10 would select the 2020 release of the reference. The options hg2024 or mm2024 would select the 2024 release of the reference. More information about the officially released references can be found on the [10x Genomics website](https://www.10xgenomics.com/support/software/cell-ranger/latest/release-notes/cr-reference-release-notes). Since there is no 2024 released VDJ reference, if hg2024 or mm2024 is selected in a run that includes VDJ data, the VDJ reference CR 7.1 release will be used.
+> This option defines the reference genome of the samples. cell-seek comes bundled with prebuilt reference files for human and mouse samples. The options hg38 or mm10 would select the 2020 release of the reference. The options hg2024 or mm2024 would select the 2024 release of the reference. More information about the officially released references can be found on the [10x Genomics website](https://www.10xgenomics.com/support/software/cell-ranger/latest/release-notes/cr-reference-release-notes). Since there is no 2024 released VDJ reference, if hg2024 or mm2024 is selected in a run that includes VDJ data, the VDJ reference CR 7.1 release will be used for human, and CR 7.0 release will be used for mouse.
>
> A custom reference genome can also be provided via a json file. Additional information for creating this json file can be found in [cell-seek genome
](../genome).
>
@@ -586,7 +635,29 @@ Each of the following arguments are required. Failure to provide a required argu
#### 2.5.2 Analysis Options
-The ATAC pipeline currently does not have any applicable analysis flags.
+ `--rename RENAME`
+> **Rename sample file.**
+> *type: file*
+>
+> Rename sample file. A CSV file containing the name of the FASTQ file and the new name of the sample. Only the samples listed in the CSV files will be run.
+>
+> *Here is an example rename.csv file:*
+> ```
+> FASTQ,Name
+> original_name1,new_name1
+> original_name2,new_name2
+> original_name3,new_name3
+> original_name3-2,new_name3
+> original_name4,original_name4
+> ```
+>
+> *Where:*
+>
+> - *FASTQ:* The name that is used in the FASTQ file
+> - *Name:* Unique sample ID that is the sample name used for Cell Ranger count.
+>
+> In this example, new_name3 has FASTQ files with two different names. With this input, both sets of FASTQ files will be used when processing the sample as new_name3. original_name4 will not be renamed. Any FASTQ file that does not have the name original_name1, original_name2, original_name3, original_name3-2, or original_name4 will not be run.
+>
+> ***Example:*** `--rename rename.csv`
### 2.6 Multiome
diff --git a/src/run.py b/src/run.py
index 5100b3d..303c864 100755
--- a/src/run.py
+++ b/src/run.py
@@ -510,6 +510,7 @@ def _require(fields, d, lib):
# makes it so the order of the
# columns does not matter
indices = {}
+
with open(libraries_file) as fh:
try:
header = next(fh).strip().split(delimeter)
@@ -537,8 +538,6 @@ def _require(fields, d, lib):
return config
-
-
def check_reference_file(reference_file, flag, delimeter = ','):
"""Check reference information from the features
or cmo reference file. The reference file is a CSV
@@ -600,6 +599,156 @@ def _require(fields, d, lib, flag):
_require(['id', 'name', 'read', 'pattern', 'sequence', 'feature_type'], indices, reference_file, flag)
+
+def check_rename_file(config, rename_file, delimeter = ','):
+ """Check sample information from the rename file.
+ The rename file is a CSV file containing information
+ about each sample. It contains each sample's name
+ and its associated demultiplexed (FastQ) name. The
+ relationship between samples provided to the --input
+ option and samples listed in the rename file is 1:many.
+ This is because sets of FastQ files with different
+ names that are from the same sample. It contains each
+ unique set of FASTQ name and sample name.
+ @params config :
+ Config dictionary containing metadata to run pipeline
+ @params rename_file :
+ rename file containing information about each sample
+ @params flag :
+ Config flag that was used to provide the rename file
+
+ """
+ def _require(fields, d, lib):
+ """Private function that checks to see if all required fields
+ are provided in the reference file. If nan item in fields does
+ not exist in d, then the user forget to add this required field.
+ """
+ missing = []
+ for f in fields:
+ try:
+ i = d[f]
+ except KeyError:
+ missing.append(f)
+ pass
+ if missing:
+ fatal(
+ f"Error: Missing required fields in --rename {{}} file!\n \
+ └── Please add information for the following field(s): {{}}".format(
+ lib,
+ ','.join([f.lower() for f in missing])
+ )
+ )
+
+ return
+
+ # Get file extension to determine
+ # the appropriate file delimeter
+ extension = os.path.splitext(rename_file)[-1].lower()
+ if extension in ['.tsv', '.txt', '.text', '.tab']:
+ # file is tab seperated
+ delimeter = '\t'
+ # Find index of file dynamically,
+ # makes it so the order of the
+ # columns does not matter
+ indices = {}
+ with open(rename_file) as fh:
+ try:
+ header = next(fh).strip().split(delimeter)
+ except StopIteration:
+ fatal(
+ f'Error: --rename {{}} cannot be empty!\n \
+ └── Please ensure the file is not empty before proceeding again.'.format(reference_file)
+ )
+ for i in range(len(header)):
+ colname = header[i].strip().lower()
+ indices[colname] = i
+ _require(['fastq', 'name'], indices, rename_file)
+
+
+
+def finalcheck(config, flag, delimeter=','):
+ """Check the contents of the rename or libraries
+ file against input. This function checks to see if
+ the input files are not used in the rename/libraries
+ file and prints a warning if that occurs. If either
+ file lists a FASTQ file or path that is not included
+ in the input and throws an error if that is detected.
+ @params config :
+ Config dictionary containing metadata to run pipeline
+ @params flag :
+ Config flag that was used to provide the input file
+ """
+ filename = config['options'][flag]
+
+ extension = os.path.splitext(filename)[-1].lower()
+ if extension in ['.tsv', '.txt', '.text', '.tab']:
+ # file is tab seperated
+ delimeter = '\t'
+
+ # Find index of file dynamically,
+ # makes it so the order of the
+ # columns does not matter
+ indices = {}
+
+ # Dictionary holding unique contents from files to use for comparisons
+ contents = {}
+ with open(filename) as fh:
+ try:
+ header = next(fh).strip().split(delimeter)
+ except StopIteration:
+ fatal(
+ f'Error: --rename {{}} cannot be empty!\n \
+ └── Please ensure the file is not empty before proceeding again.'.format(reference_file)
+ )
+ for i in range(len(header)):
+ colname = header[i].strip().lower()
+ indices[colname] = i
+ for line in fh:
+ linelist = line.strip().split(delimeter)
+ for i in indices:
+ values = contents.get(i, set())
+ values.add(linelist[indices[i]])
+ contents[i] = values
+
+ # Compiles the sample names and fastq paths from the input (config)
+ samples = set([re.sub("_S[0-9]+_L00[0-9]", "", i) for i in config['samples']])
+ fastq_paths = set([os.path.dirname(i) for i in config['options']['input']])
+
+ for index_name in indices:
+ comparison = contents[index_name]
+
+ #Check the FASTQ names against the sample (fastq) names provided in the input files
+ if index_name in ['sample', 'fastq']:
+ if samples != comparison:
+ if len(samples-comparison) > 0:
+ print(f"\nWarning: Some FASTQs will be skipped! \nWarning: --{{}} {{}} does not contain values for all provided FASTQ files.\n \
+ └── Please note that no sample names have been provided for FASTQ files with the following id(s): {{}} \n \
+ These FASTQ files will be skipped when running the pipeline.".format(flag, filename, ','.join(samples-comparison)))
+ if len(comparison-samples) > 0:
+ fatal(
+ f'\nError: --{{}} {{}} contains values in FASTQ column that is not in the provided FASTQ files!\n \
+ └── Please note that the followed listed FASTQ names are not found in the input files: {{}} '.format(flag, filename, ','.join(comparison-samples))
+ )
+
+ if index_name == 'flowcell':
+ # Check to see which values in file are not found in fastq_paths
+ missing_file = set([i for i in comparison if sum([i in fastq_path for fastq_path in fastq_paths]) == 0])
+
+ # Check to see which fastq_paths from input are not found in the flowcell values in file
+ missing_path = set([fastq_path for fastq_path in fastq_paths if sum([i in fastq_path for i in comparison]) == 0])
+
+ if len(missing_path) > 0:
+ print(f"\nWarning: Some FASTQs will be skipped! \nWarning: --{{}} {{}} does not contain values for all provided FASTQ paths.\n \
+ └── Please note that no samples contain flowcells that are on the following path(s): \n \
+ {{}} \n \
+ Any FASTQ files in these paths will be skipped when running the pipeline.".format(flag, filename, ','.join(missing_path)))
+ if len(missing_file) > 0:
+ fatal(
+ f'\nError: --{{}} {{}} contains values in FASTQ column that is not in the provided FASTQ files!\n \
+ └── Please note that the followed listed FASTQ names are not found in the input files: {{}} '.format(flag, filename, ','.join(missing_file))
+ )
+
+
def check_conditional_parameters(config):
"""Check the compiled config fictionary to ensure
that any parameters that are only required for
@@ -710,6 +859,10 @@ def add_rawdata_information(sub_args, config, ifiles):
reference = sub_args.cmo_reference
check_reference_file(reference_file = reference, flag = "cmo_reference")
+ if sub_args.rename != None:
+ rename = sub_args.rename
+ check_rename_file(rename_file = rename, config=config)
+
return config
diff --git a/workflow/Snakefile b/workflow/Snakefile
index bfdd02f..a968cea 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -28,7 +28,7 @@ features = config['options']['features'] # Features files for cellranger (
libraries = config['options']['libraries'] # Libraries files for cellranger (not used in all pipelines)
cmo_ref = config['options']['cmo_reference'] # CMO Reference files for cellranger (only used in multi pipeline)
cmo_sample = config['options']['cmo_sample'] # CMO Sample files for cellranger (only used in multi pipelines)
-rename = config['options']['rename'] # File containing how to rename samples when running Cell Ranger analysis
+RENAME = config['options']['rename'] # File containing how to rename samples when running Cell Ranger analysis
exclude_introns = str_bool( # Use introns for pre mRNA,
config['options']['exclude_introns'] # default: False
)
@@ -44,16 +44,17 @@ if 'libraries' in config:
pipeline_output = []
-rename_dict = dict()
-if rename != 'None':
- with open(rename) as f:
+RENAME_DICT = dict() #Dictionary containing information on renamed samples where the keys are the FASTQ file names and the values are the Cell Ranger sample names
+if RENAME != 'None':
+ with open(RENAME) as f:
tabs = [i.lower() for i in next(f).strip().split(',')]
index_fastq = [i for i in range(len(tabs)) if 'fastq' in tabs[i]][0]
- index_sample = [i for i in range(len(tabs)) if 'sample' in tabs[i]][0]
+ index_sample = [i for i in range(len(tabs)) if 'name' in tabs[i]][0]
for line in f:
line = line.strip().split(',')
- rename_dict[line[index_fastq]] = line[index_sample]
- samples = list((set(samples) - set(rename_dict.keys())).union(rename_dict.values()))
+ RENAME_DICT[line[index_fastq]] = line[index_sample]
+ samples = list(set(RENAME_DICT.values()))
+# samples = list((set(samples) - set(RENAME_DICT.keys())).union(RENAME_DICT.values()))
samples.sort()
# Import rules
diff --git a/workflow/rules/atac.smk b/workflow/rules/atac.smk
index 584fc17..ab0b616 100644
--- a/workflow/rules/atac.smk
+++ b/workflow/rules/atac.smk
@@ -18,23 +18,39 @@ pipeline_output += expand(
sample=samples
)
+# Function definitions
def filterFastq(wildcards):
- return(','.join(set([os.path.dirname(i) for i in input_fastq if len(re.findall(f"{wildcards.sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0])))
+ """
+ Wrapper to get a comma separated list of the directories where the FASTQ files associated with the sample are located
+ """
+ filter_paths = []
+ for sample in sample_rename(wildcards).split(','):
+ filter_paths += [os.path.dirname(i) for i in input_fastq if len(re.findall(f"{sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0]
+ return(','.join(set(filter_paths)))
+ #return(','.join(set([os.path.dirname(i) for i in input_fastq if len(re.findall(f"{wildcards.sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0])))
-
-# Function defitions
+def sample_rename(wildcards):
+ """
+ Wrapper to get the FASTQ file names to use processing if the sample was requested to be renamed
+ """
+ if wildcards.sample in RENAME_DICT.values():
+ names = [i[0] for i in RENAME_DICT.items() if wildcards.sample == i[1]]
+ return(','.join(names))
+ else:
+ return(wildcards.sample)
rule count:
output:
- join(workpath, "{sample}", "outs", "web_summary.html")
+ html = join(workpath, "{sample}", "outs", "web_summary.html")
log:
err = "run_{sample}_10x_cellranger_count.err",
log ="run_{sample}_10x_cellranger_count.log"
params:
rname = "count",
batch = "-l nodes=1:ppn=16,mem=96gb",
- prefix = "{sample}",
+ id = "{sample}",
+ sample = sample_rename,
reference = config["references"][genome]["atac_ref"],
fastqs = filterFastq
envmodules: config["tools"]["cellranger-atac"]
@@ -42,13 +58,15 @@ rule count:
"""
# Remove output directory
# prior to running cellranger
- if [ -d '{params.prefix}' ]; then
- rm -rf '{params.prefix}/'
+ if [ -d '{params.id}' ]; then
+ if ! [ -f '{output.html}' ]; then
+ rm -rf '{params.id}/'
+ fi
fi
cellranger-atac count \\
- --id {params.prefix} \\
- --sample {params.prefix} \\
+ --id {params.id} \\
+ --sample {params.sample} \\
--reference {params.reference} \\
--fastqs {params.fastqs} \\
2>{log.err} 1>{log.log}
diff --git a/workflow/rules/gex.smk b/workflow/rules/gex.smk
index 0912fbd..c8893dd 100644
--- a/workflow/rules/gex.smk
+++ b/workflow/rules/gex.smk
@@ -47,6 +47,9 @@ pipeline_output += [join(workpath, "Project_Cell_Filters.csv")]
# Function definitions
def filterFastq(wildcards):
+ """
+ Wrapper to get a comma separated list of the directories where the FASTQ files associated with the sample are located
+ """
filter_paths = []
for sample in sample_rename(wildcards).split(','):
filter_paths += [os.path.dirname(i) for i in input_fastq if len(re.findall(f"{sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0]
@@ -57,8 +60,8 @@ def sample_rename(wildcards):
"""
Wrapper to get the FASTQ file names to use processing if the sample was requested to be renamed
"""
- if wildcards.sample in rename_dict.values():
- names = [i[0] for i in rename_dict.items() if wildcards.sample == i[1]]
+ if wildcards.sample in RENAME_DICT.values():
+ names = [i[0] for i in RENAME_DICT.items() if wildcards.sample == i[1]]
return(','.join(names))
else:
return(wildcards.sample)
@@ -163,9 +166,9 @@ rule count:
# Remove output directory
# prior to running cellranger
if [ -d '{params.id}' ]; then
- if ! [ -f '{output.html}' ]; then
- rm -rf '{params.id}/'
- fi
+ if ! [ -f '{output.html}' ]; then
+ rm -rf '{params.id}/'
+ fi
fi
cellranger count \\
diff --git a/workflow/rules/vdj.smk b/workflow/rules/vdj.smk
index 3495b74..37fb037 100644
--- a/workflow/rules/vdj.smk
+++ b/workflow/rules/vdj.smk
@@ -18,23 +18,40 @@ pipeline_output += expand(
sample=samples
)
-def filterFastq(wildcards):
- return(','.join(set([os.path.dirname(i) for i in input_fastq if len(re.findall(f"{wildcards.sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0])))
-
# Function definitions
+def filterFastq(wildcards):
+ """
+ Wrapper to get a comma separated list of the directories where the FASTQ files associated with the sample are located
+ """
+ filter_paths = []
+ for sample in sample_rename(wildcards).split(','):
+ filter_paths += [os.path.dirname(i) for i in input_fastq if len(re.findall(f"{sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0]
+ return(','.join(set(filter_paths)))
+ #return(','.join(set([os.path.dirname(i) for i in input_fastq if len(re.findall(f"{wildcards.sample}_[\w]*R2[\w]*.fastq.gz", i)) > 0])))
+
+def sample_rename(wildcards):
+ """
+ Wrapper to get the FASTQ file names to use processing if the sample was requested to be renamed
+ """
+ if wildcards.sample in RENAME_DICT.values():
+ names = [i[0] for i in RENAME_DICT.items() if wildcards.sample == i[1]]
+ return(','.join(names))
+ else:
+ return(wildcards.sample)
rule count:
output:
- join(workpath, "{sample}", "outs", "web_summary.html")
+ html = join(workpath, "{sample}", "outs", "web_summary.html")
log:
err = "run_{sample}_10x_cellranger_count.err",
log ="run_{sample}_10x_cellranger_count.log"
params:
rname = "count",
batch = "-l nodes=1:ppn=16,mem=96gb",
- prefix = "{sample}",
+ id = "{sample}",
+ sample = sample_rename,
reference = config["references"][genome]["vdj_ref"],
fastqs = filterFastq
envmodules: config["tools"]["cellranger"][CELLRANGER]
@@ -42,13 +59,15 @@ rule count:
"""
# Remove output directory
# prior to running cellranger
- if [ -d '{params.prefix}' ]; then
- rm -rf '{params.prefix}/'
+ if [ -d '{params.id}' ]; then
+ if ! [ -f '{output.html}' ]; then
+ rm -rf '{params.id}/'
+ fi
fi
cellranger vdj \\
- --id {params.prefix} \\
- --sample {params.prefix} \\
+ --id {params.id} \\
+ --sample {params.sample} \\
--reference {params.reference} \\
--fastqs {params.fastqs} \\
2>{log.err} 1>{log.log}