From 47f9a6ed3b613671d3385c9b62cdf2042631bf87 Mon Sep 17 00:00:00 2001 From: Christian Busse Date: Wed, 21 Feb 2024 03:11:27 +0100 Subject: [PATCH] Update example and test files for R and Python --- lang/R/inst/extdata/airr-schema.yaml | 5 +- lang/R/inst/extdata/germline-example.json | 182 ++-- lang/R/inst/extdata/repertoire-example.yaml | 182 +++- lang/R/tests/data-tests/bad_genotype_set.json | 86 +- lang/R/tests/data-tests/bad_germline_set.json | 102 ++- lang/R/tests/data-tests/bad_repertoire.yaml | 96 +- .../tests/data-tests/good_combined_airr.json | 449 +++++++-- .../tests/data-tests/good_combined_airr.yaml | 287 ++++-- .../R/tests/data-tests/good_genotype_set.json | 74 +- .../R/tests/data-tests/good_germline_set.json | 108 ++- lang/R/tests/data-tests/good_repertoire.yaml | 146 ++- lang/python/airr/specs/airr-schema.yaml | 5 +- lang/python/tests/data/bad_genotype_set.json | 2 +- lang/python/tests/data/bad_germline_set.json | 94 +- lang/python/tests/data/bad_repertoire.yaml | 90 +- .../python/tests/data/good_combined_airr.json | 349 +++++-- .../python/tests/data/good_combined_airr.yaml | 856 ++++++++++-------- lang/python/tests/data/good_genotype_set.json | 2 +- lang/python/tests/data/good_germline_set.json | 108 ++- lang/python/tests/data/good_repertoire.yaml | 128 ++- specs/airr-schema-openapi3.yaml | 5 +- specs/airr-schema.yaml | 5 +- 22 files changed, 2364 insertions(+), 997 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index 87a25b5bd..dd2c0c241 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -1571,10 +1571,7 @@ Study: - study_type - inclusion_exclusion_criteria - grants - - collected_by - - lab_name - - lab_address - - submitted_by + - contributors - pub_ids - keywords_study properties: diff --git a/lang/R/inst/extdata/germline-example.json b/lang/R/inst/extdata/germline-example.json index 926b6d428..9d41e5f38 100644 --- a/lang/R/inst/extdata/germline-example.json +++ b/lang/R/inst/extdata/germline-example.json @@ -1,17 +1,41 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", - "pub_ids": "", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "pub_ids": [""], + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -19,11 +43,33 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", @@ -36,7 +82,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -70,7 +119,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -187,11 +236,33 @@ { "allele_description_id": "OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -204,7 +275,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -238,7 +312,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -356,40 +430,40 @@ "curation": null }], - "GenotypeSet": [{ - "receptor_genotype_set_id": "1", - "genotype_class_list": [ - { - "receptor_genotype_id": "1", - "locus": "IGH", - "documented_alleles": [ - { - "label": "IGHV1-69*01", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 1 - }, - { - "label": "IGHV1-69*02", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 2 - } - ], - "undocumented_alleles": [ - { - "allele_name": "IGHD3-1*01_S1234", - "sequence": "agtagtagtagt", - "phasing": 1 - } - ], - "deleted_genes": [ - { - "label": "IGHV3-30-3", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 1 - } - ], - "inference_process": "repertoire_sequencing" - } - ] - }] + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] } diff --git a/lang/R/inst/extdata/repertoire-example.yaml b/lang/R/inst/extdata/repertoire-example.yaml index 5d6808bcc..6adaa2361 100644 --- a/lang/R/inst/extdata/repertoire-example.yaml +++ b/lang/R/inst/extdata/repertoire-example.yaml @@ -11,31 +11,58 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" - collected_by: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] grants: null - keywords_study: + keywords_study: - "contains_ig" - "contains_tr" subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null - ancestry_population: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null ethnicity: null race: null strain_name: null @@ -58,15 +85,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000788" + id: "CL:0000788" label: "naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -77,7 +104,7 @@ Repertoire: reverse_pcr_primer_target_location: null sequencing_platform: "Illumina MiSeq" sequencing_files: - sequencing_data_id: SRR2905656 + sequencing_data_id: SRA:SRR2905656 file_type: fastq filename: SRR2905656_R1.fastq.gz read_direction: forward @@ -85,6 +112,8 @@ Repertoire: paired_filename: SRR2905656_R2.fastq.gz paired_read_direction: reverse paired_read_length: 300 + index_filename: SRR2905656_R3.fastq.gz + index_length: 8 anatomic_site: null disease_state_sample: null collection_time_point_relative: null @@ -92,6 +121,9 @@ Repertoire: id: null label: null collection_time_point_reference: null + collection_location: + id: null + label: null biomaterial_provider: null cell_number: null cells_per_reaction: null @@ -134,13 +166,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" - collected_by: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] grants: null keywords_study: - "contains_ig" @@ -149,16 +203,21 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null - ancestry_population: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null ethnicity: null race: null strain_name: null @@ -181,15 +240,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000787" + id: "CL:0000787" label: "memory B cell" cell_phenotype: "expression of CD20 and CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -200,7 +259,7 @@ Repertoire: reverse_pcr_primer_target_location: null sequencing_platform: "Illumina MiSeq" sequencing_files: - sequencing_data_id: SRR2905655 + sequencing_data_id: SRA:SRR2905655 file_type: fastq filename: SRR2905655_R1.fastq.gz read_direction: forward @@ -208,6 +267,8 @@ Repertoire: paired_filename: SRR2905655_R2.fastq.gz paired_read_direction: reverse paired_read_length: 300 + index_filename: SRR2905655_R3.fastq.gz + index_length: 8 anatomic_site: null disease_state_sample: null collection_time_point_relative: null @@ -215,6 +276,9 @@ Repertoire: id: null label: null collection_time_point_reference: null + collection_location: + id: null + label: null biomaterial_provider: null cell_number: null cells_per_reaction: null @@ -257,13 +321,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" - collected_by: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] grants: null keywords_study: - "contains_ig" @@ -272,16 +358,21 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null - ancestry_population: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null ethnicity: null race: null strain_name: null @@ -304,15 +395,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000895" + id: "CL:0000895" label: "naive thymus-derived CD4-positive, alpha-beta T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -323,7 +414,7 @@ Repertoire: reverse_pcr_primer_target_location: null sequencing_platform: "Illumina MiSeq" sequencing_files: - sequencing_data_id: SRR2905659 + sequencing_data_id: SRA:SRR2905659 file_type: fastq filename: SRR2905659_R1.fastq.gz read_direction: forward @@ -331,6 +422,8 @@ Repertoire: paired_filename: SRR2905659_R2.fastq.gz paired_read_direction: reverse paired_read_length: 300 + index_filename: SRR2905659_R3.fastq.gz + index_length: 8 anatomic_site: null disease_state_sample: null collection_time_point_relative: null @@ -338,6 +431,9 @@ Repertoire: id: null label: null collection_time_point_reference: null + collection_location: + id: null + label: null biomaterial_provider: null cell_number: null cells_per_reaction: null diff --git a/lang/R/tests/data-tests/bad_genotype_set.json b/lang/R/tests/data-tests/bad_genotype_set.json index 48825e1f8..01709d60a 100644 --- a/lang/R/tests/data-tests/bad_genotype_set.json +++ b/lang/R/tests/data-tests/bad_genotype_set.json @@ -1,44 +1,44 @@ { - "GenotypeSet": [{ - "receptor_genotype_set_id": "1", - "genotype_class_list": [ - { - "receptor_genotype_id": "1", - "locus": 1, - "documented_alleles": [ - { - "label": "IGHV1-69*01", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 1 - }, - { - "label": "IGHV1-69*02", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 2 - }, - { - "label": "IGHV1-69*02", - "name": "1234", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 2 - } - ], - "undocumented_alleles": [ - { - "allele_name": "IGHD3-1*01_S1234", - "sequence": "agtagtagtagt", - "phasing": 1 - } - ], - "deleted_genes": [ - { - "label": "IGHV3-30-3", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": "1" - } - ], - "inference_process": "repertoire_sequencing" - } - ] - }] -} \ No newline at end of file + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + }, + { + "label": "IGHV1-69*02", + "name": "1234", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": "1" + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} diff --git a/lang/R/tests/data-tests/bad_germline_set.json b/lang/R/tests/data-tests/bad_germline_set.json index 0aeea9a2f..28531aabb 100644 --- a/lang/R/tests/data-tests/bad_germline_set.json +++ b/lang/R/tests/data-tests/bad_germline_set.json @@ -1,27 +1,71 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", "pub_ids": [""], - "species_typo": ["Mouse"], + "species": "Mouse", "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", - "locus": 1, + "locus": "IGH", "allele_descriptions": [ { "allele_description_id": "OGRDB:A00301", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", @@ -29,7 +73,7 @@ "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], - "locus": 1, + "locus": "IGH", "chromosome": null, "sequence_type": "V", "functional": true, @@ -66,7 +110,7 @@ "fwr3_start": 196, "fwr3_end": 312, "cdr3_start": 313, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -177,16 +221,38 @@ "unrearranged_support": [], "rearranged_support": [], "paralogs": [], - "notes": "Imported to OGRDB with the following notes:\r\nwatson_et_al: CAST_EiJ_IGHV5-3", + "notes": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", "curational_tags": null }, { "allele_description_id": "OGRDB:A00314", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -231,7 +297,7 @@ "fwr3_start": 196, "fwr3_end": 312, "cdr3_start": 313, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -342,7 +408,7 @@ "unrearranged_support": [], "rearranged_support": [], "paralogs": [], - "notes": "Imported to OGRDB with the following notes:\r\nwatson_et_al: CAST_EiJ_IGHV8-2", + "notes": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", "curational_tags": null } ], diff --git a/lang/R/tests/data-tests/bad_repertoire.yaml b/lang/R/tests/data-tests/bad_repertoire.yaml index 57b0b7312..f35355e98 100644 --- a/lang/R/tests/data-tests/bad_repertoire.yaml +++ b/lang/R/tests/data-tests/bad_repertoire.yaml @@ -8,21 +8,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -32,7 +50,7 @@ Repertoire: cell_subset: "Naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -56,21 +74,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -80,7 +116,7 @@ Repertoire: cell_subset: "Memory B cell" cell_phenotype: "expression of CD20 and CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -104,21 +140,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -128,7 +182,7 @@ Repertoire: cell_subset: "Naive CD4+ T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS diff --git a/lang/R/tests/data-tests/good_combined_airr.json b/lang/R/tests/data-tests/good_combined_airr.json index aa7d52ec1..0ef2106ae 100644 --- a/lang/R/tests/data-tests/good_combined_airr.json +++ b/lang/R/tests/data-tests/good_combined_airr.json @@ -10,13 +10,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], "pub_ids": ["PMID:27005435"], - "collected_by": null, "grants": null, "keywords_study": [ "contains_ig", @@ -27,25 +66,25 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, - "sex": "F", + "sex": "female", "age_min": 27, "age_max": 27, "age_unit": { - "id": "UO_0000036", + "id": "UO:0000036", "label": "year" }, "age_event": null, "ancestry_population": { - "id": null, - "label": null - }, - "location_birth": { - "id": null, - "label": null - }, + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -65,7 +104,65 @@ "intervention": null, "medical_history": null } - ] + ], + "genotype": { + "receptor_genotype_set": { + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }, + "mhc_genotype_set": { + "mhc_genotype_set_id": "01847298-d0c2-11ee-bc66", + "mhc_genotype_list": [ + { + "mhc_genotype_id": "00be1c2e-d0c2-11ee-bfe7", + "mhc_class": "MHC-I", + "mhc_genotyping_method": "pcr_low_resolution", + "mhc_alleles": [ + { + "allele_designation": "01:01", + "gene": { + "id": "MRO-0000046", + "label": "HLA-A" + }, + "reference_set_ref": null + } + ] + } + ] + } + } }, "sample": [ { @@ -73,17 +170,17 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000788", + "id": "CL:0000788", "label": "naive B cell" }, "cell_phenotype": "expression of CD20 and the absence of CD27", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -115,7 +212,7 @@ "label": null }, "collection_time_point_reference": null, - "collection_location": { + "collection_location": { "id": null, "label": null }, @@ -169,13 +266,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], "pub_ids": ["PMID:27005435"], - "collected_by": null, "grants": null, "keywords_study": [ "contains_ig", @@ -186,25 +322,25 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, - "sex": "F", + "sex": "female", "age_min": 27, "age_max": 27, "age_unit": { - "id": "UO_0000036", + "id": "UO:0000036", "label": "year" }, "age_event": null, "ancestry_population": { - "id": null, - "label": null - }, - "location_birth": { - "id": null, - "label": null - }, + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -232,17 +368,17 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000787", + "id": "CL:0000787", "label": "memory B cell" }, "cell_phenotype": "expression of CD20 and CD27", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -274,7 +410,7 @@ "label": null }, "collection_time_point_reference": null, - "collection_location": { + "collection_location": { "id": null, "label": null }, @@ -328,13 +464,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], "pub_ids": ["PMID:27005435"], - "collected_by": null, "grants": null, "keywords_study": [ "contains_ig", @@ -345,25 +520,25 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, - "sex": "F", + "sex": "female", "age_min": 27, "age_max": 27, "age_unit": { - "id": "UO_0000036", + "id": "UO:0000036", "label": "year" }, "age_event": null, "ancestry_population": { - "id": null, - "label": null - }, - "location_birth": { - "id": null, - "label": null - }, + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -391,17 +566,17 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000895", + "id": "CL:0000895", "label": "naive thymus-derived CD4-positive, alpha-beta T cell" }, "cell_phenotype": "expression of CD8 and absence of CD4 and CD45RO", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -433,7 +608,7 @@ "label": null }, "collection_time_point_reference": null, - "collection_location": { + "collection_location": { "id": null, "label": null }, @@ -479,19 +654,44 @@ } ], + "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", "pub_ids": [""], - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -499,15 +699,37 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -516,7 +738,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -537,18 +762,20 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, - "alignment": [ + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ "1", "2", "3", @@ -665,15 +892,37 @@ { "allele_description_id": "OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", - "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "aliases": [ "watson_et_al:CAST_EiJ_IGHV8-2" ], @@ -682,7 +931,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -703,18 +955,20 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "unaligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, - "alignment": [ + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ "1", "2", "3", @@ -831,7 +1085,6 @@ ], "curation": null }], - "GenotypeSet": [{ "receptor_genotype_set_id": "1", "genotype_class_list": [ diff --git a/lang/R/tests/data-tests/good_combined_airr.yaml b/lang/R/tests/data-tests/good_combined_airr.yaml index f4fdcb0ef..2c9ab547c 100644 --- a/lang/R/tests/data-tests/good_combined_airr.yaml +++ b/lang/R/tests/data-tests/good_combined_airr.yaml @@ -21,13 +21,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: pub_ids: ["PMID:27005435"] - collected_by: grants: keywords_study: - contains_ig @@ -36,13 +58,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: ancestry_population: @@ -67,20 +89,54 @@ Repertoire: immunogen: intervention: medical_history: + genotype: + receptor_genotype_set: + receptor_genotype_set_id: "1" + genotype_class_list: + - receptor_genotype_id: "1" + locus: IGH + documented_alleles: + - label: IGHV1-69*01 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + - label: IGHV1-69*02 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 2 + undocumented_alleles: + - allele_name: IGHD3-1*01_S1234 + sequence: agtagtagtagt + phasing: 1 + deleted_genes: + - label: IGHV3-30-3 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + inference_process: repertoire_sequencing + mhc_genotype_set: + mhc_genotype_set_id: 01847298-d0c2-11ee-bc66 + mhc_genotype_list: + - mhc_genotype_id: 00be1c2e-d0c2-11ee-bfe7 + mhc_class: MHC-I + mhc_genotyping_method: pcr_low_resolution + mhc_alleles: + - allele_designation: "01:01" + gene: + id: MRO-0000046 + label: HLA-A + reference_set_ref: sample: - sample_id: TW01A_B_naive sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000788 + id: CL:0000788 label: naive B cell cell_phenotype: expression of CD20 and the absence of CD27 cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -164,13 +220,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: pub_ids: ["PMID:27005435"] - collected_by: grants: keywords_study: - contains_ig @@ -179,13 +257,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: ancestry_population: @@ -215,15 +293,15 @@ Repertoire: sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000787 + id: CL:0000787 label: memory B cell cell_phenotype: expression of CD20 and CD27 cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -307,13 +385,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: pub_ids: ["PMID:27005435"] - collected_by: grants: keywords_study: - contains_ig @@ -322,13 +422,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: ancestry_population: @@ -358,15 +458,15 @@ Repertoire: sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000895 + id: CL:0000895 label: naive thymus-derived CD4-positive, alpha-beta T cell cell_phenotype: expression of CD8 and absence of CD4 and CD45RO cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -431,16 +531,27 @@ Repertoire: GermlineSet: - germline_set_id: OGRDB:G00007 - author: William Lees - lab_name: '' - lab_address: Birkbeck College, University of London, Malet Street, London - acknowledgements: [] + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: null + - role: data curation + degree: null release_version: 1 - release_description: '' - release_date: '2021-11-24' + release_description: "" + release_date: "2021-11-24" germline_set_name: CAST IGH germline_set_ref: OGRDB:G00007.1 - pub_ids: [''] + pub_ids: [""] species: id: NCBITAXON:10090 label: Mus musculus @@ -450,15 +561,27 @@ GermlineSet: allele_descriptions: - allele_description_id: OGRDB:A00301 allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF - maintainer: William Lees - acknowledgements: [] - lab_address: Birkbeck College, University of London, Malet Street, London + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: release_version: 1 - release_date: 24-Nov-2021 + release_date: "2021-11-24" release_description: First release label: IGHV-2DBF sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA aliases: - watson_et_al:CAST_EiJ_IGHV5-3 locus: IGH @@ -488,18 +611,20 @@ GermlineSet: v_gene_delineations: - sequence_delineation_id: '1' delineation_scheme: IMGT + unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA fwr1_start: 1 - fwr1_end: 78 - cdr1_start: 79 - cdr1_end: 114 - fwr2_start: 115 - fwr2_end: 165 - cdr2_start: 166 - cdr2_end: 195 - fwr3_start: 196 - fwr3_end: 312 - cdr3_start: 313 - alignment: + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: - '1' - '2' - '3' @@ -611,15 +736,27 @@ GermlineSet: curational_tags: - allele_description_id: OGRDB:A00314 allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO - maintainer: William Lees - acknowledgements: [] - lab_address: Birkbeck College, University of London, Malet Street, London + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: release_version: 1 - release_date: 24-Nov-2021 + release_date: "2021-11-24" release_description: First release label: IGHV-2ETO sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC - coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC aliases: - watson_et_al:CAST_EiJ_IGHV8-2 locus: IGH @@ -649,18 +786,20 @@ GermlineSet: v_gene_delineations: - sequence_delineation_id: '1' delineation_scheme: IMGT + unaligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + aligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC fwr1_start: 1 - fwr1_end: 78 - cdr1_start: 79 - cdr1_end: 114 - fwr2_start: 115 - fwr2_end: 165 - cdr2_start: 166 - cdr2_end: 195 - fwr3_start: 196 - fwr3_end: 312 - cdr3_start: 313 - alignment: + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: - '1' - '2' - '3' @@ -773,9 +912,9 @@ GermlineSet: curation: GenotypeSet: - - receptor_genotype_set_id: '1' + - receptor_genotype_set_id: "1" genotype_class_list: - - receptor_genotype_id: '1' + - receptor_genotype_id: "1" locus: IGH documented_alleles: - label: IGHV1-69*01 diff --git a/lang/R/tests/data-tests/good_genotype_set.json b/lang/R/tests/data-tests/good_genotype_set.json index 4335b02e1..abd24646c 100644 --- a/lang/R/tests/data-tests/good_genotype_set.json +++ b/lang/R/tests/data-tests/good_genotype_set.json @@ -1,38 +1,38 @@ { - "GenotypeSet": [{ - "receptor_genotype_set_id": "1", - "genotype_class_list": [ - { - "receptor_genotype_id": "1", - "locus": "IGH", - "documented_alleles": [ - { - "label": "IGHV1-69*01", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 1 - }, - { - "label": "IGHV1-69*02", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 2 - } - ], - "undocumented_alleles": [ - { - "allele_name": "IGHD3-1*01_S1234", - "sequence": "agtagtagtagt", - "phasing": 1 - } - ], - "deleted_genes": [ - { - "label": "IGHV3-30-3", - "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", - "phasing": 1 - } - ], - "inference_process": "repertoire_sequencing" - } - ] - }] -} \ No newline at end of file + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} diff --git a/lang/R/tests/data-tests/good_germline_set.json b/lang/R/tests/data-tests/good_germline_set.json index 41ecf5f7d..e74c590dc 100644 --- a/lang/R/tests/data-tests/good_germline_set.json +++ b/lang/R/tests/data-tests/good_germline_set.json @@ -1,17 +1,41 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", "pub_ids": [""], - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -19,11 +43,33 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", @@ -36,7 +82,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -70,7 +119,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -187,11 +236,33 @@ { "allele_description_id": "OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -204,7 +275,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -238,7 +312,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", diff --git a/lang/R/tests/data-tests/good_repertoire.yaml b/lang/R/tests/data-tests/good_repertoire.yaml index c935c9b67..6adaa2361 100644 --- a/lang/R/tests/data-tests/good_repertoire.yaml +++ b/lang/R/tests/data-tests/good_repertoire.yaml @@ -11,28 +11,50 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null pub_ids: ["PMID:27005435"] - collected_by: null grants: null - keywords_study: + keywords_study: - "contains_ig" - "contains_tr" subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null ancestry_population: @@ -63,15 +85,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000788" + id: "CL:0000788" label: "naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -82,7 +104,7 @@ Repertoire: reverse_pcr_primer_target_location: null sequencing_platform: "Illumina MiSeq" sequencing_files: - sequencing_data_id: SRR2905656 + sequencing_data_id: SRA:SRR2905656 file_type: fastq filename: SRR2905656_R1.fastq.gz read_direction: forward @@ -90,6 +112,8 @@ Repertoire: paired_filename: SRR2905656_R2.fastq.gz paired_read_direction: reverse paired_read_length: 300 + index_filename: SRR2905656_R3.fastq.gz + index_length: 8 anatomic_site: null disease_state_sample: null collection_time_point_relative: null @@ -142,13 +166,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null pub_ids: ["PMID:27005435"] - collected_by: null grants: null keywords_study: - "contains_ig" @@ -157,13 +203,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null ancestry_population: @@ -194,15 +240,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000787" + id: "CL:0000787" label: "memory B cell" cell_phenotype: "expression of CD20 and CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -213,7 +259,7 @@ Repertoire: reverse_pcr_primer_target_location: null sequencing_platform: "Illumina MiSeq" sequencing_files: - sequencing_data_id: SRR2905655 + sequencing_data_id: SRA:SRR2905655 file_type: fastq filename: SRR2905655_R1.fastq.gz read_direction: forward @@ -221,6 +267,8 @@ Repertoire: paired_filename: SRR2905655_R2.fastq.gz paired_read_direction: reverse paired_read_length: 300 + index_filename: SRR2905655_R3.fastq.gz + index_length: 8 anatomic_site: null disease_state_sample: null collection_time_point_relative: null @@ -273,13 +321,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null pub_ids: ["PMID:27005435"] - collected_by: null grants: null keywords_study: - "contains_ig" @@ -288,13 +358,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" - sex: F + sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null ancestry_population: @@ -325,15 +395,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000895" + id: "CL:0000895" label: "naive thymus-derived CD4-positive, alpha-beta T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -344,7 +414,7 @@ Repertoire: reverse_pcr_primer_target_location: null sequencing_platform: "Illumina MiSeq" sequencing_files: - sequencing_data_id: SRR2905659 + sequencing_data_id: SRA:SRR2905659 file_type: fastq filename: SRR2905659_R1.fastq.gz read_direction: forward @@ -352,6 +422,8 @@ Repertoire: paired_filename: SRR2905659_R2.fastq.gz paired_read_direction: reverse paired_read_length: 300 + index_filename: SRR2905659_R3.fastq.gz + index_length: 8 anatomic_site: null disease_state_sample: null collection_time_point_relative: null diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index 87a25b5bd..dd2c0c241 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -1571,10 +1571,7 @@ Study: - study_type - inclusion_exclusion_criteria - grants - - collected_by - - lab_name - - lab_address - - submitted_by + - contributors - pub_ids - keywords_study properties: diff --git a/lang/python/tests/data/bad_genotype_set.json b/lang/python/tests/data/bad_genotype_set.json index c58a39027..01709d60a 100644 --- a/lang/python/tests/data/bad_genotype_set.json +++ b/lang/python/tests/data/bad_genotype_set.json @@ -41,4 +41,4 @@ } ] }] -} \ No newline at end of file +} diff --git a/lang/python/tests/data/bad_germline_set.json b/lang/python/tests/data/bad_germline_set.json index 168cc1fa5..28531aabb 100644 --- a/lang/python/tests/data/bad_germline_set.json +++ b/lang/python/tests/data/bad_germline_set.json @@ -1,27 +1,71 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", "pub_ids": [""], - "species": ["Mouse"], + "species": "Mouse", "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", "allele_descriptions": [ { "allele_description_id": "OGRDB:A00301", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", @@ -66,7 +110,7 @@ "fwr3_start": 196, "fwr3_end": 312, "cdr3_start": 313, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -182,11 +226,33 @@ }, { "allele_description_id": "OGRDB:A00314", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -231,7 +297,7 @@ "fwr3_start": 196, "fwr3_end": 312, "cdr3_start": 313, - "alignment": [ + "alignment_labels": [ "1", "2", "3", diff --git a/lang/python/tests/data/bad_repertoire.yaml b/lang/python/tests/data/bad_repertoire.yaml index 2de377cb3..f35355e98 100644 --- a/lang/python/tests/data/bad_repertoire.yaml +++ b/lang/python/tests/data/bad_repertoire.yaml @@ -8,21 +8,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -32,7 +50,7 @@ Repertoire: cell_subset: "Naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -56,21 +74,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -80,7 +116,7 @@ Repertoire: cell_subset: "Memory B cell" cell_phenotype: "expression of CD20 and CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -104,21 +140,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -128,7 +182,7 @@ Repertoire: cell_subset: "Naive CD4+ T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS diff --git a/lang/python/tests/data/good_combined_airr.json b/lang/python/tests/data/good_combined_airr.json index 9101b24a9..0ef2106ae 100644 --- a/lang/python/tests/data/good_combined_airr.json +++ b/lang/python/tests/data/good_combined_airr.json @@ -10,13 +10,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], "pub_ids": ["PMID:27005435"], - "collected_by": null, "grants": null, "keywords_study": [ "contains_ig", @@ -27,25 +66,25 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "sex": "female", "age_min": 27, "age_max": 27, "age_unit": { - "id": "UO_0000036", + "id": "UO:0000036", "label": "year" }, "age_event": null, "ancestry_population": { - "id": null, - "label": null - }, - "location_birth": { - "id": null, - "label": null - }, + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -104,10 +143,10 @@ ] }, "mhc_genotype_set": { - "mhc_genotype_set_id": "this is a unique identifier", + "mhc_genotype_set_id": "01847298-d0c2-11ee-bc66", "mhc_genotype_list": [ { - "mhc_genotype_id": "unique", + "mhc_genotype_id": "00be1c2e-d0c2-11ee-bfe7", "mhc_class": "MHC-I", "mhc_genotyping_method": "pcr_low_resolution", "mhc_alleles": [ @@ -117,7 +156,7 @@ "id": "MRO-0000046", "label": "HLA-A" }, - "reference_set_ref": "blah" + "reference_set_ref": null } ] } @@ -131,17 +170,17 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000788", + "id": "CL:0000788", "label": "naive B cell" }, "cell_phenotype": "expression of CD20 and the absence of CD27", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -173,7 +212,7 @@ "label": null }, "collection_time_point_reference": null, - "collection_location": { + "collection_location": { "id": null, "label": null }, @@ -227,13 +266,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], "pub_ids": ["PMID:27005435"], - "collected_by": null, "grants": null, "keywords_study": [ "contains_ig", @@ -244,25 +322,25 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "sex": "female", "age_min": 27, "age_max": 27, "age_unit": { - "id": "UO_0000036", + "id": "UO:0000036", "label": "year" }, "age_event": null, "ancestry_population": { - "id": null, - "label": null - }, - "location_birth": { - "id": null, - "label": null - }, + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -290,17 +368,17 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000787", + "id": "CL:0000787", "label": "memory B cell" }, "cell_phenotype": "expression of CD20 and CD27", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -332,7 +410,7 @@ "label": null }, "collection_time_point_reference": null, - "collection_location": { + "collection_location": { "id": null, "label": null }, @@ -386,13 +464,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], "pub_ids": ["PMID:27005435"], - "collected_by": null, "grants": null, "keywords_study": [ "contains_ig", @@ -403,25 +520,25 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "sex": "female", "age_min": 27, "age_max": 27, "age_unit": { - "id": "UO_0000036", + "id": "UO:0000036", "label": "year" }, "age_event": null, "ancestry_population": { - "id": null, - "label": null - }, - "location_birth": { - "id": null, - "label": null - }, + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -449,17 +566,17 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000895", + "id": "CL:0000895", "label": "naive thymus-derived CD4-positive, alpha-beta T cell" }, "cell_phenotype": "expression of CD8 and absence of CD4 and CD45RO", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -491,7 +608,7 @@ "label": null }, "collection_time_point_reference": null, - "collection_location": { + "collection_location": { "id": null, "label": null }, @@ -540,17 +657,41 @@ "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", "pub_ids": [""], - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -558,15 +699,37 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", - "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -575,7 +738,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -596,8 +762,8 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, "fwr1_end": 75, "cdr1_start": 76, @@ -609,7 +775,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -726,11 +892,33 @@ { "allele_description_id": "OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -743,7 +931,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -764,8 +955,8 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", - "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "fwr1_start": 1, "fwr1_end": 75, "cdr1_start": 76, @@ -777,7 +968,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", diff --git a/lang/python/tests/data/good_combined_airr.yaml b/lang/python/tests/data/good_combined_airr.yaml index 80d0fe3a2..2c9ab547c 100644 --- a/lang/python/tests/data/good_combined_airr.yaml +++ b/lang/python/tests/data/good_combined_airr.yaml @@ -21,13 +21,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: pub_ids: ["PMID:27005435"] - collected_by: grants: keywords_study: - contains_ig @@ -36,13 +58,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: ancestry_population: @@ -69,9 +91,9 @@ Repertoire: medical_history: genotype: receptor_genotype_set: - receptor_genotype_set_id: '1' + receptor_genotype_set_id: "1" genotype_class_list: - - receptor_genotype_id: '1' + - receptor_genotype_id: "1" locus: IGH documented_alleles: - label: IGHV1-69*01 @@ -90,31 +112,31 @@ Repertoire: phasing: 1 inference_process: repertoire_sequencing mhc_genotype_set: - mhc_genotype_set_id: "this is a unique identifier" + mhc_genotype_set_id: 01847298-d0c2-11ee-bc66 mhc_genotype_list: - - mhc_genotype_id: unique + - mhc_genotype_id: 00be1c2e-d0c2-11ee-bfe7 mhc_class: MHC-I mhc_genotyping_method: pcr_low_resolution mhc_alleles: - allele_designation: "01:01" gene: - id: "MRO-0000046" - label: "HLA-A" - reference_set_ref: blah + id: MRO-0000046 + label: HLA-A + reference_set_ref: sample: - sample_id: TW01A_B_naive sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000788 + id: CL:0000788 label: naive B cell cell_phenotype: expression of CD20 and the absence of CD27 cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -198,13 +220,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: pub_ids: ["PMID:27005435"] - collected_by: grants: keywords_study: - contains_ig @@ -213,13 +257,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: ancestry_population: @@ -249,15 +293,15 @@ Repertoire: sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000787 + id: CL:0000787 label: memory B cell cell_phenotype: expression of CD20 and CD27 cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -341,13 +385,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: pub_ids: ["PMID:27005435"] - collected_by: grants: keywords_study: - contains_ig @@ -356,13 +422,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: ancestry_population: @@ -392,15 +458,15 @@ Repertoire: sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000895 + id: CL:0000895 label: naive thymus-derived CD4-positive, alpha-beta T cell cell_phenotype: expression of CD8 and absence of CD4 and CD45RO cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -464,357 +530,391 @@ Repertoire: analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 GermlineSet: -- acknowledgements: [] - allele_descriptions: - - acknowledgements: [] - aliases: - - watson_et_al:CAST_EiJ_IGHV5-3 - allele_description_id: OGRDB:A00301 - allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF - allele_designation: null - chromosome: null - coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3' - curational_tags: null - functional: true - gene_designation: null - gene_end: null - gene_start: null - inference_type: rearranged_only - lab_address: Birkbeck College, University of London, Malet Street, London - label: IGHV-2DBF - leader_1_end: null - leader_1_start: null - leader_2_end: null - leader_2_start: null - locus: IGH - maintainer: William Lees - paralogs: [] - rearranged_support: [] - release_date: 24-Nov-2021 - release_description: First release + - germline_set_id: OGRDB:G00007 + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: null + - role: data curation + degree: null release_version: 1 - sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - sequence_type: V + release_description: "" + release_date: "2021-11-24" + germline_set_name: CAST IGH + germline_set_ref: OGRDB:G00007.1 + pub_ids: [""] species: id: NCBITAXON:10090 label: Mus musculus species_subgroup: CAST_EiJ species_subgroup_type: strain - status: active - subgroup_designation: null - unrearranged_support: [] - utr_5_prime_end: null - utr_5_prime_start: null - v_gene_delineations: - - aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - alignment: - - '1' - - '2' - - '3' - - '4' - - '5' - - '6' - - '7' - - '8' - - '9' - - '10' - - '11' - - '12' - - '13' - - '14' - - '15' - - '16' - - '17' - - '18' - - '19' - - '20' - - '21' - - '22' - - '23' - - '24' - - '25' - - '26' - - '27' - - '28' - - '29' - - '30' - - '31' - - '32' - - '33' - - '34' - - '35' - - '36' - - '37' - - '38' - - '39' - - '40' - - '41' - - '42' - - '43' - - '44' - - '45' - - '46' - - '47' - - '48' - - '49' - - '50' - - '51' - - '52' - - '53' - - '54' - - '55' - - '56' - - '57' - - '58' - - '59' - - '60' - - '61' - - '62' - - '63' - - '64' - - '65' - - '66' - - '67' - - '68' - - '69' - - '70' - - '71' - - '72' - - '73' - - '74' - - '75' - - '76' - - '77' - - '78' - - '79' - - '80' - - '81' - - '82' - - '83' - - '84' - - '85' - - '86' - - '87' - - '88' - - '89' - - '90' - - '91' - - '92' - - '93' - - '94' - - '95' - - '96' - - '97' - - '98' - - '99' - - '100' - - '101' - - '102' - - '103' - - '104' - cdr1_end: 110 - cdr1_start: 76 - cdr2_end: 160 - cdr2_start: 151 - cdr3_start: 295 - delineation_scheme: IMGT - fwr1_end: 75 - fwr1_start: 1 - fwr2_end: 150 - fwr2_start: 111 - fwr3_end: 294 - fwr3_start: 161 - sequence_delineation_id: '1' - unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - v_rs_end: null - v_rs_start: null - - acknowledgements: [] - aliases: - - watson_et_al:CAST_EiJ_IGHV8-2 - allele_description_id: OGRDB:A00314 - allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO - allele_designation: null - chromosome: null - coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC - curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2' - curational_tags: null - functional: true - gene_designation: null - gene_end: null - gene_start: null - inference_type: rearranged_only - lab_address: Birkbeck College, University of London, Malet Street, London - label: IGHV-2ETO - leader_1_end: null - leader_1_start: null - leader_2_end: null - leader_2_start: null locus: IGH - maintainer: William Lees - paralogs: [] - rearranged_support: [] - release_date: 24-Nov-2021 - release_description: First release - release_version: 1 - sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC - sequence_type: V - species: - id: NCBITAXON:10090 - label: Mus musculus - species_subgroup: CAST_EiJ - species_subgroup_type: strain - status: active - subgroup_designation: null - unrearranged_support: [] - utr_5_prime_end: null - utr_5_prime_start: null - v_gene_delineations: - - aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - alignment: - - '1' - - '2' - - '3' - - '4' - - '5' - - '6' - - '7' - - '8' - - '9' - - '10' - - '11' - - '12' - - '13' - - '14' - - '15' - - '16' - - '17' - - '18' - - '19' - - '20' - - '21' - - '22' - - '23' - - '24' - - '25' - - '26' - - '27' - - '28' - - '29' - - '30' - - '31' - - '32' - - '33' - - '34' - - '35' - - '36' - - '37' - - '38' - - '39' - - '40' - - '41' - - '42' - - '43' - - '44' - - '45' - - '46' - - '47' - - '48' - - '49' - - '50' - - '51' - - '52' - - '53' - - '54' - - '55' - - '56' - - '57' - - '58' - - '59' - - '60' - - '61' - - '62' - - '63' - - '64' - - '65' - - '66' - - '67' - - '68' - - '69' - - '70' - - '71' - - '72' - - '73' - - '74' - - '75' - - '76' - - '77' - - '78' - - '79' - - '80' - - '81' - - '82' - - '83' - - '84' - - '85' - - '86' - - '87' - - '88' - - '89' - - '90' - - '91' - - '92' - - '93' - - '94' - - '95' - - '96' - - '97' - - '98' - - '99' - - '100' - - '101' - - '102' - - '103' - - '104' - cdr1_end: 110 - cdr1_start: 76 - cdr2_end: 160 - cdr2_start: 151 - cdr3_start: 295 - delineation_scheme: IMGT - fwr1_end: 75 - fwr1_start: 1 - fwr2_end: 150 - fwr2_start: 111 - fwr3_end: 294 - fwr3_start: 161 - sequence_delineation_id: '1' - unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - v_rs_end: null - v_rs_start: null - author: William Lees - curation: null - germline_set_id: OGRDB:G00007 - germline_set_name: CAST IGH - germline_set_ref: OGRDB:G00007.1 - lab_address: Birkbeck College, University of London, Malet Street, London - lab_name: '' - locus: IGH - pub_ids: [''] - release_date: '2021-11-24' - release_description: '' - release_version: 1 - species: - id: NCBITAXON:10090 - label: Mus musculus - species_subgroup: CAST_EiJ - species_subgroup_type: strain - + allele_descriptions: + - allele_description_id: OGRDB:A00301 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: + release_version: 1 + release_date: "2021-11-24" + release_description: First release + label: IGHV-2DBF + sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aliases: + - watson_et_al:CAST_EiJ_IGHV5-3 + locus: IGH + chromosome: + sequence_type: V + functional: true + inference_type: rearranged_only + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + gene_designation: + subgroup_designation: + allele_designation: + gene_start: + gene_end: + utr_5_prime_start: + utr_5_prime_end: + leader_1_start: + leader_1_end: + leader_2_start: + leader_2_end: + v_rs_start: + v_rs_end: + v_gene_delineations: + - sequence_delineation_id: '1' + delineation_scheme: IMGT + unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + fwr1_start: 1 + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + unrearranged_support: [] + rearranged_support: [] + paralogs: [] + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3' + curational_tags: + - allele_description_id: OGRDB:A00314 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: + release_version: 1 + release_date: "2021-11-24" + release_description: First release + label: IGHV-2ETO + sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + aliases: + - watson_et_al:CAST_EiJ_IGHV8-2 + locus: IGH + chromosome: + sequence_type: V + functional: true + inference_type: rearranged_only + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + gene_designation: + subgroup_designation: + allele_designation: + gene_start: + gene_end: + utr_5_prime_start: + utr_5_prime_end: + leader_1_start: + leader_1_end: + leader_2_start: + leader_2_end: + v_rs_start: + v_rs_end: + v_gene_delineations: + - sequence_delineation_id: '1' + delineation_scheme: IMGT + unaligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + aligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + fwr1_start: 1 + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + unrearranged_support: [] + rearranged_support: [] + paralogs: [] + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2' + curational_tags: + curation: GenotypeSet: - - receptor_genotype_set_id: '1' + - receptor_genotype_set_id: "1" genotype_class_list: - - receptor_genotype_id: '1' + - receptor_genotype_id: "1" locus: IGH documented_alleles: - label: IGHV1-69*01 diff --git a/lang/python/tests/data/good_genotype_set.json b/lang/python/tests/data/good_genotype_set.json index ba10f56e9..abd24646c 100644 --- a/lang/python/tests/data/good_genotype_set.json +++ b/lang/python/tests/data/good_genotype_set.json @@ -35,4 +35,4 @@ } ] }] -} \ No newline at end of file +} diff --git a/lang/python/tests/data/good_germline_set.json b/lang/python/tests/data/good_germline_set.json index 41ecf5f7d..e74c590dc 100644 --- a/lang/python/tests/data/good_germline_set.json +++ b/lang/python/tests/data/good_germline_set.json @@ -1,17 +1,41 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", "pub_ids": [""], - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -19,11 +43,33 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", @@ -36,7 +82,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -70,7 +119,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -187,11 +236,33 @@ { "allele_description_id": "OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -204,7 +275,10 @@ "sequence_type": "V", "functional": true, "inference_type": "rearranged_only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -238,7 +312,7 @@ "fwr3_start": 161, "fwr3_end": 294, "cdr3_start": 295, - "alignment": [ + "alignment_labels": [ "1", "2", "3", diff --git a/lang/python/tests/data/good_repertoire.yaml b/lang/python/tests/data/good_repertoire.yaml index 9bf3a4653..6adaa2361 100644 --- a/lang/python/tests/data/good_repertoire.yaml +++ b/lang/python/tests/data/good_repertoire.yaml @@ -11,28 +11,50 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null pub_ids: ["PMID:27005435"] - collected_by: null grants: null - keywords_study: + keywords_study: - "contains_ig" - "contains_tr" subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null ancestry_population: @@ -63,15 +85,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000788" + id: "CL:0000788" label: "naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -144,13 +166,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null pub_ids: ["PMID:27005435"] - collected_by: null grants: null keywords_study: - "contains_ig" @@ -159,13 +203,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null ancestry_population: @@ -196,15 +240,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000787" + id: "CL:0000787" label: "memory B cell" cell_phenotype: "expression of CD20 and CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -277,13 +321,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null pub_ids: ["PMID:27005435"] - collected_by: null grants: null keywords_study: - "contains_ig" @@ -292,13 +358,13 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 label: year age_event: null ancestry_population: @@ -329,15 +395,15 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000895" + id: "CL:0000895" label: "naive thymus-derived CD4-positive, alpha-beta T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 1ae5ad012..d6c6d48e2 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -1667,10 +1667,7 @@ Study: - study_type - inclusion_exclusion_criteria - grants - - collected_by - - lab_name - - lab_address - - submitted_by + - contributors - pub_ids - keywords_study properties: diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 87a25b5bd..dd2c0c241 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -1571,10 +1571,7 @@ Study: - study_type - inclusion_exclusion_criteria - grants - - collected_by - - lab_name - - lab_address - - submitted_by + - contributors - pub_ids - keywords_study properties: