diff --git a/notebooks/KBG/KBG.ipynb b/notebooks/KBG/KBG.ipynb index 595afcd9e..4e2de6aed 100644 --- a/notebooks/KBG/KBG.ipynb +++ b/notebooks/KBG/KBG.ipynb @@ -83,7 +83,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Patients Created: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 328/328 [00:00<00:00, 565.93it/s]\n" + "Patients Created: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 328/328 [00:01<00:00, 196.20it/s]" ] }, { @@ -94,21 +94,398 @@ ] }, { - "ename": "AttributeError", - "evalue": "'TranscriptAnnotation' object has no attribute '_protein_id'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m fpath_phenopackets \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mphenopackets\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 4\u001b[0m cohort_creator \u001b[38;5;241m=\u001b[39m configure_caching_cohort_creator(hpo)\n\u001b[0;32m----> 5\u001b[0m cohort \u001b[38;5;241m=\u001b[39m \u001b[43mload_phenopacket_folder\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfpath_phenopackets\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcohort_creator\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/preprocessing/_config.py:233\u001b[0m, in \u001b[0;36mload_phenopacket_folder\u001b[0;34m(pp_directory, cohort_creator, validation_policy)\u001b[0m\n\u001b[1;32m 231\u001b[0m cohort_iter \u001b[38;5;241m=\u001b[39m tqdm(pps, desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPatients Created\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 232\u001b[0m notepad \u001b[38;5;241m=\u001b[39m cohort_creator\u001b[38;5;241m.\u001b[39mprepare_notepad(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(pps)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m phenopacket(s) found at `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpp_directory\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 233\u001b[0m cohort \u001b[38;5;241m=\u001b[39m \u001b[43mcohort_creator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcohort_iter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnotepad\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 236\u001b[0m validation_summary \u001b[38;5;241m=\u001b[39m _summarize_validation(validation_policy, notepad)\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28mprint\u001b[39m(os\u001b[38;5;241m.\u001b[39mlinesep\u001b[38;5;241m.\u001b[39mjoin(validation_summary), file\u001b[38;5;241m=\u001b[39msys\u001b[38;5;241m.\u001b[39mstderr)\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/preprocessing/_patient.py:54\u001b[0m, in \u001b[0;36mCohortCreator.process\u001b[0;34m(self, inputs, notepad)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(patients) \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 51\u001b[0m notepad\u001b[38;5;241m.\u001b[39madd_warning(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mCohort must include \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(patients)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m>=1 members\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 52\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFix issues in patients to enable the analysis\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 54\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCohort\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_patients\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpatients\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/model/_cohort.py:107\u001b[0m, in \u001b[0;36mCohort.from_patients\u001b[0;34m(members, include_patients_with_no_HPO, include_patients_with_no_variants)\u001b[0m\n\u001b[1;32m 105\u001b[0m var_counts, pheno_count \u001b[38;5;241m=\u001b[39m Counter(), Counter() \u001b[38;5;66;03m# , prot_counts\u001b[39;00m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(members))\n\u001b[0;32m--> 107\u001b[0m members \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mset\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmembers\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m excluded_members \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m patient \u001b[38;5;129;01min\u001b[39;00m members:\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/model/_cohort.py:91\u001b[0m, in \u001b[0;36mPatient.__hash__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__hash__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[0;32m---> 91\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mhash\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpatient_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariants\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mphenotypes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/model/_variant.py:434\u001b[0m, in \u001b[0;36mVariant.__hash__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__hash__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[0;32m--> 434\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mhash\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvariant_coordinates\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtx_annotations\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenotypes\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/model/_variant.py:171\u001b[0m, in \u001b[0;36mTranscriptAnnotation.__hash__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__hash__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[1;32m 170\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mhash\u001b[39m((\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgene_id, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhgvsc_id, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_preferred, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtranscript_id, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moverlapping_exons, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvariant_effects,\n\u001b[0;32m--> 171\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprotein_id\u001b[49m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprotein_effect_location))\n", - "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/model/_variant.py:134\u001b[0m, in \u001b[0;36mTranscriptAnnotation.protein_id\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprotein_id\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m typing\u001b[38;5;241m.\u001b[39mOptional[\u001b[38;5;28mstr\u001b[39m]:\n\u001b[1;32m 130\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;124;03m Returns:\u001b[39;00m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;124;03m Optional[str]: The protein accession ID for the protein relevant to the variant\u001b[39;00m\n\u001b[1;32m 133\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 134\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_protein_id\u001b[49m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'TranscriptAnnotation' object has no attribute '_protein_id'" + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Validated under none policy\n", + "Showing errors and warnings\n", + "328 phenopacket(s) found at `phenopackets`\n", + " patient #0\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P2[PMID_36446582_Novara,_2017_P2]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P2 has no variants to work with\n", + " patient #1\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P13[PMID_36446582_Goldenberg2016_P13]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P13 has no variants to work with\n", + " patient #3\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Ockeloen2015_P20[PMID_36446582_Ockeloen2015_P20]. Remove variant from testing\n", + " ·Patient PMID_36446582_Ockeloen2015_P20 has no variants to work with\n", + " patient #7\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P22[PMID_36446582_Kutkowska-Kazmierczak2021_P22]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P22 has no variants to work with\n", + " patient #10\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Gnazzo, 2020_P31[PMID_36446582_Gnazzo,_2020_P31]. Remove variant from testing\n", + " ·Patient PMID_36446582_Gnazzo,_2020_P31 has no variants to work with\n", + " patient #13\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P29[PMID_36446582_Goldenberg2016_P29]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P29 has no variants to work with\n", + " patient #14\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P18[PMID_36446582_Kutkowska-Kazmierczak2021_P18]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P18 has no variants to work with\n", + " patient #24\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P1[PMID_36446582_Goldenberg2016_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P1 has no variants to work with\n", + " patient #29\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P33[PMID_36446582_Goldenberg2016_P33]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P33 has no variants to work with\n", + " patient #31\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P14[PMID_36446582_Kutkowska-Kazmierczak2021_P14]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P14 has no variants to work with\n", + " patient #36\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P15[PMID_36446582_Kutkowska-Kazmierczak2021_P15]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P15 has no variants to work with\n", + " patient #39\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P32[PMID_36446582_Goldenberg2016_P32]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P32 has no variants to work with\n", + " patient #40\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Bucerzan2020[PMID_36446582_Bucerzan2020]. Remove variant from testing\n", + " ·Patient PMID_36446582_Bucerzan2020 has no variants to work with\n", + " patient #43\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P24[PMID_36446582_Goldenberg2016_P24]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P24 has no variants to work with\n", + " patient #47\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Miyatake, 2013[PMID_36446582_Miyatake,_2013]. Remove variant from testing\n", + " ·Patient PMID_36446582_Miyatake,_2013 has no variants to work with\n", + " patient #53\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG1[PMID_36446582_KBG1]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG1 has no variants to work with\n", + " patient #57\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Sacharow, 2012_P2[PMID_36446582_Sacharow,_2012_P2]. Remove variant from testing\n", + " ·Patient PMID_36446582_Sacharow,_2012_P2 has no variants to work with\n", + " patient #59\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P19[PMID_36446582_Kutkowska-Kazmierczak2021_P19]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P19 has no variants to work with\n", + " patient #61\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P28[PMID_36446582_Goldenberg2016_P28]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P28 has no variants to work with\n", + " patient #65\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Gnazzo, 2020_P30[PMID_36446582_Gnazzo,_2020_P30]. Remove variant from testing\n", + " ·Patient PMID_36446582_Gnazzo,_2020_P30 has no variants to work with\n", + " patient #67\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P23[PMID_36446582_Kutkowska-Kazmierczak2021_P23]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P23 has no variants to work with\n", + " patient #68\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P11[PMID_36446582_Novara,_2017_P11]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P11 has no variants to work with\n", + " patient #72\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P12[PMID_36446582_Goldenberg2016_P12]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P12 has no variants to work with\n", + " patient #73\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P3[PMID_36446582_Novara,_2017_P3]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P3 has no variants to work with\n", + " patient #74\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Khalifa, 2013_P1B[PMID_36446582_Khalifa,_2013_P1B]. Remove variant from testing\n", + " ·Patient PMID_36446582_Khalifa,_2013_P1B has no variants to work with\n", + " patient #75\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Isrie, 2012_P2[PMID_36446582_Isrie,_2012_P2]. Remove variant from testing\n", + " ·Patient PMID_36446582_Isrie,_2012_P2 has no variants to work with\n", + " patient #76\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P19[PMID_36446582_Goldenberg2016_P19]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P19 has no variants to work with\n", + " patient #79\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P8[PMID_36446582_Novara,_2017_P8]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P8 has no variants to work with\n", + " patient #82\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG26[PMID_36446582_KBG26]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG26 has no variants to work with\n", + " patient #86\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Scarano, 2013_P11[PMID_36446582_Scarano,_2013_P11]. Remove variant from testing\n", + " ·Patient PMID_36446582_Scarano,_2013_P11 has no variants to work with\n", + " patient #88\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Crippa2015_P1[PMID_36446582_Crippa2015_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Crippa2015_P1 has no variants to work with\n", + " patient #95\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P4[PMID_36446582_Novara,_2017_P4]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P4 has no variants to work with\n", + " patient #98\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Parenti2021_P23[PMID_36446582_Parenti2021_P23]. Remove variant from testing\n", + " ·Patient PMID_36446582_Parenti2021_P23 has no variants to work with\n", + " patient #133\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P5[PMID_36446582_Novara,_2017_P5]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P5 has no variants to work with\n", + " patient #135\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Willemsen2010_P1[PMID_36446582_Willemsen2010_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Willemsen2010_P1 has no variants to work with\n", + " patient #139\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P22[PMID_36446582_Goldenberg2016_P22]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P22 has no variants to work with\n", + " patient #140\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Scarano, 2013_P10[PMID_36446582_Scarano,_2013_P10]. Remove variant from testing\n", + " ·Patient PMID_36446582_Scarano,_2013_P10 has no variants to work with\n", + " patient #147\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P9[PMID_36446582_Novara,_2017_P9]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P9 has no variants to work with\n", + " patient #150\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P18[PMID_36446582_Goldenberg2016_P18]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P18 has no variants to work with\n", + " patient #156\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Willemsen2010_P2[PMID_36446582_Willemsen2010_P2]. Remove variant from testing\n", + " ·Patient PMID_36446582_Willemsen2010_P2 has no variants to work with\n", + " patient #159\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Spengler, 2013[PMID_36446582_Spengler,_2013]. Remove variant from testing\n", + " ·Patient PMID_36446582_Spengler,_2013 has no variants to work with\n", + " patient #164\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Youngs2011[PMID_36446582_Youngs2011]. Remove variant from testing\n", + " ·Patient PMID_36446582_Youngs2011 has no variants to work with\n", + " patient #165\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P21[PMID_36446582_Goldenberg2016_P21]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P21 has no variants to work with\n", + " patient #166\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Crippa2015_P3[PMID_36446582_Crippa2015_P3]. Remove variant from testing\n", + " ·Patient PMID_36446582_Crippa2015_P3 has no variants to work with\n", + " patient #191\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Palumbo 2016[PMID_36446582_Palumbo_2016]. Remove variant from testing\n", + " ·Patient PMID_36446582_Palumbo_2016 has no variants to work with\n", + " patient #202\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P7[PMID_36446582_Novara,_2017_P7]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P7 has no variants to work with\n", + " patient #203\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG9[PMID_36446582_KBG9]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG9 has no variants to work with\n", + " patient #226\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG25[PMID_36446582_KBG25]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG25 has no variants to work with\n", + " patient #229\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P4[PMID_36446582_Goldenberg2016_P4]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P4 has no variants to work with\n", + " patient #230\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Scarano, 2013_P12[PMID_36446582_Scarano,_2013_P12]. Remove variant from testing\n", + " ·Patient PMID_36446582_Scarano,_2013_P12 has no variants to work with\n", + " patient #232\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P20[PMID_36446582_Goldenberg2016_P20]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P20 has no variants to work with\n", + " patient #233\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Crippa2015_P2[PMID_36446582_Crippa2015_P2]. Remove variant from testing\n", + " ·Patient PMID_36446582_Crippa2015_P2 has no variants to work with\n", + " patient #235\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P36[PMID_36446582_Goldenberg2016_P36]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P36 has no variants to work with\n", + " patient #239\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Willemsen2010_P3[PMID_36446582_Willemsen2010_P3]. Remove variant from testing\n", + " ·Patient PMID_36446582_Willemsen2010_P3 has no variants to work with\n", + " patient #244\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Sacharow, 2012_P1[PMID_36446582_Sacharow,_2012_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Sacharow,_2012_P1 has no variants to work with\n", + " patient #247\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Srivastava, 2017_P1[PMID_36446582_Srivastava,_2017_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Srivastava,_2017_P1 has no variants to work with\n", + " patient #258\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P12[PMID_36446582_Novara,_2017_P12]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P12 has no variants to work with\n", + " patient #260\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P20[PMID_36446582_Kutkowska-Kazmierczak2021_P20]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P20 has no variants to work with\n", + " patient #263\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG38[PMID_36446582_KBG38]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG38 has no variants to work with\n", + " patient #266\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Isrie, 2012_P1[PMID_36446582_Isrie,_2012_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Isrie,_2012_P1 has no variants to work with\n", + " patient #267\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Khalifa, 2013_P1A[PMID_36446582_Khalifa,_2013_P1A]. Remove variant from testing\n", + " ·Patient PMID_36446582_Khalifa,_2013_P1A has no variants to work with\n", + " patient #270\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Willemsen2010_P4[PMID_36446582_Willemsen2010_P4]. Remove variant from testing\n", + " ·Patient PMID_36446582_Willemsen2010_P4 has no variants to work with\n", + " patient #271\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Gnazzo, 2020_P29[PMID_36446582_Gnazzo,_2020_P29]. Remove variant from testing\n", + " ·Patient PMID_36446582_Gnazzo,_2020_P29 has no variants to work with\n", + " patient #274\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P16[PMID_36446582_Kutkowska-Kazmierczak2021_P16]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P16 has no variants to work with\n", + " patient #280\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P3[PMID_36446582_Goldenberg2016_P3]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P3 has no variants to work with\n", + " patient #283\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG22[PMID_36446582_KBG22]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG22 has no variants to work with\n", + " patient #287\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG2[PMID_36446582_KBG2]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG2 has no variants to work with\n", + " patient #291\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Lim2014[PMID_36446582_Lim2014]. Remove variant from testing\n", + " ·Patient PMID_36446582_Lim2014 has no variants to work with\n", + " patient #295\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG23[PMID_36446582_KBG23]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG23 has no variants to work with\n", + " patient #296\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P2[PMID_36446582_Goldenberg2016_P2]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P2 has no variants to work with\n", + " patient #298\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P26[PMID_36446582_Goldenberg2016_P26]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P26 has no variants to work with\n", + " patient #299\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient KBG58[PMID_36446582_KBG58]. Remove variant from testing\n", + " ·Patient PMID_36446582_KBG58 has no variants to work with\n", + " patient #301\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P17[PMID_36446582_Kutkowska-Kazmierczak2021_P17]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P17 has no variants to work with\n", + " patient #308\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Novara, 2017_P1[PMID_36446582_Novara,_2017_P1]. Remove variant from testing\n", + " ·Patient PMID_36446582_Novara,_2017_P1 has no variants to work with\n", + " patient #309\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Goldenberg2016_P10[PMID_36446582_Goldenberg2016_P10]. Remove variant from testing\n", + " ·Patient PMID_36446582_Goldenberg2016_P10 has no variants to work with\n", + " patient #315\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Kutkowska-Kazmierczak2021_P21[PMID_36446582_Kutkowska-Kazmierczak2021_P21]. Remove variant from testing\n", + " ·Patient PMID_36446582_Kutkowska-Kazmierczak2021_P21 has no variants to work with\n", + " patient #318\n", + " variants\n", + " warnings:\n", + " ·Expected a VCF record, a VRS CNV, or an expression with `hgvs.c` but had an error retrieving any from patient Behnert, 2018[PMID_36446582_Behnert,_2018]. Remove variant from testing\n", + " ·Patient PMID_36446582_Behnert,_2018 has no variants to work with\n" ] } ], @@ -116,7 +493,7 @@ "from genophenocorr.preprocessing import configure_caching_cohort_creator, load_phenopacket_folder\n", "\n", "fpath_phenopackets = 'phenopackets'\n", - "cohort_creator = configure_caching_cohort_creator(hpo)\n", + "cohort_creator = configure_caching_cohort_creator(hpo, cache_dir='temp_cache')\n", "cohort = load_phenopacket_folder(fpath_phenopackets, cohort_creator)" ] }, @@ -132,7 +509,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "6a233bd3", "metadata": {}, "outputs": [], @@ -152,7 +529,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "3b7a08b2", "metadata": {}, "outputs": [], @@ -165,30 +542,225 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "c4960518", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
ItemDescription
Description of the cohort. 77 individuals were removed from the cohort because they had no HPO terms.
Total Individuals328
Excluded Individuals77: Kutkowska-Kazmierczak2021_P22[PMID_36446582_Kutkowska-Kazmierczak2021_P22];Goldenberg2016_P3[PMID_36446582_Goldenberg2016_P3];KBG22[PMID_36446582_KBG22];Scarano, 2013_P11[PMID_36446582_Scarano,_2013_P11];Gnazzo, 2020_P30[PMID_36446582_Gnazzo,_2020_P30];Sacharow, 2012_P2[PMID_36446582_Sacharow,_2012_P2];Willemsen2010_P1[PMID_36446582_Willemsen2010_P1];Sacharow, 2012_P1[PMID_36446582_Sacharow,_2012_P1];Isrie, 2012_P1[PMID_36446582_Isrie,_2012_P1];Goldenberg2016_P36[PMID_36446582_Goldenberg2016_P36];Scarano, 2013_P12[PMID_36446582_Scarano,_2013_P12];Goldenberg2016_P13[PMID_36446582_Goldenberg2016_P13];Gnazzo, 2020_P31[PMID_36446582_Gnazzo,_2020_P31];Goldenberg2016_P19[PMID_36446582_Goldenberg2016_P19];KBG58[PMID_36446582_KBG58];Khalifa, 2013_P1A[PMID_36446582_Khalifa,_2013_P1A];Willemsen2010_P2[PMID_36446582_Willemsen2010_P2];Goldenberg2016_P24[PMID_36446582_Goldenberg2016_P24];Goldenberg2016_P33[PMID_36446582_Goldenberg2016_P33];Novara, 2017_P3[PMID_36446582_Novara,_2017_P3];Bucerzan2020[PMID_36446582_Bucerzan2020];Crippa2015_P3[PMID_36446582_Crippa2015_P3];Kutkowska-Kazmierczak2021_P18[PMID_36446582_Kutkowska-Kazmierczak2021_P18];Kutkowska-Kazmierczak2021_P16[PMID_36446582_Kutkowska-Kazmierczak2021_P16];Goldenberg2016_P18[PMID_36446582_Goldenberg2016_P18];Goldenberg2016_P26[PMID_36446582_Goldenberg2016_P26];KBG2[PMID_36446582_KBG2];Crippa2015_P2[PMID_36446582_Crippa2015_P2];Crippa2015_P1[PMID_36446582_Crippa2015_P1];Youngs2011[PMID_36446582_Youngs2011];Kutkowska-Kazmierczak2021_P14[PMID_36446582_Kutkowska-Kazmierczak2021_P14];Goldenberg2016_P32[PMID_36446582_Goldenberg2016_P32];Goldenberg2016_P1[PMID_36446582_Goldenberg2016_P1];Palumbo 2016[PMID_36446582_Palumbo_2016];Isrie, 2012_P2[PMID_36446582_Isrie,_2012_P2];Kutkowska-Kazmierczak2021_P19[PMID_36446582_Kutkowska-Kazmierczak2021_P19];KBG23[PMID_36446582_KBG23];Kutkowska-Kazmierczak2021_P21[PMID_36446582_Kutkowska-Kazmierczak2021_P21];Gnazzo, 2020_P29[PMID_36446582_Gnazzo,_2020_P29];Khalifa, 2013_P1B[PMID_36446582_Khalifa,_2013_P1B];Novara, 2017_P7[PMID_36446582_Novara,_2017_P7];Novara, 2017_P11[PMID_36446582_Novara,_2017_P11];Lim2014[PMID_36446582_Lim2014];Willemsen2010_P3[PMID_36446582_Willemsen2010_P3];Novara, 2017_P9[PMID_36446582_Novara,_2017_P9];Parenti2021_P23[PMID_36446582_Parenti2021_P23];KBG9[PMID_36446582_KBG9];Kutkowska-Kazmierczak2021_P15[PMID_36446582_Kutkowska-Kazmierczak2021_P15];Goldenberg2016_P22[PMID_36446582_Goldenberg2016_P22];Novara, 2017_P8[PMID_36446582_Novara,_2017_P8];Novara, 2017_P4[PMID_36446582_Novara,_2017_P4];Spengler, 2013[PMID_36446582_Spengler,_2013];Goldenberg2016_P20[PMID_36446582_Goldenberg2016_P20];Miyatake, 2013[PMID_36446582_Miyatake,_2013];Novara, 2017_P5[PMID_36446582_Novara,_2017_P5];Kutkowska-Kazmierczak2021_P20[PMID_36446582_Kutkowska-Kazmierczak2021_P20];Scarano, 2013_P10[PMID_36446582_Scarano,_2013_P10];Novara, 2017_P12[PMID_36446582_Novara,_2017_P12];Goldenberg2016_P21[PMID_36446582_Goldenberg2016_P21];Ockeloen2015_P20[PMID_36446582_Ockeloen2015_P20];Willemsen2010_P4[PMID_36446582_Willemsen2010_P4];Goldenberg2016_P12[PMID_36446582_Goldenberg2016_P12];Behnert, 2018[PMID_36446582_Behnert,_2018];Kutkowska-Kazmierczak2021_P17[PMID_36446582_Kutkowska-Kazmierczak2021_P17];KBG38[PMID_36446582_KBG38];KBG25[PMID_36446582_KBG25];KBG26[PMID_36446582_KBG26];Goldenberg2016_P4[PMID_36446582_Goldenberg2016_P4];Novara, 2017_P1[PMID_36446582_Novara,_2017_P1];Srivastava, 2017_P1[PMID_36446582_Srivastava,_2017_P1];Goldenberg2016_P28[PMID_36446582_Goldenberg2016_P28];Goldenberg2016_P29[PMID_36446582_Goldenberg2016_P29];KBG1[PMID_36446582_KBG1];Novara, 2017_P2[PMID_36446582_Novara,_2017_P2];Goldenberg2016_P2[PMID_36446582_Goldenberg2016_P2];Goldenberg2016_P10[PMID_36446582_Goldenberg2016_P10];Kutkowska-Kazmierczak2021_P23[PMID_36446582_Kutkowska-Kazmierczak2021_P23]
Total Unique HPO Terms27
Total Unique Variants251
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display(HTML(viewer.cohort_summary_table(cohort)))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "c39889b9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
HPO TermCount
Counts of annotations to HPO terms for the 328 in the cohort
Macrodontia (HP:0001572)182
Intellectual disability (HP:0001249)159
Abnormality of the hand (HP:0001155)156
Global developmental delay (HP:0001263)133
Short stature (HP:0004322)115
Abnormal external nose morphology (HP:0010938)112
Thick eyebrow (HP:0000574)105
Long philtrum (HP:0000343)103
Hearing impairment (HP:0000365)74
Triangular face (HP:0000325)68
Abnormality of the outer ear (HP:0000356)58
Attention deficit hyperactivity disorder (HP:0007018)51
Autistic behavior (HP:0000729)42
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display(HTML(viewer.hpo_term_counts_table(cohort))) ## Add Labels to output" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "f294ca99", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
VariantEffectCountKey
c.1903_1907delFRAMESHIFT_VARIANT3316_89284634_89284639_GTGTTT_G
c.2408_2412delFRAMESHIFT_VARIANT1016_89284129_89284134_CTTTTT_C
c.1381_1384delFRAMESHIFT_VARIANT816_89285157_89285161_GTTTC_G
c.2398_2401delFRAMESHIFT_VARIANT816_89284140_89284144_TTTTC_T
c.6792_6793insCFRAMESHIFT_VARIANT516_89279749_89279749_C_CG
c.7481_7482insCFRAMESHIFT_VARIANT516_89275180_89275180_A_AG
c.2182_2183delFRAMESHIFT_VARIANT316_89284358_89284360_GAT_G
c.7570-1G>CSPLICE_ACCEPTOR_VARIANT316_89274958_89274958_C_G
c.2175_2178delFRAMESHIFT_VARIANT316_89284363_89284367_CTTTG_C
c.4406G>ASTOP_GAINED316_89282136_89282136_C_T
c.3224_3227delFRAMESHIFT_VARIANT316_89283314_89283318_CCTTT_C
c.2197C>TSTOP_GAINED316_89284345_89284345_G_A
c.1977C>GSTOP_GAINED316_89284565_89284565_G_C
c.3832A>TSTOP_GAINED316_89282710_89282710_T_A
c.4384_4385insAFRAMESHIFT_VARIANT216_89282157_89282157_C_CT
c.3590_3594delFRAMESHIFT_VARIANT216_89282947_89282952_CTTTTT_C
c.3704_3707delFRAMESHIFT_VARIANT216_89282834_89282838_CTGTT_C
c.6513_6514insCFRAMESHIFT_VARIANT216_89280028_89280028_C_CG
c.2329_2332delFRAMESHIFT_VARIANT216_89284209_89284213_TTCTC_T
c.1318C>TSTOP_GAINED216_89285224_89285224_G_A
c.7534C>TMISSENSE_VARIANT216_89275128_89275128_G_A
c.4087C>TSTOP_GAINED216_89282455_89282455_G_A
c.1385_1388delFRAMESHIFT_VARIANT216_89285153_89285157_TTTTG_T
c.7216C>TSTOP_GAINED216_89279326_89279326_G_A
c.3309_3310insAFRAMESHIFT_VARIANT216_89283232_89283232_C_CT
c.5790C>ASTOP_GAINED216_89280752_89280752_G_T
c.5488G>TSTOP_GAINED216_89281054_89281054_C_A
c.3045delFRAMESHIFT_VARIANT216_89283496_89283497_CG_C
c.1367_1370delFRAMESHIFT_VARIANT216_89285171_89285175_CTTCT_C
\n", + "

Additionally, the following variants were observed 1 or fewer times: \n", + "c.4964_4965del; c.4206C>G; c.1389_1390insA; c.2512C>T; c.3019C>T; c.7607G>A; c.4283_4286del; c.5145C>G; c.2751_2752insT; c.2395A>T; c.7363del; c.7834G>T; c.5146G>T; c.3180_3181insA; c.3774_3775del; c.1763C>A; c.7180C>T; c.4558del; c.4391_4392del; c.6472G>T; c.4374_4375del; c.7192C>T; c.3310_3311insG; c.1711_1723del; c.6340C>T; c.6968_6975del; c.3591_3594del; c.2297_2300del; c.831del; c.548_551del; c.1731_1732insT; c.6409_6410del; c.2412del; c.3974del; c.1940_1941delinsT; c.4107_4108del; c.4528_4529del; c.915del; c.3153del; c.4389_4390del; c.1285_1286del; c.2828_2829del; c.6015_6016insA; c.6766C>T; c.5199_5227del; c.7411_7422del; c.5123C>A; c.6817_6833del; c.3931C>T; c.5205del; c.1457C>G; c.1120G>T; c.3437_3461del; c.3382_3383del; c.4786G>T; c.3222_3223insA; c.6071_6084del; c.1785_1786delinsTT; c.4498C>T; c.6628G>T; c.3046del; c.867C>G; c.3339G>A; c.5889del; c.4103_4104del; c.3582del; c.2866G>T; c.7356_7357insC; c.6364_6367del; c.1173C>G; c.7570_7572del; c.2647G>T; c.1801C>T; c.505G>T; c.3770_3771del; c.3295_3296del; c.1893_1894insA; c.6682del; c.4218C>A; c.2305del; c.6184del; c.3771_3772insA; c.6691_6692insG; c.3448C>T; c.4177_4189del; c.211_226+1del; c.7407C>G; c.3905_3906del; c.5426_5430del; c.6701del; c.5483C>A; c.5274_5275insC; c.1460_1463del; c.3888_3889insC; c.2650del; c.7083del; c.3334del; c.3193A>T; c.5953_5954del; c.7552C>T; c.7000C>T; c.3460G>T; c.2593_2594insT; c.7189C>T; c.4408A>T; c.6053_6057del; c.4171C>T; c.7471A>C; c.3208_3209del; c.4529_4530insC; c.2692C>T; c.6187G>T; c.520C>T; c.2130del; c.3198_3199del; c.7470+2T>C; c.3123_3126del; c.7535G>A; c.7416C>G; c.866_867insA; c.2367del; c.5957_5958del; c.5712_5713insT; c.1846G>T; c.3221_3222del; c.7471-1G>C; c.2765_2766del; c.7753C>T.

\n", + "

Use the entry in the \"Key\" column to investigate whether specific variants display genotype-phenotype correlations

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display(HTML(viewer.variants_table(cohort, tx_id))) " ] @@ -203,10 +775,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "ec6c204f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "Unknown protein fallback annotator type ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 9\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgenophenocorr\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01manalysis\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpredicate\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BooleanPredicate, GroupingPredicate\n\u001b[1;32m 4\u001b[0m analysis_config \u001b[38;5;241m=\u001b[39m CohortAnalysisConfiguration\u001b[38;5;241m.\u001b[39mbuilder()\\\n\u001b[1;32m 5\u001b[0m \u001b[38;5;241m.\u001b[39mmissing_implies_excluded(\u001b[38;5;28;01mTrue\u001b[39;00m)\\\n\u001b[1;32m 6\u001b[0m \u001b[38;5;241m.\u001b[39mpval_correction(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfdr_bh\u001b[39m\u001b[38;5;124m'\u001b[39m)\\\n\u001b[1;32m 7\u001b[0m \u001b[38;5;241m.\u001b[39mmin_perc_patients_w_hpo(\u001b[38;5;241m0.1\u001b[39m)\\\n\u001b[1;32m 8\u001b[0m \u001b[38;5;241m.\u001b[39mbuild()\n\u001b[0;32m----> 9\u001b[0m analysis \u001b[38;5;241m=\u001b[39m \u001b[43mconfigure_cohort_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcohort\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhpo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43manalysis_config\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/analysis/_config.py:188\u001b[0m, in \u001b[0;36mconfigure_cohort_analysis\u001b[0;34m(cohort, hpo, protein_source, cache_dir, config)\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cache_dir \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 187\u001b[0m cache_dir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(os\u001b[38;5;241m.\u001b[39mgetcwd(), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.genophenocorr_cache\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m--> 188\u001b[0m protein_metadata_service \u001b[38;5;241m=\u001b[39m \u001b[43m_configure_protein_service\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprotein_source\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CommunistCohortAnalysis(cohort, hpo, protein_metadata_service,\n\u001b[1;32m 192\u001b[0m missing_implies_excluded\u001b[38;5;241m=\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmissing_implies_excluded,\n\u001b[1;32m 193\u001b[0m include_sv\u001b[38;5;241m=\u001b[39mconfig\u001b[38;5;241m.\u001b[39minclude_sv,\n\u001b[1;32m 194\u001b[0m p_val_correction\u001b[38;5;241m=\u001b[39mconfig\u001b[38;5;241m.\u001b[39mpval_correction,\n\u001b[1;32m 195\u001b[0m min_perc_patients_w_hpo\u001b[38;5;241m=\u001b[39mconfig\u001b[38;5;241m.\u001b[39mmin_perc_patients_w_hpo)\n", + "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/analysis/_config.py:200\u001b[0m, in \u001b[0;36m_configure_protein_service\u001b[0;34m(protein_fallback, cache_dir)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_configure_protein_service\u001b[39m(protein_fallback: \u001b[38;5;28mstr\u001b[39m, cache_dir) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ProteinMetadataService:\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# (1) ProteinMetadataService\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;66;03m# Setup fallback\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m protein_fallback \u001b[38;5;241m=\u001b[39m \u001b[43m_configure_fallback_protein_service\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprotein_fallback\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;66;03m# Setup protein metadata cache\u001b[39;00m\n\u001b[1;32m 202\u001b[0m prot_cache_dir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(cache_dir, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprotein_cache\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/Documents/Programs/genophenocorr/src/genophenocorr/analysis/_config.py:213\u001b[0m, in \u001b[0;36m_configure_fallback_protein_service\u001b[0;34m(protein_fallback)\u001b[0m\n\u001b[1;32m 211\u001b[0m fallback1 \u001b[38;5;241m=\u001b[39m UniprotProteinMetadataService()\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 213\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUnknown protein fallback annotator type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mprotein_fallback\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 214\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fallback1\n", + "\u001b[0;31mValueError\u001b[0m: Unknown protein fallback annotator type " + ] + } + ], "source": [ "from genophenocorr.analysis import configure_cohort_analysis, CohortAnalysisConfiguration\n", "from genophenocorr.analysis.predicate import BooleanPredicate, GroupingPredicate\n", diff --git a/src/genophenocorr/analysis/_config.py b/src/genophenocorr/analysis/_config.py index 0f5844fa4..ecc6272af 100644 --- a/src/genophenocorr/analysis/_config.py +++ b/src/genophenocorr/analysis/_config.py @@ -169,6 +169,7 @@ def build(self) -> CohortAnalysisConfiguration: def configure_cohort_analysis(cohort: Cohort, hpo: hpotk.MinimalOntology, protein_source: str = 'UNIPROT', + cache_dir: typing.Optional[str] = None, config: typing.Optional[CohortAnalysisConfiguration] = None) -> CohortAnalysis: """ Configure :class:`genophenocorr.analysis.CohortAnalysis` for given `cohort`. @@ -182,7 +183,9 @@ def configure_cohort_analysis(cohort: Cohort, """ if config is None: config = CohortAnalysisConfiguration.builder().build() - protein_metadata_service = _configure_protein_service(protein_source) + if cache_dir is None: + cache_dir = os.path.join(os.getcwd(), '.genophenocorr_cache') + protein_metadata_service = _configure_protein_service(protein_source, cache_dir) return CommunistCohortAnalysis(cohort, hpo, protein_metadata_service, diff --git a/src/genophenocorr/preprocessing/_config.py b/src/genophenocorr/preprocessing/_config.py index a17d77c03..d987a9c39 100644 --- a/src/genophenocorr/preprocessing/_config.py +++ b/src/genophenocorr/preprocessing/_config.py @@ -16,8 +16,6 @@ from ._patient import CohortCreator from ._phenopacket import PhenopacketPatientCreator from ._phenotype import PhenotypeCreator -from ._protein import ProteinAnnotationCache, ProtCachingMetadataService -from ._uniprot import UniprotProteinMetadataService from ._variant import VarCachingFunctionalAnnotator, VariantAnnotationCache from ._vep import VepFunctionalAnnotator from ._vv import VVHgvsVariantCoordinateFinder