From 6e07feff656fc518c05cfdf719df93afbdec914b Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Wed, 18 Sep 2024 10:21:22 -0700 Subject: [PATCH 01/32] Updated/cleaned as part of revision. --- src/scripts/borzoi_test_apa_folds_polaydb.py | 80 +++++++---------- src/scripts/borzoi_test_apa_polaydb.py | 60 +++++++------ src/scripts/borzoi_test_exons_folds.py | 14 ++- src/scripts/borzoi_test_genes.py | 22 +++-- src/scripts/borzoi_test_genes_folds.py | 50 +++++++---- src/scripts/borzoi_test_tss_folds_gencode.py | 94 ++++++-------------- src/scripts/borzoi_test_tss_gencode.py | 88 +++++++++--------- 7 files changed, 196 insertions(+), 212 deletions(-) diff --git a/src/scripts/borzoi_test_apa_folds_polaydb.py b/src/scripts/borzoi_test_apa_folds_polaydb.py index 0ff4c15..423bb41 100755 --- a/src/scripts/borzoi_test_apa_folds_polaydb.py +++ b/src/scripts/borzoi_test_apa_folds_polaydb.py @@ -21,6 +21,8 @@ """ borzoi_test_apa_folds_polaydb.py + +Measure accuracy at polyadenylation-level for multiple model replicates. """ ################################################################################ @@ -29,13 +31,6 @@ def main(): usage = "usage: %prog [options] ..." 
parser = OptionParser(usage) - parser.add_option( - "-a", - "--alt", - dest="alternative", - default="two-sided", - help="Statistical test alternative [Default: %default]", - ) parser.add_option( "-c", dest="crosses", @@ -50,13 +45,6 @@ def main(): type="int", help="Dataset index [Default:%default]", ) - parser.add_option( - "--d_ref", - dest="dataset_ref_i", - default=None, - type="int", - help="Reference Dataset index [Default:%default]", - ) parser.add_option( "-e", dest="conda_env", @@ -67,26 +55,19 @@ def main(): "-f", dest="fold_subset", default=None, - help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", - ) - parser.add_option("-g", dest="apa_file", default="polyadb_human_v3.csv.gz") - parser.add_option( - "--label_exp", - dest="label_exp", - default="Experiment", - help="Experiment label [Default: %default]", + type="int", + help="Run a subset of folds [Default:%default]", ) parser.add_option( - "--label_ref", - dest="label_ref", - default="Reference", - help="Reference label [Default: %default]", + "--f_list", + dest="fold_subset_list", + default=None, + help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", ) parser.add_option( - "-m", - dest="metric", - default="pearsonr", - help="Train/test metric [Default: Pearsonr or AUPRC]", + "-g", + dest="apa_file", + default="polyadb_human_v3.csv.gz" ) parser.add_option( "--name", @@ -101,14 +82,9 @@ def main(): help="Output experiment directory [Default: %default]", ) parser.add_option( - "-p", dest="out_stem", default=None, help="Output plot stem [Default: %default]" - ) - parser.add_option("-q", dest="queue", default="geforce") - parser.add_option( - "-r", - dest="ref_dir", - default=None, - help="Reference directory for statistical tests", + "-q", + dest="queue", + default="geforce" ) parser.add_option( "--rc", @@ -124,13 +100,6 @@ def main(): type="str", help="Ensemble prediction shifts [Default: %default]", ) - parser.add_option( - "--status", - 
dest="status", - default=False, - action="store_true", - help="Update metric status; do not run jobs [Default: %default]", - ) parser.add_option( "-t", dest="targets_file", @@ -138,6 +107,13 @@ def main(): type="str", help="File specifying target indexes and labels in table format", ) + parser.add_option( + "-u", + dest="untransform_old", + default=False, + action="store_true", + help="Untransform old models [Default: %default]", + ) (options, args) = parser.parse_args() if len(args) < 2: @@ -161,12 +137,16 @@ def main(): # count folds num_folds = len([dkey for dkey in data_stats if dkey.startswith("fold")]) - - fold_index = [fold_i for fold_i in range(num_folds)] # subset folds if options.fold_subset is not None: - fold_index = [int(fold_str) for fold_str in options.fold_subset.split(",")] + num_folds = min(options.fold_subset, num_folds) + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] if options.queue == "standard": num_cpu = 4 @@ -192,7 +172,7 @@ def main(): model_file = "%s/train/model%d_best.h5" % (it_dir, options.dataset_i) # check if done - acc_file = "%s/acc.txt" % out_dir + acc_file = "%s/apa_preds_polyadb.tsv.gz" % out_dir if os.path.isfile(acc_file): # print('%s already generated.' % acc_file) pass @@ -209,6 +189,8 @@ def main(): cmd += " --shifts %s" % options.shifts if options.targets_file is not None: cmd += " -t %s" % options.targets_file + if options.untransform_old: + cmd += " -u" cmd += " %s" % params_file cmd += " %s" % model_file cmd += " %s/data%d" % (it_dir, head_i) diff --git a/src/scripts/borzoi_test_apa_polaydb.py b/src/scripts/borzoi_test_apa_polaydb.py index 2f26efa..eecf01c 100755 --- a/src/scripts/borzoi_test_apa_polaydb.py +++ b/src/scripts/borzoi_test_apa_polaydb.py @@ -33,11 +33,6 @@ Measure accuracy at polyadenylation-level. 
""" - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - - ################################################################################ # main ################################################################################ @@ -89,6 +84,13 @@ def main(): default=None, help="TFR pattern string appended to data_dir/tfrecords for subsetting [Default: %default]", ) + parser.add_option( + "-u", + dest="untransform_old", + default=False, + action="store_true", + help="Untransform old models [Default: %default]", + ) (options, args) = parser.parse_args() if len(args) != 4: @@ -132,9 +134,6 @@ def main(): num_targets = targets_df.shape[0] num_targets_strand = targets_strand_df.shape[0] - # save sqrt'd tracks - sqrt_mask = np.array([ss.find("sqrt") != -1 for ss in targets_strand_df.sum_stat]) - # read model parameters with open(params_file) as params_open: params = json.load(params_open) @@ -188,9 +187,6 @@ def main(): # filter for 3' UTR polyA sites only apa_df = apa_df.query("site_type == '3\\' most exon'").copy().reset_index(drop=True) - eprint("len(apa_df) = " + str(len(apa_df))) - print("len(apa_df) = " + str(len(apa_df))) - apa_df["start_hg38"] = apa_df["position_hg38"] apa_df["end_hg38"] = apa_df["position_hg38"] + 1 @@ -207,6 +203,11 @@ def main(): apa_pr = pr.PyRanges( apa_df[["Chromosome", "Start", "End", "pas_id", "cut_mode", "pas_strand"]] ) + + # get strands + pas_strand_dict = {} + for _, row in apa_df.iterrows() : + pas_strand_dict[row['pas_id']] = row['pas_strand'] ####################################################### # intersect APA sites w/ preds, targets @@ -214,9 +215,6 @@ def main(): # intersect seqs, APA sites seqs_apa_pr = seqs_pr.join(apa_pr) - eprint("len(seqs_apa_pr.df) = " + str(len(seqs_apa_pr.df))) - print("len(seqs_apa_pr.df) = " + str(len(seqs_apa_pr.df))) - # hash preds/targets by pas_id apa_preds_dict = {} apa_targets_dict = {} @@ -228,8 +226,7 @@ def main(): y = y.numpy()[..., targets_df.index] t0 = time.time() 
- eprint("Sequence %d..." % si) - print("Sequence %d..." % si, end="") + print("Sequence %d..." % si, end="", flush=True) for bsi in range(x.shape[0]): seq = seqs_df.iloc[si + bsi] @@ -276,14 +273,11 @@ def main(): apa_preds_dict.setdefault(pas_id, []).append(yhb) apa_targets_dict.setdefault(pas_id, []).append(yb) else: - eprint("(Warning: len(yb) <= 0)") + print("(Warning: len(yb) <= 0)", flush=True) # advance sequence table index si += x.shape[0] - eprint("DONE in %ds." % (time.time() - t0)) - print("DONE in %ds." % (time.time() - t0)) - - eprint("len(apa_preds_dict) = " + str(len(apa_preds_dict))) + print("DONE in %ds." % (time.time() - t0), flush=True) if si % 128 == 0: gc.collect() @@ -300,14 +294,22 @@ def main(): apa_targets_gi = np.concatenate(apa_targets_dict[pas_id], axis=0).astype( "float32" ) - - # undo scale - apa_preds_gi /= np.expand_dims(targets_strand_df.scale, axis=0) - apa_targets_gi /= np.expand_dims(targets_strand_df.scale, axis=0) - - # undo sqrt - apa_preds_gi[:, sqrt_mask] = apa_preds_gi[:, sqrt_mask] ** (4 / 3) - apa_targets_gi[:, sqrt_mask] = apa_targets_gi[:, sqrt_mask] ** (4 / 3) + + # slice strand + if pas_strand_dict[pas_id] == "+": + pas_strand_mask = (targets_df.strand != "-").to_numpy() + else: + pas_strand_mask = (targets_df.strand != "+").to_numpy() + apa_preds_gi = apa_preds_gi[:, pas_strand_mask] + apa_targets_gi = apa_targets_gi[:, pas_strand_mask] + + # untransform + if options.untransform_old: + apa_preds_gi = dataset.untransform_preds1(apa_preds_gi, targets_strand_df, unscale=True, unclip=False) + apa_targets_gi = dataset.untransform_preds1(apa_targets_gi, targets_strand_df, unscale=True, unclip=False) + else: + apa_preds_gi = dataset.untransform_preds(apa_preds_gi, targets_strand_df, unscale=True, unclip=False) + apa_targets_gi = dataset.untransform_preds(apa_targets_gi, targets_strand_df, unscale=True, unclip=False) # mean coverage apa_preds_gi = apa_preds_gi.mean(axis=0) diff --git a/src/scripts/borzoi_test_exons_folds.py 
b/src/scripts/borzoi_test_exons_folds.py index 02c70d3..7091465 100755 --- a/src/scripts/borzoi_test_exons_folds.py +++ b/src/scripts/borzoi_test_exons_folds.py @@ -94,6 +94,12 @@ def main(): type="int", help="Run a subset of folds [Default:%default]", ) + parser.add_option( + "--f_list", + dest="fold_subset_list", + default=None, + help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", + ) parser.add_option( "-g", dest="exons_gff", @@ -195,6 +201,12 @@ def main(): # subset folds if options.fold_subset is not None: num_folds = min(options.fold_subset, num_folds) + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] if options.queue == "standard": num_cpu = 4 @@ -209,7 +221,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (options.exp_dir, fi, ci) if options.dataset_i is None: diff --git a/src/scripts/borzoi_test_genes.py b/src/scripts/borzoi_test_genes.py index e36fa54..bfdb2aa 100755 --- a/src/scripts/borzoi_test_genes.py +++ b/src/scripts/borzoi_test_genes.py @@ -117,6 +117,13 @@ def main(): action="store_true", help="Untransform old models [Default: %default]", ) + parser.add_option( + "--store_span", + dest="store_span", + default=False, + action="store_true", + help="Store predicted/measured gene span coverage profiles [Default: %default]", + ) (options, args) = parser.parse_args() if len(args) != 4: @@ -323,16 +330,21 @@ def main(): preds_log = np.log2(gene_preds_gi[:, ti] + 1) targets_log = np.log2(gene_targets_gi[:, ti] + 1) gene_corr_gi[ti] = pearsonr(preds_log, targets_log)[0] - # gene_corr_gi[ti] = pearsonr(gene_preds_gi[:,ti], gene_targets_gi[:,ti])[0] else: gene_corr_gi[ti] = np.nan gene_within.append(gene_corr_gi) gene_wvar.append(gene_targets_gi.var(axis=0)) - # TEMP: save gene 
preds/targets - # os.makedirs('%s/gene_within' % options.out_dir, exist_ok=True) - # np.save('%s/gene_within/%s_preds.npy' % (options.out_dir, gene_id), gene_preds_gi.astype('float16')) - # np.save('%s/gene_within/%s_targets.npy' % (options.out_dir, gene_id), gene_targets_gi.astype('float16')) + # optionally store raw coverage profiles for gene span + if options.store_span: + hash_code = str(gene_id.split(".")[0][-1]) # last digit of gene id + + os.makedirs('%s/gene_within' % options.out_dir, exist_ok=True) + os.makedirs('%s/gene_within/%s' % (options.out_dir, hash_code), exist_ok=True) + os.makedirs('%s/gene_within/%s/preds' % (options.out_dir, hash_code), exist_ok=True) + os.makedirs('%s/gene_within/%s/targets' % (options.out_dir, hash_code), exist_ok=True) + np.save('%s/gene_within/%s/preds/%s_preds.npy' % (options.out_dir, hash_code, gene_id), gene_preds_gi.astype('float16')) + np.save('%s/gene_within/%s/targets/%s_targets.npy' % (options.out_dir, hash_code, gene_id), gene_targets_gi.astype('float16')) # mean coverage gene_preds_gi = gene_preds_gi.mean(axis=0) / float(pool_width) diff --git a/src/scripts/borzoi_test_genes_folds.py b/src/scripts/borzoi_test_genes_folds.py index 13196ba..b558205 100755 --- a/src/scripts/borzoi_test_genes_folds.py +++ b/src/scripts/borzoi_test_genes_folds.py @@ -28,9 +28,9 @@ import slurm """ -borzoi_test_folds.py +borzoi_test_genes_folds.py -Train Borzoi model replicates using given parameters and data. +Measure accuracy at gene-level for multiple model replicates. 
""" ################################################################################ @@ -89,6 +89,13 @@ def main(): action="store_true", help="Untransform old models [Default: %default]", ) + parser.add_option( + "--store_span", + dest="store_span", + default=False, + action="store_true", + help="Store predicted/measured gene span coverage profiles [Default: %default]", + ) # folds parser.add_option( @@ -129,6 +136,13 @@ def main(): "-f", dest="fold_subset", default=None, + type="int", + help="Run a subset of folds [Default:%default]", + ) + parser.add_option( + "--f_list", + dest="fold_subset_list", + default=None, help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", ) parser.add_option( @@ -167,9 +181,16 @@ def main(): help="Output experiment directory [Default: %default]", ) parser.add_option( - "-p", dest="out_stem", default=None, help="Output plot stem [Default: %default]" + "-p", + dest="out_stem", + default=None, + help="Output plot stem [Default: %default]" + ) + parser.add_option( + "-q", + dest="queue", + default="geforce" ) - parser.add_option("-q", dest="queue", default="geforce") parser.add_option( "-s", dest="sub_dir", @@ -182,13 +203,6 @@ def main(): default=None, help="Reference directory for statistical tests", ) - parser.add_option( - "--status", - dest="status", - default=False, - action="store_true", - help="Update metric status; do not run jobs [Default: %default]", - ) (options, args) = parser.parse_args() @@ -213,12 +227,16 @@ def main(): # count folds num_folds = len([dkey for dkey in data_stats if dkey.startswith("fold")]) - - fold_index = [fold_i for fold_i in range(num_folds)] # subset folds if options.fold_subset is not None: - fold_index = [int(fold_str) for fold_str in options.fold_subset.split(",")] + num_folds = min(options.fold_subset, num_folds) + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) 
for fold_str in options.fold_subset_list.split(",")] if options.queue == "standard": num_cpu = 8 @@ -253,7 +271,7 @@ def main(): cmd = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" cmd += " conda activate %s;" % options.conda_env cmd += " time borzoi_test_genes.py" - # cmd += ' --head %d' % head_i + cmd += ' --head %d' % head_i cmd += " -o %s" % out_dir if options.rc: cmd += " --rc" @@ -265,6 +283,8 @@ def main(): cmd += ' --pseudo_qtl %.2f' % options.pseudo_qtl if options.untransform_old: cmd += ' -u' + if options.store_span: + cmd += ' --store_span' if options.span: cmd += " --span" job_mem = 240000 diff --git a/src/scripts/borzoi_test_tss_folds_gencode.py b/src/scripts/borzoi_test_tss_folds_gencode.py index 0cdc500..1b65130 100644 --- a/src/scripts/borzoi_test_tss_folds_gencode.py +++ b/src/scripts/borzoi_test_tss_folds_gencode.py @@ -13,25 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========================================================================= -from optparse import OptionParser, OptionGroup -import glob +from optparse import OptionParser import json import os -import pdb -import sys - -from natsort import natsorted -import numpy as np -import pandas as pd -from scipy.stats import wilcoxon, ttest_rel -import matplotlib.pyplot as plt -import seaborn as sns import slurm """ borzoi_test_tss_folds_gencode.py +Measure accuracy at TSS-level for multiple model replicates. """ ################################################################################ @@ -40,13 +31,6 @@ def main(): usage = 'usage: %prog [options] ...' 
parser = OptionParser(usage) - parser.add_option( - '-a', - '--alt', - dest='alternative', - default='two-sided', - help='Statistical test alternative [Default: %default]', - ) parser.add_option( '-c', dest='crosses', @@ -61,13 +45,6 @@ def main(): type='int', help='Dataset index [Default:%default]', ) - parser.add_option( - '--d_ref', - dest='dataset_ref_i', - default=None, - type='int', - help='Reference Dataset index [Default:%default]', - ) parser.add_option( '-e', dest='conda_env', @@ -78,6 +55,13 @@ def main(): '-f', dest='fold_subset', default=None, + type='int', + help='Run a subset of folds [Default:%default]', + ) + parser.add_option( + '--f_list', + dest='fold_subset_list', + default=None, help='Run a subset of folds (encoded as comma-separated string) [Default:%default]', ) parser.add_option( @@ -85,24 +69,6 @@ def main(): dest='tss_file', default='/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_tss2.bed', ) - parser.add_option( - '--label_exp', - dest='label_exp', - default='Experiment', - help='Experiment label [Default: %default]', - ) - parser.add_option( - '--label_ref', - dest='label_ref', - default='Reference', - help='Reference label [Default: %default]', - ) - parser.add_option( - '-m', - dest='metric', - default='pearsonr', - help='Train/test metric [Default: Pearsonr or AUPRC]', - ) parser.add_option( '--name', dest='name', @@ -115,23 +81,11 @@ def main(): default=None, help='Output experiment directory [Default: %default]', ) - parser.add_option( - '-p', - dest='out_stem', - default=None, - help='Output plot stem [Default: %default]', - ) parser.add_option( '-q', dest='queue', default='geforce', ) - parser.add_option( - '-r', - dest='ref_dir', - default=None, - help='Reference directory for statistical tests', - ) parser.add_option( '--rc', dest='rc', @@ -160,13 +114,6 @@ def main(): action='store_true', help='Store max instead of avg bin value in local window [Default: %default]', ) - parser.add_option( - '--status', - 
dest='status', - default=False, - action='store_true', - help='Update metric status; do not run jobs [Default: %default]', - ) parser.add_option( '-t', dest='targets_file', @@ -174,6 +121,13 @@ def main(): type='str', help='File specifying target indexes and labels in table format', ) + parser.add_option( + '-u', + dest='untransform_old', + default=False, + action='store_true', + help='Untransform old models [Default: %default]', + ) (options, args) = parser.parse_args() if len(args) < 2: @@ -197,12 +151,16 @@ def main(): # count folds num_folds = len([dkey for dkey in data_stats if dkey.startswith("fold")]) - - fold_index = [fold_i for fold_i in range(num_folds)] # subset folds if options.fold_subset is not None: - fold_index = [int(fold_str) for fold_str in options.fold_subset.split(",")] + num_folds = min(options.fold_subset, num_folds) + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] if options.queue == 'standard': num_cpu = 4 @@ -236,12 +194,12 @@ def main(): model_file = '%s/train/model%d_best.h5' % (it_dir, options.dataset_i) # check if done - acc_file = '%s/acc.txt' % out_dir + acc_file = '%s/tss_preds_gencode.tsv.gz' % out_dir if os.path.isfile(acc_file): # print('%s already generated.' % acc_file) pass else: - # basenji test + # evaluate cmd = '. 
/home/drk/anaconda3/etc/profile.d/conda.sh;' cmd += ' conda activate %s;' % options.conda_env cmd += ' time borzoi_test_tss_gencode.py' @@ -257,6 +215,8 @@ def main(): cmd += ' --maxcov' if options.targets_file is not None: cmd += ' -t %s' % options.targets_file + if options.untransform_old: + cmd += ' -u' cmd += ' %s' % params_file cmd += ' %s' % model_file cmd += ' %s/data%d' % (it_dir, head_i) diff --git a/src/scripts/borzoi_test_tss_gencode.py b/src/scripts/borzoi_test_tss_gencode.py index c5cad57..3e88a02 100644 --- a/src/scripts/borzoi_test_tss_gencode.py +++ b/src/scripts/borzoi_test_tss_gencode.py @@ -13,31 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========================================================================= - from optparse import OptionParser import gc import json -import pdb import os import time import sys -import h5py -#from intervaltree import IntervalTree import numpy as np import pandas as pd import pyranges as pr -from scipy.stats import pearsonr -from sklearn.metrics import explained_variance_score -import tensorflow as tf -#from tqdm import tqdm - -from basenji import bed -from basenji import dataset -from basenji import seqnn -from basenji import trainer -#import pygene -#from qnorm import quantile_normalize + +from baskerville import dataset +from baskerville import seqnn ''' borzoi_test_tss_gencode.py @@ -45,9 +33,6 @@ Measure accuracy at TSS-level. 
''' -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - ################################################################################ # main ################################################################################ @@ -113,6 +98,13 @@ def main(): default=None, help='TFR pattern string appended to data_dir/tfrecords for subsetting [Default: %default]', ) + parser.add_option( + "-u", + dest="untransform_old", + default=False, + action="store_true", + help="Untransform old models [Default: %default]", + ) (options, args) = parser.parse_args() if len(args) != 4: @@ -154,9 +146,6 @@ def main(): num_targets = targets_df.shape[0] num_targets_strand = targets_strand_df.shape[0] - # save sqrt'd tracks - sqrt_mask = np.array([ss.find('sqrt') != -1 for ss in targets_strand_df.sum_stat]) - # read model parameters with open(params_file) as params_open: params = json.load(params_open) @@ -203,6 +192,11 @@ def main(): tss_df = pd.read_csv(tss_file, sep='\t', names=['Chromosome', 'Start', 'End', 'tss_id', 'feat1', 'tss_strand']) tss_pr = pr.PyRanges(tss_df) + + # get strands + tss_strand_dict = {} + for _, row in tss_df.iterrows() : + tss_strand_dict[row['tss_id']] = row['tss_strand'] ####################################################### # intersect TSS sites w/ preds, targets @@ -210,9 +204,6 @@ def main(): # intersect seqs, TSS sites seqs_tss_pr = seqs_pr.join(tss_pr) - eprint("len(seqs_tss_pr.df) = " + str(len(seqs_tss_pr.df))) - print("len(seqs_tss_pr.df) = " + str(len(seqs_tss_pr.df))) - # hash preds/targets by tss_id tss_preds_dict = {} tss_targets_dict = {} @@ -221,11 +212,10 @@ def main(): for x, y in eval_data.dataset: # predict only if gene overlaps yh = None - y = y.numpy()[...,targets_df.index] + y = y.numpy()[..., targets_df.index] t0 = time.time() - eprint('Sequence %d...' % si) - print('Sequence %d...' % si, end='') + print('Sequence %d...' 
% si, end='', flush=True) for bsi in range(x.shape[0]): seq = seqs_df.iloc[si+bsi] @@ -263,26 +253,22 @@ def main(): yh = seqnn_model(x) # slice gene region - yhb = yh[bsi,bin_start:bin_end].astype('float16') - yb = y[bsi,bin_start:bin_end].astype('float16') + yhb = yh[bsi, bin_start:bin_end].astype('float16') + yb = y[bsi, bin_start:bin_end].astype('float16') if len(yb) > 0: - tss_preds_dict.setdefault(tss_id,[]).append(yhb) - tss_targets_dict.setdefault(tss_id,[]).append(yb) + tss_preds_dict.setdefault(tss_id, []).append(yhb) + tss_targets_dict.setdefault(tss_id, []).append(yb) else: - eprint("(Warning: len(yb) <= 0)") + print("(Warning: len(yb) <= 0)", flush=True) # advance sequence table index si += x.shape[0] - eprint('DONE in %ds.' % (time.time()-t0)) - print('DONE in %ds.' % (time.time()-t0)) - - eprint("len(tss_preds_dict) = " + str(len(tss_preds_dict))) + print('DONE in %ds.' % (time.time() - t0), flush=True) if si % 128 == 0: gc.collect() - ####################################################### # aggregate TSS bin values into arrays @@ -292,15 +278,25 @@ def main(): for tss_id in tss_ids: tss_preds_gi = np.concatenate(tss_preds_dict[tss_id], axis=0).astype('float32') - tss_targets_gi = np.concatenate(tss_targets_dict[tss_id], axis=0).astype('float32') - - # undo scale - tss_preds_gi /= np.expand_dims(targets_strand_df.scale, axis=0) - tss_targets_gi /= np.expand_dims(targets_strand_df.scale, axis=0) - - # undo sqrt - tss_preds_gi[:,sqrt_mask] = tss_preds_gi[:,sqrt_mask]**(4/3) - tss_targets_gi[:,sqrt_mask] = tss_targets_gi[:,sqrt_mask]**(4/3) + tss_targets_gi = np.concatenate(tss_targets_dict[tss_id], axis=0).astype( + 'float32' + ) + + # slice strand + if tss_strand_dict[tss_id] == "+": + tss_strand_mask = (targets_df.strand != "-").to_numpy() + else: + tss_strand_mask = (targets_df.strand != "+").to_numpy() + tss_preds_gi = tss_preds_gi[:, tss_strand_mask] + tss_targets_gi = tss_targets_gi[:, tss_strand_mask] + + # untransform + if 
options.untransform_old: + tss_preds_gi = dataset.untransform_preds1(tss_preds_gi, targets_strand_df, unscale=True, unclip=False) + tss_targets_gi = dataset.untransform_preds1(tss_targets_gi, targets_strand_df, unscale=True, unclip=False) + else: + tss_preds_gi = dataset.untransform_preds(tss_preds_gi, targets_strand_df, unscale=True, unclip=False) + tss_targets_gi = dataset.untransform_preds(tss_targets_gi, targets_strand_df, unscale=True, unclip=False) # mean (or max) coverage tss_preds_gi = tss_preds_gi.max(axis=0) if options.maxcov else tss_preds_gi.mean(axis=0) From 24c4e514eee418065c96f833d70266d16e27bed4 Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Fri, 20 Sep 2024 14:07:09 -0700 Subject: [PATCH 02/32] Revision updates (untransform_old flag, support for missing fold 0, etc). --- src/scripts/basenji_bench_classify.py | 407 ++++++++++ src/scripts/basenji_bench_gtex_folds.py | 739 ++++++++++++++++++ src/scripts/basenji_gtex_coef.py | 336 ++++++++ src/scripts/basenji_sad.py | 529 +++++++++++++ src/scripts/borzoi_bench_gtex_folds.py | 54 +- src/scripts/borzoi_bench_ipaqtl_folds.py | 195 +++-- src/scripts/borzoi_bench_paqtl_folds.py | 195 +++-- src/scripts/borzoi_bench_sqtl_folds.py | 195 ++--- src/scripts/borzoi_bench_trip_folds.py | 40 +- src/scripts/borzoi_gtex_coef.py | 4 +- src/scripts/borzoi_satg_gene_gpu.py | 584 +++++++------- ...borzoi_satg_gene_gpu_crispr_ism_shuffle.py | 700 +++++++++-------- .../borzoi_satg_gene_gpu_focused_ism.py | 399 ++++++---- src/scripts/borzoi_satg_polya_gpu.py | 583 +++++++------- src/scripts/borzoi_satg_splice_gpu.py | 697 +++++++++-------- src/scripts/borzoi_sed_folds.py | 58 +- src/scripts/borzoi_sed_ipaqtl_cov.py | 78 +- src/scripts/borzoi_sed_paqtl_cov.py | 69 +- 18 files changed, 4077 insertions(+), 1785 deletions(-) create mode 100644 src/scripts/basenji_bench_classify.py create mode 100644 src/scripts/basenji_bench_gtex_folds.py create mode 100644 src/scripts/basenji_gtex_coef.py create mode 100644 
src/scripts/basenji_sad.py diff --git a/src/scripts/basenji_bench_classify.py b/src/scripts/basenji_bench_classify.py new file mode 100644 index 0000000..5df06e9 --- /dev/null +++ b/src/scripts/basenji_bench_classify.py @@ -0,0 +1,407 @@ +#!/usr/bin/env python +from optparse import OptionParser +import joblib +import os +import pdb + +import h5py +import numpy as np +import pandas as pd +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import RidgeClassifier +from sklearn.metrics import average_precision_score, roc_auc_score, roc_curve +from sklearn.model_selection import KFold + +import matplotlib +import matplotlib.pyplot as plt +import seaborn as sns + +''' +basenji_bench_classify.py +''' + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] ' + parser = OptionParser(usage) + parser.add_option( + '-a', + dest='abs_value', + default=False, + action='store_true' + ) + parser.add_option( + '-i', + dest='iterations', + default=1, + type='int', + help='Cross-validation iterations [Default: %default]' + ) + parser.add_option( + '--indel', + dest='indel', + default=False, + action='store_true', + help='Add indel size as feature [Default: %default]' + ) + parser.add_option( + '--iscale', + dest='indel_scale', + default=0.1, + type='float', + help='Scale indel SAD [Default: %default]' + ) + parser.add_option( + '-l', + dest='log', + default=False, + action='store_true' + ) + parser.add_option( + '-m', + dest='model_pkl', + help='Dimension reduction model' + ) + parser.add_option( + '--msl', + dest='msl', + default=1, + type='int', + help='Random forest min_samples_leaf [Default: %default]' + ) + parser.add_option( + '-o', + dest='out_dir', + default='class_out' + ) + parser.add_option( + '-p', + dest='parallel_threads', + default=1, + type='int', + help='Parallel threads passed 
to scikit-learn n_jobs [Default: %default]' + ) + parser.add_option( + '-r', + dest='random_seed', + default=None, + type='int' + ) + parser.add_option( + '-s', + dest='save_preds', + default=False, + action='store_true', + help='Save predictions across iterations [Default: %default]' + ) + parser.add_option( + '--stat', + dest='sad_stat', + default='SAD', + help='HDF5 key stat to consider. [Default: %default]' + ) + parser.add_option( + '-t', + dest='targets_file', + default=None + ) + (options,args) = parser.parse_args() + + if len(args) != 2: + parser.error('Must provide positive and negative variant predictions.') + else: + sadp_file = args[0] + sadn_file = args[1] + + np.random.seed(options.random_seed) + + if not os.path.isdir(options.out_dir): + os.mkdir(options.out_dir) + + # read dimension reduction model + if options.model_pkl: + model = joblib.load(options.model_pkl) + + if options.targets_file is None: + target_slice = None + else: + targets_df = pd.read_csv(options.targets_file, sep='\t', index_col=0) + target_slice = targets_df.index + + # read positive/negative variants + Xp = read_sad(sadp_file, options.sad_stat, target_slice) + Xn = read_sad(sadn_file, options.sad_stat, target_slice) + if options.log: + Xp = np.arcsinh(Xp) + Xn = np.arcsinh(Xn) + if options.abs_value: + Xp = np.abs(Xp) + Xn = np.abs(Xn) + if options.model_pkl: + Xp = model.transform(Xp) + Xn = model.transform(Xn) + + if options.indel: + Ip = read_indel(sadp_file) + In = read_indel(sadn_file) + Ip = np.expand_dims(Ip, axis=-1) + In = np.expand_dims(In, axis=-1) + Xp = np.concatenate([Xp,Ip], axis=1) + Xn = np.concatenate([Xn,In], axis=1) + elif options.indel_scale != 1: + Ip = read_indel(sadp_file, indel_bool=True) + In = read_indel(sadn_file, indel_bool=True) + Xp[Ip] = options.indel_scale*Xp[Ip] + Xn[Ip] = options.indel_scale*Xn[Ip] + + # combine + X = np.concatenate([Xp, Xn], axis=0) + y = np.array([True]*Xp.shape[0] + [False]*Xn.shape[0], dtype='bool') + + # train classifier + 
if X.shape[1] == 1: + aurocs, fpr_folds, tpr_folds, fpr_mean, tpr_mean = fold_roc(X, y, folds=8) + + # save preds + if options.save_preds: + np.save('%s/preds.npy' % options.out_dir, X) + else: + # aurocs, fpr_folds, tpr_folds, fpr_full, tpr_full = ridge_roc(X, y, folds=8, alpha=10000) + aurocs, fpr_folds, tpr_folds, fpr_mean, tpr_mean, preds = randfor_roc(X, y, folds=8, + iterations=options.iterations, min_samples_leaf=options.msl, + random_state=options.random_seed, n_jobs=options.parallel_threads) + + # save preds + if options.save_preds: + np.save('%s/preds.npy' % options.out_dir, preds) + + # save full model + model = randfor_full(X, y, min_samples_leaf=options.msl) + joblib.dump(model, '%s/model.pkl' % options.out_dir) + + # save + np.save('%s/aurocs.npy' % options.out_dir, aurocs) + np.save('%s/fpr_mean.npy' % options.out_dir, fpr_mean) + np.save('%s/tpr_mean.npy' % options.out_dir, tpr_mean) + + # print stats + stats_out = open('%s/stats.txt' % options.out_dir, 'w') + auroc_stdev = np.std(aurocs) / np.sqrt(len(aurocs)) + print('AUROC: %.4f (%.4f)' % (np.mean(aurocs), auroc_stdev), file=stats_out) + stats_out.close() + + # plot roc + plot_roc(fpr_folds, tpr_folds, options.out_dir) + + +def fold_roc(X, y, folds=8, random_state=44): + """Compute ROC for a single value, sans model.""" + aurocs = [] + fpr_folds = [] + tpr_folds = [] + + fpr_mean = np.linspace(0, 1, 256) + tpr_mean = [] + + # preds_full = np.zeros(y.shape) + + kf = KFold(n_splits=folds, shuffle=True, random_state=random_state) + + for train_index, test_index in kf.split(X): + # predict test set (as is) + preds = X[test_index,:] + + # save + # preds_full[test_index] = preds.squeeze() + + # compute ROC curve + fpr, tpr, _ = roc_curve(y[test_index], preds) + fpr_folds.append(fpr) + tpr_folds.append(tpr) + + interp_tpr = np.interp(fpr_mean, fpr, tpr) + interp_tpr[0] = 0.0 + tpr_mean.append(interp_tpr) + + # compute AUROC + aurocs.append(roc_auc_score(y[test_index], preds)) + + # fpr_full, tpr_full, _ 
= roc_curve(y, preds_full) + tpr_mean = np.array(tpr_mean).mean(axis=0) + + return np.array(aurocs), np.array(fpr_folds), np.array(tpr_folds), fpr_mean, tpr_mean + + +def plot_roc(fprs, tprs, out_dir): + plt.figure(figsize=(4,4)) + + for fi in range(len(fprs)): + plt.plot(fprs[fi], tprs[fi], alpha=0.25) + + ax = plt.gca() + ax.set_xlabel('False positive rate') + ax.set_ylabel('True positive rate') + + sns.despine() + plt.tight_layout() + + plt.savefig('%s/roc.pdf' % out_dir) + plt.close() + + +def randfor_full(X, y, min_samples_leaf=1, random_state=None, n_jobs=1): + """Compute a single random forest on the full data.""" + model = RandomForestClassifier(n_estimators=100, max_features='log2', max_depth=64, + min_samples_leaf=min_samples_leaf, min_samples_split=2, + random_state=random_state, n_jobs=n_jobs) + model.fit(X, y) + return model + + +def randfor_roc(X, y, folds=8, iterations=1, n_estimators=100, + min_samples_leaf=1, random_state=None, n_jobs=1): + """Compute ROC using a random forest.""" + aurocs = [] + fpr_folds = [] + tpr_folds = [] + fpr_fulls = [] + tpr_fulls = [] + preds_return = [] + + fpr_mean = np.linspace(0, 1, 256) + tpr_mean = [] + + for i in range(iterations): + rs_iter = random_state + i + preds_full = np.zeros(y.shape) + + kf = KFold(n_splits=folds, shuffle=True, random_state=rs_iter) + + for train_index, test_index in kf.split(X): + # fit model + if random_state is None: + rs_rf = None + else: + rs_rf = rs_iter+test_index[0] + model = RandomForestClassifier(n_estimators=n_estimators, max_features='log2', max_depth=64, + min_samples_leaf=min_samples_leaf, min_samples_split=2, + random_state=rs_rf, n_jobs=n_jobs) + model.fit(X[train_index,:], y[train_index]) + + # predict test set + preds = model.predict_proba(X[test_index,:])[:,1] + + # save + preds_full[test_index] = preds.squeeze() + + # compute ROC curve + fpr, tpr, _ = roc_curve(y[test_index], preds) + fpr_folds.append(fpr) + tpr_folds.append(tpr) + + interp_tpr = np.interp(fpr_mean, 
fpr, tpr) + interp_tpr[0] = 0.0 + tpr_mean.append(interp_tpr) + + # compute AUROC + aurocs.append(roc_auc_score(y[test_index], preds)) + + fpr_full, tpr_full, _ = roc_curve(y, preds_full) + fpr_fulls.append(fpr_full) + tpr_fulls.append(tpr_full) + preds_return.append(preds_full) + + aurocs = np.array(aurocs) + tpr_mean = np.array(tpr_mean).mean(axis=0) + preds_return = np.array(preds_return).T + + return aurocs, fpr_folds, tpr_folds, fpr_mean, tpr_mean, preds_return + +def ridge_roc(X, y, folds=8, iterations=1, alpha=1, random_state=None): + """Compute ROC using a random forest.""" + aurocs = [] + fpr_folds = [] + tpr_folds = [] + fpr_fulls = [] + tpr_fulls = [] + preds_return = [] + + fpr_mean = np.linspace(0, 1, 256) + tpr_mean = [] + + for i in range(iterations): + rs_iter = random_state + i + preds_full = np.zeros(y.shape) + + kf = KFold(n_splits=folds, shuffle=True, random_state=rs_iter) + + for train_index, test_index in kf.split(X): + # fit model + if random_state is None: + rs_rf = None + else: + rs_rf = rs_iter+test_index[0] + model = RidgeClassifier(alpha=alpha, random_state=rs_rf) + model.fit(X[train_index,:], y[train_index]) + + # predict test set + preds = model._predict_proba_lr(X[test_index,:])[:,1] + + # save + preds_full[test_index] = preds.squeeze() + + # compute ROC curve + fpr, tpr, _ = roc_curve(y[test_index], preds) + fpr_folds.append(fpr) + tpr_folds.append(tpr) + + interp_tpr = np.interp(fpr_mean, fpr, tpr) + interp_tpr[0] = 0.0 + tpr_mean.append(interp_tpr) + + # compute AUROC + aurocs.append(roc_auc_score(y[test_index], preds)) + + fpr_full, tpr_full, _ = roc_curve(y, preds_full) + fpr_fulls.append(fpr_full) + tpr_fulls.append(tpr_full) + preds_return.append(preds_full) + + aurocs = np.array(aurocs) + tpr_mean = np.array(tpr_mean).mean(axis=0) + preds_return = np.array(preds_return).T + + return aurocs, fpr_folds, tpr_folds, fpr_mean, tpr_mean, preds_return + +def read_indel(sad_file, indel_abs=True, indel_bool=False): + with 
h5py.File(sad_file, 'r') as sad_open: + try: + ref_alleles = [ra.decode('UTF-8') for ra in sad_open['ref_allele']] + alt_alleles = [aa.decode('UTF-8') for aa in sad_open['alt_allele']] + except KeyError: + ref_alleles = [ra.decode('UTF-8') for ra in sad_open['ref']] + alt_alleles = [aa.decode('UTF-8') for aa in sad_open['alt']] + num_variants = len(ref_alleles) + indels = np.array([len(ref_alleles[vi])-len(alt_alleles[vi]) for vi in range(num_variants)]) + if indel_abs: + indels = np.abs(indels) + if indel_bool: + indels = (indels != 0) + return indels + +def read_sad(sad_file, sad_stat, target_slice): + with h5py.File(sad_file, 'r') as sad_open: + sad = sad_open[sad_stat][:] + if target_slice is not None: + sad = sad[...,target_slice] + sad = np.nan_to_num(sad).astype('float32') + return sad + + +################################################################################ +# __main__ +################################################################################ +if __name__ == '__main__': + main() diff --git a/src/scripts/basenji_bench_gtex_folds.py b/src/scripts/basenji_bench_gtex_folds.py new file mode 100644 index 0000000..4ea5110 --- /dev/null +++ b/src/scripts/basenji_bench_gtex_folds.py @@ -0,0 +1,739 @@ +#!/usr/bin/env python +# Copyright 2019 Calico LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ========================================================================= +from optparse import OptionParser, OptionGroup +import glob +import json +import pickle +import pdb +import os +import shutil +import sys + +import h5py +import numpy as np +import pandas as pd + +import slurm + +""" +basenji_bench_gtex_folds.py + +Benchmark Basenji model replicates on GTEx eQTL classification task. +""" + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] ' + parser = OptionParser(usage) + + # sad options + sad_options = OptionGroup(parser, 'basenji_sad.py options') + sad_options.add_option( + '-f', + dest='genome_fasta', + default='%s/assembly/ucsc/hg38.fa' % os.environ['HG38'], + help='Genome FASTA for sequences [Default: %default]' + ) + sad_options.add_option( + '-n', + dest='norm_file', + default=None, + help='Normalize SAD scores' + ) + sad_options.add_option( + '-o', + dest='out_dir', + default='gtex', + help='Output directory for tables and plots [Default: %default]' + ) + sad_options.add_option( + '--rc', + dest='rc', + default=False, + action='store_true', + help='Average forward and reverse complement predictions [Default: %default]' + ) + sad_options.add_option( + '--shifts', + dest='shifts', + default='0', + type='str', + help='Ensemble prediction shifts [Default: %default]' + ) + sad_options.add_option( + '--stats', + dest='sad_stats', + default='SAD', + help='Comma-separated list of stats to save. 
[Default: %default]' + ) + sad_options.add_option( + '-t', + dest='targets_file', + default=None, + type='str', + help='File specifying target indexes and labels in table format' + ) + sad_options.add_option( + '--ti', + dest='track_indexes', + default=None, + type='str', + help='Comma-separated list of target indexes to output BigWig tracks' + ) + sad_options.add_option( + '--threads', + dest='threads', + default=False, + action='store_true', + help='Run CPU math and output in a separate thread [Default: %default]' + ) + sad_options.add_option( + '-u', + dest='untransform_old', + default=False, + action='store_true', + ) + sad_options.add_option( + '--no_untransform', + dest='no_untransform', + default=False, + action='store_true', + ) + parser.add_option_group(sad_options) + + # classify + class_options = OptionGroup(parser, 'basenji_bench_classify.py options') + class_options.add_option( + '--cn', + dest='class_name', + default=None, + help='Classifier name extension [Default: %default]' + ) + class_options.add_option( + '--ct', + dest='class_targets_file', + default=None, + help='Targets slice for the classifier stage [Default: %default]' + ) + class_options.add_option( + '--msl', + dest='msl', + default=1, + type='int', + help='Random forest min_samples_leaf [Default: %default]' + ) + parser.add_option_group(class_options) + + # cross-fold + fold_options = OptionGroup(parser, 'cross-fold options') + fold_options.add_option( + '-c', + dest='crosses', + default=1, + type='int', + help='Number of cross-fold rounds [Default:%default]' + ) + fold_options.add_option( + '--folds', + dest='fold_subset', + default=1, + type='int', + help='Run a subset of folds [Default:%default]', + ) + fold_options.add_option( + '--f_list', + dest='fold_subset_list', + default=None, + help='Run a subset of folds (encoded as comma-separated string) [Default:%default]', + ) + fold_options.add_option( + '-d', + dest='data_head', + default=None, + type='int', + help='Index for 
dataset/head [Default: %default]' + ) + fold_options.add_option( + '-e', + dest='conda_env', + default='tf210', + help='Anaconda environment [Default: %default]' + ) + fold_options.add_option( + '-g', + dest='gtex_vcf_dir', + default='/home/drk/seqnn/data/gtex_fine/susie_pip90' + ) + fold_options.add_option( + '--name', + dest='name', + default='gtex', + help='SLURM name prefix [Default: %default]' + ) + fold_options.add_option( + '--max_proc', + dest='max_proc', + default=None, + type='int', + help='Maximum concurrent processes [Default: %default]' + ) + fold_options.add_option( + '-p', + dest='processes', + default=None, + type='int', + help='Number of processes, passed by multi script' + ) + fold_options.add_option( + '-q', + dest='queue', + default='geforce', + help='SLURM queue on which to run the jobs [Default: %default]' + ) + parser.add_option_group(fold_options) + + (options, args) = parser.parse_args() + + if len(args) != 2: + parser.error('Must provide parameters file and cross-fold directory') + else: + params_file = args[0] + exp_dir = args[1] + + ####################################################### + # prep work + + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] + + # extract output subdirectory name + gtex_out_dir = options.out_dir + + # split SNP stats + sad_stats = options.sad_stats.split(',') + + # merge study/tissue variants + mpos_vcf_file = '%s/pos_merge.vcf' % options.gtex_vcf_dir + mneg_vcf_file = '%s/neg_merge.vcf' % options.gtex_vcf_dir + + ################################################################ + # SAD + + # SAD command base + cmd_base = '. 
/home/jlinder/anaconda3/etc/profile.d/conda.sh;' + cmd_base += ' conda activate %s;' % options.conda_env + cmd_base += ' echo $HOSTNAME;' + + jobs = [] + + for ci in range(options.crosses): + for fi in fold_index: + it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) + name = '%s-f%dc%d' % (options.name, fi, ci) + + # update output directory + it_out_dir = '%s/%s' % (it_dir, gtex_out_dir) + os.makedirs(it_out_dir, exist_ok=True) + + # choose model + model_file = '%s/train/model_best.h5' % it_dir + if options.data_head is not None: + model_file = '%s/train/model%d_best.h5' % (it_dir, options.data_head) + + ######################################## + # negative jobs + + # pickle options + options.out_dir = '%s/merge_neg' % it_out_dir + os.makedirs(options.out_dir, exist_ok=True) + options_pkl_file = '%s/options.pkl' % options.out_dir + options_pkl = open(options_pkl_file, 'wb') + pickle.dump(options, options_pkl) + options_pkl.close() + + # create base fold command + cmd_fold = '%s time basenji_sad.py %s %s %s' % ( + cmd_base, options_pkl_file, params_file, model_file) + + for pi in range(options.processes): + sad_file = '%s/job%d/sad.h5' % (options.out_dir, pi) + if not complete_h5(sad_file, sad_stats): + cmd_job = '%s %s %d' % (cmd_fold, mneg_vcf_file, pi) + j = slurm.Job(cmd_job, '%s_neg%d' % (name,pi), + '%s/job%d.out' % (options.out_dir,pi), + '%s/job%d.err' % (options.out_dir,pi), + '%s/job%d.sb' % (options.out_dir,pi), + queue=options.queue, gpu=1, cpu=2, + mem=60000, time='7-0:0:0') + jobs.append(j) + + ######################################## + # positive jobs + + # pickle options + options.out_dir = '%s/merge_pos' % it_out_dir + os.makedirs(options.out_dir, exist_ok=True) + options_pkl_file = '%s/options.pkl' % options.out_dir + options_pkl = open(options_pkl_file, 'wb') + pickle.dump(options, options_pkl) + options_pkl.close() + + # create base fold command + cmd_fold = '%s time basenji_sad.py %s %s %s' % ( + cmd_base, options_pkl_file, params_file, model_file) + + 
for pi in range(options.processes): + sad_file = '%s/job%d/sad.h5' % (options.out_dir, pi) + if not complete_h5(sad_file, sad_stats): + cmd_job = '%s %s %d' % (cmd_fold, mpos_vcf_file, pi) + j = slurm.Job(cmd_job, '%s_pos%d' % (name,pi), + '%s/job%d.out' % (options.out_dir,pi), + '%s/job%d.err' % (options.out_dir,pi), + '%s/job%d.sb' % (options.out_dir,pi), + queue=options.queue, gpu=1, cpu=2, + mem=30000, time='7-0:0:0') + jobs.append(j) + + slurm.multi_run(jobs, max_proc=options.max_proc, verbose=True, + launch_sleep=10, update_sleep=60) + + ####################################################### + # collect output + + for ci in range(options.crosses): + for fi in fold_index: + it_out_dir = '%s/f%dc%d/%s' % (exp_dir, fi, ci, gtex_out_dir) + + # collect negatives + neg_out_dir = '%s/merge_neg' % it_out_dir + if not os.path.isfile('%s/sad.h5' % neg_out_dir): + collect_h5('sad.h5', neg_out_dir, options.processes) + + # collect positives + pos_out_dir = '%s/merge_pos' % it_out_dir + if not os.path.isfile('%s/sad.h5' % pos_out_dir): + collect_h5('sad.h5', pos_out_dir, options.processes) + + ################################################################ + # split study/tissue variants + + for ci in range(options.crosses): + for fi in fold_index: + it_out_dir = '%s/f%dc%d/%s' % (exp_dir, fi, ci, gtex_out_dir) + print(it_out_dir) + + # split positives + split_sad(it_out_dir, 'pos', options.gtex_vcf_dir, sad_stats) + + # split negatives + split_sad(it_out_dir, 'neg', options.gtex_vcf_dir, sad_stats) + + ################################################################ + # ensemble + + ensemble_dir = '%s/ensemble' % exp_dir + if not os.path.isdir(ensemble_dir): + os.mkdir(ensemble_dir) + + gtex_dir = '%s/%s' % (ensemble_dir, gtex_out_dir) + if not os.path.isdir(gtex_dir): + os.mkdir(gtex_dir) + + for gtex_pos_vcf in glob.glob('%s/*_pos.vcf' % options.gtex_vcf_dir): + gtex_neg_vcf = gtex_pos_vcf.replace('_pos.','_neg.') + pos_base = 
os.path.splitext(os.path.split(gtex_pos_vcf)[1])[0] + neg_base = os.path.splitext(os.path.split(gtex_neg_vcf)[1])[0] + + # collect SAD files + sad_pos_files = [] + sad_neg_files = [] + for ci in range(options.crosses): + for fi in fold_index: + it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) + it_out_dir = '%s/%s' % (it_dir, gtex_out_dir) + + sad_pos_file = '%s/%s/sad.h5' % (it_out_dir, pos_base) + sad_pos_files.append(sad_pos_file) + + sad_neg_file = '%s/%s/sad.h5' % (it_out_dir, neg_base) + sad_neg_files.append(sad_neg_file) + + # ensemble + ens_pos_dir = '%s/%s' % (gtex_dir, pos_base) + os.makedirs(ens_pos_dir, exist_ok=True) + ens_pos_file = '%s/sad.h5' % (ens_pos_dir) + if not os.path.isfile(ens_pos_file): + ensemble_sad_h5(ens_pos_file, sad_pos_files) + + ens_neg_dir = '%s/%s' % (gtex_dir, neg_base) + os.makedirs(ens_neg_dir, exist_ok=True) + ens_neg_file = '%s/sad.h5' % (ens_neg_dir) + if not os.path.isfile(ens_neg_file): + ensemble_sad_h5(ens_neg_file, sad_neg_files) + + + ################################################################ + # fit classifiers + ################################################################ + + cmd_base = 'basenji_bench_classify.py -i 100 -p 2 -r 44 -s' + cmd_base += ' --msl %d' % options.msl + + if options.class_targets_file is not None: + cmd_base += ' -t %s' % options.class_targets_file + + jobs = [] + for ci in range(options.crosses): + for fi in fold_index: + it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) + it_out_dir = '%s/%s' % (it_dir, gtex_out_dir) + + for gtex_pos_vcf in glob.glob('%s/*_pos.vcf' % options.gtex_vcf_dir): + tissue = os.path.splitext(os.path.split(gtex_pos_vcf)[1])[0][:-4] + sad_pos = '%s/%s_pos/sad.h5' % (it_out_dir, tissue) + sad_neg = '%s/%s_neg/sad.h5' % (it_out_dir, tissue) + for sad_stat in sad_stats: + class_out_dir = '%s/%s_class-%s' % (it_out_dir, tissue, sad_stat) + if options.class_name is not None: + class_out_dir += '-%s' % options.class_name + if not os.path.isfile('%s/stats.txt' % class_out_dir): + 
cmd_class = '%s -o %s --stat %s' % (cmd_base, class_out_dir, sad_stat) + cmd_class += ' %s %s' % (sad_pos, sad_neg) + j = slurm.Job(cmd_class, tissue, + '%s.out'%class_out_dir, '%s.err'%class_out_dir, + queue='standard', cpu=2, + mem=22000, time='1-0:0:0') + jobs.append(j) + + # ensemble + for gtex_pos_vcf in glob.glob('%s/*_pos.vcf' % options.gtex_vcf_dir): + tissue = os.path.splitext(os.path.split(gtex_pos_vcf)[1])[0][:-4] + sad_pos = '%s/%s_pos/sad.h5' % (gtex_dir, tissue) + sad_neg = '%s/%s_neg/sad.h5' % (gtex_dir, tissue) + for sad_stat in sad_stats: + class_out_dir = '%s/%s_class-%s' % (gtex_dir, tissue, sad_stat) + if options.class_name is not None: + class_out_dir += '-%s' % options.class_name + if not os.path.isfile('%s/stats.txt' % class_out_dir): + cmd_class = '%s -o %s --stat %s' % (cmd_base, class_out_dir, sad_stat) + cmd_class += ' %s %s' % (sad_pos, sad_neg) + j = slurm.Job(cmd_class, tissue, + '%s.out'%class_out_dir, '%s.err'%class_out_dir, + queue='standard', cpu=2, + mem=22000, time='1-0:0:0') + jobs.append(j) + + slurm.multi_run(jobs, verbose=True) + + ################################################################ + # coefficient analysis + + cmd_base = 'basenji_gtex_coef.py -g %s' % options.gtex_vcf_dir + + jobs = [] + for ci in range(options.crosses): + for fi in fold_index: + it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) + it_out_dir = '%s/%s' % (it_dir, gtex_out_dir) + coef_out_dir = '%s/coef' % it_out_dir + + if not os.path.isfile('%s/metrics.tsv' % coef_out_dir): + cmd_coef = f'{cmd_base} -o {coef_out_dir} {it_out_dir}' + j = slurm.Job(cmd_coef, 'coef', + f'{coef_out_dir}.out', f'{coef_out_dir}.err', + queue='standard', cpu=2, + mem=30000, time='12:0:0') + jobs.append(j) + + # ensemble + it_out_dir = f'{exp_dir}/ensemble/{gtex_out_dir}' + coef_out_dir = '%s/coef' % it_out_dir + + if not os.path.isfile('%s/metrics.tsv' % coef_out_dir): + cmd_coef = f'{cmd_base} -o {coef_out_dir} {it_out_dir}' + j = slurm.Job(cmd_coef, 'coef', + 
f'{coef_out_dir}.out', f'{coef_out_dir}.err', + queue='standard', cpu=2, + mem=30000, time='12:0:0') + jobs.append(j) + + slurm.multi_run(jobs, verbose=True) + + +def complete_h5(h5_file, sad_stats): + if os.path.isfile(h5_file): + try: + with h5py.File(h5_file, 'r') as h5_open: + for ss in sad_stats: + sad = h5_open[ss][:] + if (sad != 0).sum() > 0: + return True + return False + except: + return False + else: + return False + + +def collect_h5(file_name, out_dir, num_procs): + # count variants + num_variants = 0 + for pi in range(num_procs): + # open job + job_h5_file = '%s/job%d/%s' % (out_dir, pi, file_name) + job_h5_open = h5py.File(job_h5_file, 'r') + num_variants += len(job_h5_open['snp']) + job_h5_open.close() + + # initialize final h5 + final_h5_file = '%s/%s' % (out_dir, file_name) + final_h5_open = h5py.File(final_h5_file, 'w') + + # keep dict for string values + final_strings = {} + + job0_h5_file = '%s/job0/%s' % (out_dir, file_name) + job0_h5_open = h5py.File(job0_h5_file, 'r') + for key in job0_h5_open.keys(): + if key in ['percentiles', 'target_ids', 'target_labels']: + # copy + final_h5_open.create_dataset(key, data=job0_h5_open[key]) + + elif key[-4:] == '_pct': + values = np.zeros(job0_h5_open[key].shape) + final_h5_open.create_dataset(key, data=values) + + elif job0_h5_open[key].dtype.char == 'S': + final_strings[key] = [] + + elif job0_h5_open[key].ndim == 1: + final_h5_open.create_dataset(key, shape=(num_variants,), dtype=job0_h5_open[key].dtype) + + else: + num_targets = job0_h5_open[key].shape[1] + final_h5_open.create_dataset(key, shape=(num_variants, num_targets), dtype=job0_h5_open[key].dtype) + + job0_h5_open.close() + + # set values + vi = 0 + for pi in range(num_procs): + # open job + job_h5_file = '%s/job%d/%s' % (out_dir, pi, file_name) + job_h5_open = h5py.File(job_h5_file, 'r') + + # append to final + for key in job_h5_open.keys(): + if key in ['percentiles', 'target_ids', 'target_labels']: + # once is enough + pass + + elif 
key[-4:] == '_pct': + # average + u_k1 = np.array(final_h5_open[key]) + x_k = np.array(job_h5_open[key]) + final_h5_open[key][:] = u_k1 + (x_k - u_k1) / (pi+1) + + else: + if job_h5_open[key].dtype.char == 'S': + final_strings[key] += list(job_h5_open[key]) + else: + job_variants = job_h5_open[key].shape[0] + try: + final_h5_open[key][vi:vi+job_variants] = job_h5_open[key] + except TypeError as e: + print(e) + print(f'{job_h5_file} ${key} has the wrong shape. Remove this file and rerun') + exit() + + vi += job_variants + job_h5_open.close() + + # create final string datasets + for key in final_strings: + final_h5_open.create_dataset(key, + data=np.array(final_strings[key], dtype='S')) + + final_h5_open.close() + + +def ensemble_sad_h5(ensemble_h5_file, scores_files): + # open ensemble + ensemble_h5 = h5py.File(ensemble_h5_file, 'w') + + # transfer base + base_keys = ['alt_allele','chr','pos','ref_allele','snp','target_ids','target_labels'] + sad_stats = [] + sad_shapes = [] + scores0_h5 = h5py.File(scores_files[0], 'r') + for key in scores0_h5.keys(): + if key in base_keys: + ensemble_h5.create_dataset(key, data=scores0_h5[key]) + else: + sad_stats.append(key) + sad_shapes.append(scores0_h5[key].shape) + scores0_h5.close() + + # average stats + num_folds = len(scores_files) + for si, sad_stat in enumerate(sad_stats): + # initialize ensemble array + sad_values = np.zeros(shape=sad_shapes[si], dtype='float32') + + # read and add folds + for scores_file in scores_files: + with h5py.File(scores_file, 'r') as scores_h5: + sad_values += scores_h5[sad_stat][:].astype('float32') + + # normalize and downcast + sad_values /= num_folds + sad_values = sad_values.astype('float16') + + # save + ensemble_h5.create_dataset(sad_stat, data=sad_values) + + ensemble_h5.close() + + +def options_string(options, group_options, rep_dir): + options_str = '' + + for opt in group_options.option_list: + opt_str = opt.get_opt_string() + opt_value = options.__dict__[opt.dest] + + # wrap 
askeriks in "" + if type(opt_value) == str and opt_value.find('*') != -1: + opt_value = '"%s"' % opt_value + + # no value for bools + elif type(opt_value) == bool: + if not opt_value: + opt_str = '' + opt_value = '' + + # skip Nones + elif opt_value is None: + opt_str = '' + opt_value = '' + + # modify + elif opt.dest == 'out_dir': + opt_value = rep_dir + + options_str += ' %s %s' % (opt_str, opt_value) + + return options_str + + +def split_sad(it_out_dir, posneg, vcf_dir, sad_stats): + """Split merged VCF predictions in HDF5 into tissue-specific + predictions in HDF5.""" + + merge_h5_file = '%s/merge_%s/sad.h5' % (it_out_dir, posneg) + merge_h5 = h5py.File(merge_h5_file, 'r') + + # read merged data + snps = [snp.decode('UTF-8') for snp in merge_h5['snp']] + merge_scores = {} + for ss in sad_stats: + merge_scores[ss] = merge_h5[ss][:] + + # hash snp indexes + snp_si = dict(zip(snps, np.arange(len(snps)))) + + # for each tissue VCF + vcf_glob = '%s/*_%s.vcf' % (vcf_dir, posneg) + for tissue_vcf_file in glob.glob(vcf_glob): + tissue_label = tissue_vcf_file.split('/')[-1] + tissue_label = tissue_label.replace('_pos.vcf','') + tissue_label = tissue_label.replace('_neg.vcf','') + + # initialize HDF5 arrays + sad_snp = [] + sad_chr = [] + sad_pos = [] + sad_ref = [] + sad_alt = [] + sad_scores = {} + for ss in sad_stats: + sad_scores[ss] = [] + + # fill HDF5 arrays with ordered SNPs + for line in open(tissue_vcf_file): + if not line.startswith('#'): + a = line.split() + chrm, pos, snp, ref, alt = a[:5] + sad_snp.append(snp) + sad_chr.append(chrm) + sad_pos.append(int(pos)) + sad_ref.append(ref) + sad_alt.append(alt) + + for ss in sad_stats: + si = snp_si[snp] + sad_scores[ss].append(merge_scores[ss][si]) + + # write tissue HDF5 + tissue_dir = '%s/%s_%s' % (it_out_dir, tissue_label, posneg) + os.makedirs(tissue_dir, exist_ok=True) + with h5py.File('%s/sad.h5' % tissue_dir, 'w') as tissue_h5: + # write SNPs + tissue_h5.create_dataset('snp', + data=np.array(sad_snp, 'S')) + 
+ # write SNP chr + tissue_h5.create_dataset('chr', + data=np.array(sad_chr, 'S')) + + # write SNP pos + tissue_h5.create_dataset('pos', + data=np.array(sad_pos, dtype='uint32')) + + # write ref allele + tissue_h5.create_dataset('ref_allele', + data=np.array(sad_ref, dtype='S')) + + # write alt allele + tissue_h5.create_dataset('alt_allele', + data=np.array(sad_alt, dtype='S')) + + # write targets + tissue_h5.create_dataset('target_ids', data=merge_h5['target_ids']) + tissue_h5.create_dataset('target_labels', data=merge_h5['target_labels']) + + # write sed stats + for ss in sad_stats: + tissue_h5.create_dataset(ss, + data=np.array(sad_scores[ss], dtype='float16')) + + merge_h5.close() + +################################################################################ +# __main__ +################################################################################ +if __name__ == '__main__': + main() diff --git a/src/scripts/basenji_gtex_coef.py b/src/scripts/basenji_gtex_coef.py new file mode 100644 index 0000000..264c750 --- /dev/null +++ b/src/scripts/basenji_gtex_coef.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python +from optparse import OptionParser +import os +import pdb +import re +import sys + +import h5py +import numpy as np +import pandas as pd +from scipy.stats import spearmanr +from sklearn.metrics import roc_auc_score + +import matplotlib.pyplot as plt +import seaborn as sns + +''' +basenji_gtex_coef.py + +Evaluate concordance of variant effect prediction sign classifcation +and coefficient correlations (gene-agnostic). 
+''' + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] ' + parser = OptionParser(usage) + + parser.add_option( + '-o', + dest='out_dir', + default='coef_out', + help='Output directory for tissue metrics' + ) + parser.add_option( + '-g', + dest='gtex_vcf_dir', + default='/home/drk/seqnn/data/gtex_fine/susie_pip90', + help='GTEx VCF directory' + ) + parser.add_option( + '-m', + dest='min_variants', + type=int, + default=32, + help='Minimum number of variants for tissue to be included' + ) + parser.add_option( + '-p', + dest='plot', + default=False, + action='store_true', + help='Generate tissue prediction plots' + ) + parser.add_option( + '-s', + dest='snp_stat', + default='logSAD', + help='SNP statistic. [Default: %(default)s]' + ) + parser.add_option( + '-v', + dest='verbose', + default=False, + action='store_true' + ) + + (options, args) = parser.parse_args() + + if len(args) != 1: + parser.error('Must provide gtex output directory') + else: + gtex_dir = args[0] + + os.makedirs(options.out_dir, exist_ok=True) + + tissue_keywords = { + 'Adipose_Subcutaneous': 'adipose', + 'Adipose_Visceral_Omentum': 'adipose', + 'Adrenal_Gland': 'adrenal_gland', + 'Artery_Aorta': 'heart', + 'Artery_Tibial': 'heart', + 'Brain_Cerebellum': 'brain', + 'Brain_Cortex': 'brain', + 'Breast_Mammary_Tissue': 'breast', + 'Colon_Sigmoid': 'colon', + 'Colon_Transverse': 'colon', + 'Esophagus_Mucosa': 'esophagus', + 'Esophagus_Muscularis': 'esophagus', + 'Liver': 'liver', + 'Lung': 'lung', + 'Muscle_Skeletal': 'muscle', + 'Nerve_Tibial': 'nerve', + 'Ovary': 'ovary', + 'Pancreas': 'pancreas', + 'Pituitary': 'pituitary', + 'Prostate': 'prostate', + 'Skin_Not_Sun_Exposed_Suprapubic': 'skin', + 'Spleen': 'spleen', + 'Stomach': 'stomach', + 'Testis': 'testis', + 'Thyroid': 'thyroid', + 'Whole_Blood': 'blood' + } + # 
'Cells_Cultured_fibroblasts': 'fibroblast', + + metrics_tissue = [] + metrics_sauroc = [] + metrics_cauroc = [] + metrics_r = [] + for tissue, keyword in tissue_keywords.items(): + if options.verbose: print(tissue) + + # read causal variants + eqtl_df = read_eqtl(tissue, options.gtex_vcf_dir) + if eqtl_df is not None and eqtl_df.shape[0] > options.min_variants: + # read model predictions + gtex_scores_file = f'{gtex_dir}/{tissue}_pos/sad.h5' + try: + variant_scores = read_scores(gtex_scores_file, keyword, eqtl_df, + options.snp_stat, verbose=options.verbose) + variant_scores = variant_scores[eqtl_df.consistent] + except TypeError: + print(f'Tracks matching {tissue} are missing', file=sys.stderr) + continue + + # compute sign AUROCs + variant_sign = eqtl_df[eqtl_df.consistent].sign + sign_auroc = roc_auc_score(variant_sign, variant_scores) + + # compute SpearmanR + variant_coef = eqtl_df[eqtl_df.consistent].coef + coef_r = spearmanr(variant_coef, variant_scores)[0] + + # classification AUROC + class_auroc = classify_auroc(gtex_scores_file, keyword, variant_scores, + options.snp_stat) + + if options.plot: + # write table + scatter_df = pd.DataFrame({ + 'variant': eqtl_df[eqtl_df.consistent].variant, + 'coef': variant_coef, + 'pred': variant_scores + }) + scatter_df.to_csv(f'{options.out_dir}/{tissue}.tsv', + index=False, sep='\t') + + # scatterplot + plt.figure(figsize=(6,6)) + sns.scatterplot(x=variant_coef, y=variant_scores, + alpha=0.5, s=20) + plt.gca().set_xlabel('eQTL coefficient') + plt.gca().set_ylabel('Variant effect prediction') + plt.savefig(f'{options.out_dir}/{tissue}.png', dpi=300) + + # save + metrics_tissue.append(tissue) + metrics_sauroc.append(sign_auroc) + metrics_cauroc.append(class_auroc) + metrics_r.append(coef_r) + + if options.verbose: print('') + + # save metrics + metrics_df = pd.DataFrame({ + 'tissue': metrics_tissue, + 'auroc_sign': metrics_sauroc, + 'spearmanr': metrics_r, + 'auroc_class': metrics_cauroc + }) + 
def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9,
              susie_dir: str='/home/drk/seqnn/data/gtex_fine/tissues_susie'):
    """Read fine-mapped eQTLs for a tissue from SuSiE output.

    Variants passing the PIP threshold are collapsed across genes: the
    posterior betas are averaged, and a variant is flagged inconsistent
    when its beta sign differs between rows. The returned table follows
    the variant order of '<gtex_vcf_dir>/<tissue>_pos.vcf'.

    Args:
      tissue (str): Tissue name.
      gtex_vcf_dir (str): GTEx VCF directory. If its path contains
        '_pip<NN>', the threshold is taken from it as NN/100.
      pip_t (float): PIP threshold, used only when the directory path
        does not encode one.
      susie_dir (str): Directory holding the '<tissue>.tsv' SuSiE tables.

    Returns:
      eqtl_df (pd.DataFrame): eQTL dataframe with columns variant, coef,
        sign, allele and consistent, or None if the tissue VCF is missing.

    Raises:
      ValueError: If the PIP threshold is outside (0, 1].
      KeyError: If the VCF lists a variant that did not pass the PIP filter.
    """
    # read causal variants
    eqtl_file = f'{susie_dir}/{tissue}.tsv'
    df_eqtl = pd.read_csv(eqtl_file, sep='\t', index_col=0)

    # pip filter; the threshold encoded in the directory name wins,
    # otherwise fall back to the pip_t argument
    pip_match = re.search(r"_pip(\d+)", gtex_vcf_dir)
    if pip_match is not None:
        pip_t = float(pip_match.group(1)) / 100
    if not (0 < pip_t <= 1):
        raise ValueError(f'PIP threshold must be in (0, 1], got {pip_t}')
    df_causal = df_eqtl[df_eqtl.pip > pip_t]

    # collapse rows per variant, tracking sign disagreements
    variant_a1 = {}
    variant_sign = {}
    variant_beta = {}
    inconsistent_variants = set()
    for variant in df_causal.itertuples():
        vid = variant.variant
        vsign = variant.beta_posterior > 0

        variant_a1[vid] = variant.allele1
        variant_beta.setdefault(vid, []).append(variant.beta_posterior)
        if vid not in variant_sign:
            # the first observed sign is the one reported for the variant
            variant_sign[vid] = vsign
        elif variant_sign[vid] != vsign:
            inconsistent_variants.add(vid)

    # average beta's across genes
    variant_beta = {vid: np.mean(betas) for vid, betas in variant_beta.items()}

    # order variants as they appear in the tissue VCF
    tissue_vcf_file = f'{gtex_vcf_dir}/{tissue}_pos.vcf'
    if not os.path.isfile(tissue_vcf_file):
        return None

    # skip every header line ('##' meta lines and the '#CHROM' column row)
    # and blank lines; the third column holds the variant id
    with open(tissue_vcf_file) as vcf_open:
        pred_variants = np.array([
            line.split()[2] for line in vcf_open
            if line.strip() and not line.startswith('#')
        ])
    consistent_mask = np.array(
        [vid not in inconsistent_variants for vid in pred_variants])

    # create dataframe
    eqtl_df = pd.DataFrame({
        'variant': pred_variants,
        'coef': [variant_beta[vid] for vid in pred_variants],
        'sign': [variant_sign[vid] for vid in pred_variants],
        'allele': [variant_a1[vid] for vid in pred_variants],
        'consistent': consistent_mask
    })
    return eqtl_df


def _match_gtex_targets(scores_h5, keyword: str, verbose: bool=False):
    """Return indexes of GTEx targets whose label contains the keyword.

    Args:
      scores_h5: Open scores HDF5 with 'target_ids' and 'target_labels'.
      keyword (str): tissue keyword, for matching GTEx targets
      verbose (bool): Print matching targets.

    Returns:
      np.array: matching target indexes (empty if nothing matches)
    """
    target_ids = [tid.decode('UTF-8') for tid in scores_h5['target_ids']]
    target_labels = [tl.decode('UTF-8') for tl in scores_h5['target_labels']]

    match_tis = []
    for ti, (target_id, target_label) in enumerate(zip(target_ids, target_labels)):
        if 'GTEX' not in target_id or keyword not in target_label:
            continue
        # the 'blood' keyword would otherwise also pick up 'vessel' tracks
        if keyword == 'blood' and 'vessel' in target_label:
            continue
        if verbose:
            print(ti, target_id, target_label)
        match_tis.append(ti)
    return np.array(match_tis)


def read_scores(gtex_scores_file: str,
                keyword: str,
                eqtl_df: pd.DataFrame,
                score_key: str='SAD',
                verbose: bool=False):
    """Read eQTL RNA predictions for the given tissue.

    Scores are averaged across the matching GTEx targets, arcsinh
    transformed, and negated wherever the HDF5 reference allele differs
    from the eQTL allele so that signs are comparable to the eQTL table.

    Args:
      gtex_scores_file (str): Variant scores HDF5.
      keyword (str): tissue keyword, for matching GTEx targets
      eqtl_df (pd.DataFrame): eQTL dataframe
      score_key (str): score key in HDF5 file
      verbose (bool): Print matching targets.

    Returns:
      np.array: eQTL predictions
    """
    print(gtex_scores_file)
    with h5py.File(gtex_scores_file, 'r') as gtex_scores_h5:
        score_ref = np.array([ref.decode('UTF-8')
                              for ref in gtex_scores_h5['ref_allele']])

        # determine matching GTEx targets
        match_tis = _match_gtex_targets(gtex_scores_h5, keyword, verbose)

        # mean across targets
        # NOTE(review): the caller catches TypeError from this function to
        # report missing tracks; presumably it is this indexing with an
        # empty match array that raises it - confirm.
        variant_scores = gtex_scores_h5[score_key][...,match_tis].mean(
            axis=-1, dtype='float32')
        variant_scores = np.arcsinh(variant_scores)

    # flip signs
    sad_flip = (score_ref != eqtl_df.allele)
    variant_scores[sad_flip] = -variant_scores[sad_flip]

    return variant_scores


def classify_auroc(gtex_scores_file: str,
                   keyword: str,
                   pos_scores: np.ndarray,
                   score_key: str='SAD',
                   verbose: bool=False):
    """Compute the AUROC separating positive from negative variants.

    Negative scores are read from the sibling file obtained by replacing
    '_pos' with '_neg' in the positive scores path, averaged across the
    matching GTEx targets and arcsinh transformed. Variants are then
    ranked by absolute score.

    Args:
      gtex_scores_file (str): Positive variant scores HDF5.
      keyword (str): tissue keyword, for matching GTEx targets
      pos_scores (np.array): eQTL predictions for the positive variants
      score_key (str): score key in HDF5 file
      verbose (bool): Print matching targets.

    Returns:
      float: AUROC of |score| for positives (label 1) vs negatives (label 0)
    """
    gtex_nscores_file = gtex_scores_file.replace('_pos','_neg')
    with h5py.File(gtex_nscores_file, 'r') as gtex_scores_h5:
        # determine matching GTEx targets
        match_tis = _match_gtex_targets(gtex_scores_h5, keyword, verbose)

        # mean across targets
        neg_scores = gtex_scores_h5[score_key][...,match_tis].mean(
            axis=-1, dtype='float32')
        neg_scores = np.arcsinh(neg_scores)

    # effect direction is irrelevant for classification; rank by magnitude
    pos_scores = np.abs(pos_scores)
    neg_scores = np.abs(neg_scores)
    X = np.concatenate([pos_scores, neg_scores])
    y = np.concatenate([np.ones_like(pos_scores), np.zeros_like(neg_scores)])
    return roc_auc_score(y, X)
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========================================================================= +from __future__ import print_function + +from optparse import OptionParser +import json +import pdb +import pickle +import os +import sys +import time + +import h5py +import numpy as np +import pandas as pd +import pysam +from scipy.sparse import dok_matrix +from scipy.special import rel_entr +import tensorflow as tf +from tqdm import tqdm + +from baskerville import dataset +from baskerville import seqnn +from baskerville import stream +from baskerville import vcf as bvcf + +''' +basenji_sad.py + +Compute SNP Activity Difference (SAD) scores for SNPs in a VCF file. 
################################################################################
# main
################################################################################
def main():
    """Score the SNPs of a VCF with a trained model and write SAD stats to HDF5.

    Positional arguments select the mode:
      3 args: params_file, model_file, vcf_file (single worker).
      4 args: options_pkl, params_file, model_file, vcf_file (pickled
        options, but the command-line output directory is kept).
      5 args: options_pkl, params_file, model_file, vcf_file, worker_index
        (pickled options; output goes to '<out_dir>/job<worker_index>' and
        only this worker's slice of the VCF is scored).

    Results are written to '<out_dir>/sad.h5'.
    """
    usage = 'usage: %prog [options] '
    parser = OptionParser(usage)
    parser.add_option(
        '-f',
        dest='genome_fasta',
        default='%s/assembly/ucsc/hg38.fa' % os.environ["HG38"],
        help='Genome FASTA for sequences [Default: %default]'
    )
    parser.add_option(
        '-o',
        dest='out_dir',
        default='sad',
        help='Output directory for tables and plots [Default: %default]'
    )
    parser.add_option(
        '-p',
        dest='processes',
        default=None,
        type='int',
        help='Number of processes, passed by multi script'
    )
    parser.add_option(
        '--rc',
        dest='rc',
        default=False,
        action='store_true',
        help='Average forward and reverse complement predictions [Default: %default]'
    )
    parser.add_option(
        '--shifts',
        dest='shifts',
        default='0',
        type='str',
        help='Ensemble prediction shifts [Default: %default]'
    )
    parser.add_option(
        '--stats',
        dest='sad_stats',
        default='SAD',
        help='Comma-separated list of stats to save. [Default: %default]'
    )
    parser.add_option(
        '-t',
        dest='targets_file',
        default=None,
        type='str',
        help='File specifying target indexes and labels in table format'
    )
    parser.add_option(
        '-u',
        dest='untransform_old',
        default=False,
        action='store_true'
    )
    parser.add_option(
        '--no_untransform',
        dest='no_untransform',
        default=False,
        action='store_true'
    )
    (options, args) = parser.parse_args()

    # only the multi worker mode provides a worker index
    worker_index = None

    if len(args) == 3:
        # single worker
        params_file = args[0]
        model_file = args[1]
        vcf_file = args[2]

    elif len(args) == 4:
        # multi separate
        options_pkl_file = args[0]
        params_file = args[1]
        model_file = args[2]
        vcf_file = args[3]

        # save out dir
        out_dir = options.out_dir

        # load options
        options = _load_options(options_pkl_file)

        # update output directory
        options.out_dir = out_dir

    elif len(args) == 5:
        # multi worker
        options_pkl_file = args[0]
        params_file = args[1]
        model_file = args[2]
        vcf_file = args[3]
        worker_index = int(args[4])

        # load options
        options = _load_options(options_pkl_file)

        # update output directory
        options.out_dir = '%s/job%d' % (options.out_dir, worker_index)

    else:
        parser.error('Must provide parameters and model files and QTL VCF file')

    # splitting the VCF across processes needs to know which slice is ours
    if options.processes is not None and worker_index is None:
        parser.error('Worker index is required when the number of processes is set')

    # also creates missing parents and tolerates workers racing on the directory
    os.makedirs(options.out_dir, exist_ok=True)

    options.shifts = [int(shift) for shift in options.shifts.split(',')]
    options.sad_stats = options.sad_stats.split(',')

    #################################################################
    # read parameters and targets

    # read model parameters
    with open(params_file) as params_open:
        params = json.load(params_open)
    params_model = params['model']
    params_train = params['train']

    if options.targets_file is None:
        target_slice = None
        sum_strand = False
    else:
        targets_df = pd.read_csv(options.targets_file, sep='\t', index_col=0)
        target_slice = targets_df.index

        if 'strand_pair' in targets_df.columns:
            sum_strand = True

            # prep strand
            targets_strand_df = dataset.targets_prep_strand(targets_df)

            # set strand pairs (using new indexing)
            orig_new_index = dict(zip(targets_df.index, np.arange(targets_df.shape[0])))
            targets_strand_pair = np.array([orig_new_index[ti] for ti in targets_df.strand_pair])
            params_model['strand_pair'] = [targets_strand_pair]

            # construct strand sum transform: row ti (input target) maps to
            # column sti (output target); the output column advances after
            # an unpaired target or after a target whose identifier ends in '-'
            strand_transform = dok_matrix((targets_df.shape[0], targets_strand_df.shape[0]))
            ti = 0
            sti = 0
            for _, target in targets_df.iterrows():
                strand_transform[ti,sti] = True
                if target.strand_pair == target.name:
                    sti += 1
                else:
                    if target.identifier[-1] == '-':
                        sti += 1
                ti += 1
            strand_transform = strand_transform.tocsr()

        else:
            targets_strand_df = targets_df
            sum_strand = False

    #################################################################
    # setup model

    # can we sum on GPU?
    # NOTE(review): options.sad_stats is a list at this point (split above),
    # so this comparison with the string 'SAD' is always False and the
    # write_snp_len path always runs. Left unchanged on purpose: enabling
    # build_sad() would feed its output through the untransform and strand
    # steps below - confirm that is valid before changing it to ['SAD'].
    sum_length = (options.sad_stats == 'SAD')

    seqnn_model = seqnn.SeqNN(params_model)
    seqnn_model.restore(model_file)
    seqnn_model.build_slice(target_slice)
    if sum_length:
        seqnn_model.build_sad()
    seqnn_model.build_ensemble(options.rc, options.shifts)

    targets_length = seqnn_model.target_lengths[0]
    num_targets = seqnn_model.num_targets()
    if options.targets_file is None:
        # no targets table; fabricate ids and empty labels for the output
        target_ids = ['t%d' % ti for ti in range(num_targets)]
        target_labels = ['']*len(target_ids)
        targets_strand_df = pd.DataFrame({
            'identifier':target_ids,
            'description':target_labels})

    #################################################################
    # load SNPs

    # filter for worker SNPs
    if options.processes is not None:
        # determine boundaries
        num_snps = bvcf.vcf_count(vcf_file)
        worker_bounds = np.linspace(0, num_snps, options.processes+1, dtype='int')

        # read SNPs from VCF
        snps = bvcf.vcf_snps(vcf_file, start_i=worker_bounds[worker_index],
                             end_i=worker_bounds[worker_index+1])

    else:
        # read SNPs from VCF
        snps = bvcf.vcf_snps(vcf_file)

    # open genome FASTA
    genome_open = pysam.Fastafile(options.genome_fasta)

    #################################################################
    # predict SNP scores, write output

    # setup output
    sad_out = initialize_output_h5(
        options.out_dir,
        options.sad_stats,
        snps,
        targets_length,
        targets_strand_df
    )

    for si, snp in tqdm(enumerate(snps), total=len(snps)):
        # get SNP sequences
        snp_1hot_list = bvcf.snp_seq1(snp, params_model['seq_length'], genome_open)
        snps_1hot = np.array(snp_1hot_list)

        # get predictions; with a batch size of 1 the two alleles are
        # predicted one at a time
        if params_train['batch_size'] == 1:
            ref_preds = seqnn_model(snps_1hot[:1])[0]
            alt_preds = seqnn_model(snps_1hot[1:])[0]
        else:
            snp_preds = seqnn_model(snps_1hot)
            ref_preds, alt_preds = snp_preds[0], snp_preds[1]

        # untransform predictions
        if options.targets_file is not None:
            if not options.no_untransform:
                if options.untransform_old:
                    ref_preds = dataset.untransform_preds1(ref_preds, targets_df)
                    alt_preds = dataset.untransform_preds1(alt_preds, targets_df)
                else:
                    ref_preds = dataset.untransform_preds(ref_preds, targets_df)
                    alt_preds = dataset.untransform_preds(alt_preds, targets_df)

        # sum strand pairs
        if sum_strand:
            ref_preds = ref_preds * strand_transform
            alt_preds = alt_preds * strand_transform

        # process SNP
        if sum_length:
            write_snp(
                ref_preds,
                alt_preds,
                sad_out,
                si,
                options.sad_stats
            )
        else:
            write_snp_len(
                ref_preds,
                alt_preds,
                sad_out,
                si,
                options.sad_stats
            )

    # close genome
    genome_open.close()

    ###################################################
    # compute SAD distributions across variants

    write_pct(sad_out, options.sad_stats)
    sad_out.close()


def _load_options(options_pkl_file):
    """Load the pickled options object written by the multi script."""
    # NOTE: pickle can execute arbitrary code on load; only use options
    # files produced by a trusted launcher.
    with open(options_pkl_file, 'rb') as options_pkl:
        return pickle.load(options_pkl)


def _clip_float16(values):
    """Clip values to the finite float16 range and cast to float16."""
    f16 = np.finfo(np.float16)
    return np.clip(values, f16.min, f16.max).astype('float16')


def _mean_rel_entr(ref_preds, alt_preds):
    """Per-target mean of the two directed relative entropies between
    pseudocount-smoothed, length-normalized ref and alt profiles."""
    # the pseudocount is the 25th percentile of the reference profile
    pseudocounts = np.percentile(ref_preds, 25, axis=0)
    ref_norm = ref_preds + pseudocounts
    ref_norm /= ref_norm.sum(axis=0)
    alt_norm = alt_preds + pseudocounts
    alt_norm /= alt_norm.sum(axis=0)

    ref_alt_entr = rel_entr(ref_norm, alt_norm).sum(axis=0)
    alt_ref_entr = rel_entr(alt_norm, ref_norm).sum(axis=0)
    return (ref_alt_entr + alt_ref_entr) / 2


def initialize_output_h5(out_dir, sad_stats, snps, targets_length, targets_df):
    """Initialize an output HDF5 file for SAD stats.

    Args:
      out_dir (str): Output directory; the file is '<out_dir>/sad.h5'.
      sad_stats ([str]): Names of the statistics to allocate.
      snps: SNP objects exposing rsid, chr, pos, flipped, ref_allele and
        alt_alleles.
      targets_length (int): Length axis size for the 'REF'/'ALT' datasets.
      targets_df (pd.DataFrame): Targets table with identifier and
        description columns; its row count sets the target axis size.

    Returns:
      h5py.File: Open, writable HDF5 file. The caller must close it.
    """
    num_targets = targets_df.shape[0]
    num_snps = len(snps)

    sad_out = h5py.File('%s/sad.h5' % out_dir, 'w')

    # write SNPs
    snp_ids = np.array([snp.rsid for snp in snps], 'S')
    sad_out.create_dataset('snp', data=snp_ids)

    # write SNP chr
    snp_chr = np.array([snp.chr for snp in snps], 'S')
    sad_out.create_dataset('chr', data=snp_chr)

    # write SNP pos
    snp_pos = np.array([snp.pos for snp in snps], dtype='uint32')
    sad_out.create_dataset('pos', data=snp_pos)

    # write SNP reference/alternative alleles, swapping them for SNPs
    # marked as flipped
    snp_refs = []
    snp_alts = []
    for snp in snps:
        if snp.flipped:
            snp_refs.append(snp.alt_alleles[0])
            snp_alts.append(snp.ref_allele)
        else:
            snp_refs.append(snp.ref_allele)
            snp_alts.append(snp.alt_alleles[0])
    sad_out.create_dataset('ref_allele', data=np.array(snp_refs, 'S'))
    sad_out.create_dataset('alt_allele', data=np.array(snp_alts, 'S'))

    # write targets
    sad_out.create_dataset('target_ids', data=np.array(targets_df.identifier, 'S'))
    sad_out.create_dataset('target_labels', data=np.array(targets_df.description, 'S'))

    # initialize SAD stats; raw predictions keep the length axis
    for sad_stat in sad_stats:
        if sad_stat in ['REF','ALT']:
            stat_shape = (num_snps, targets_length, num_targets)
        else:
            stat_shape = (num_snps, num_targets)
        sad_out.create_dataset(sad_stat, shape=stat_shape, dtype='float16')

    return sad_out


def write_pct(sad_out, sad_stats):
    """Compute percentile values for each target and write to HDF5.

    Writes a 'percentiles' dataset with the evaluated fractions and, for
    every statistic other than 'REF'/'ALT', a '<stat>_pct' dataset holding
    the percentiles taken across SNPs (axis 0) for each target.
    """
    # define percentiles: fine steps in both tails, coarse in the middle
    d_fine = 0.001
    d_coarse = 0.01
    percentiles_neg = np.arange(d_fine, 0.1, d_fine)
    percentiles_base = np.arange(0.1, 0.9, d_coarse)
    percentiles_pos = np.arange(0.9, 1, d_fine)

    percentiles = np.concatenate([percentiles_neg, percentiles_base, percentiles_pos])
    sad_out.create_dataset('percentiles', data=percentiles)

    for sad_stat in sad_stats:
        if sad_stat in ['REF','ALT']:
            continue
        sad_stat_pct = '%s_pct' % sad_stat

        # compute
        sad_pct = np.percentile(sad_out[sad_stat], 100*percentiles, axis=0).T
        sad_pct = sad_pct.astype('float16')

        # save
        sad_out.create_dataset(sad_stat_pct, data=sad_pct, dtype='float16')


def write_snp(ref_preds_sum, alt_preds_sum, sad_out, si, sad_stats):
    """Write SNP predictions to HDF, assuming the length dimension has
    been collapsed.

    Only the 'SAD' statistic is supported here; other requested statistics
    are ignored. Values are clipped to the finite float16 range before the
    cast, matching write_snp_len, so large differences do not become inf.
    """
    # compare reference to alternative via subtraction of the summed predictions
    if 'SAD' in sad_stats:
        sad = alt_preds_sum - ref_preds_sum
        sad_out['SAD'][si,:] = _clip_float16(sad)


def write_snp_len(ref_preds, alt_preds, sad_out, si, sad_stats):
    """Write SNP predictions to HDF, assuming the length dimension has
    been maintained.

    Args:
      ref_preds (np.array): Reference predictions, length x targets.
      alt_preds (np.array): Alternative predictions, same shape.
      sad_out: Mapping from statistic name to a writable array/dataset.
      si (int): SNP index, i.e. the row to write.
      sad_stats ([str]): Statistics to compute; others are skipped.
    """
    # unpacking also enforces 2D input
    _, num_targets = ref_preds.shape
    wanted = set(sad_stats)

    # transforms are only computed when a requested statistic needs them
    if wanted & {'logSAD', 'logD1', 'logD2', 'logJS'}:
        ref_preds_log = np.log2(ref_preds+1)
        alt_preds_log = np.log2(alt_preds+1)
        altref_log_diff = alt_preds_log - ref_preds_log
    if wanted & {'sqrtSAD', 'sqrtD1', 'sqrtD2'}:
        ref_preds_sqrt = np.sqrt(ref_preds)
        alt_preds_sqrt = np.sqrt(alt_preds)
        altref_sqrt_diff = alt_preds_sqrt - ref_preds_sqrt

    # sum across length
    ref_preds_sum = ref_preds.sum(axis=0)
    alt_preds_sum = alt_preds.sum(axis=0)

    # difference
    altref_diff = alt_preds - ref_preds
    altref_adiff = np.abs(altref_diff)

    # compare reference to alternative via sum subtraction
    if 'SAD' in wanted:
        sad_out['SAD'][si] = _clip_float16(alt_preds_sum - ref_preds_sum)
    if 'SADlog' in wanted:
        sad_log = np.log2(alt_preds_sum + 1) - np.log2(ref_preds_sum + 1)
        sad_out['SADlog'][si] = _clip_float16(sad_log)
    if 'logSAD' in wanted:
        log_sad = alt_preds_log.sum(axis=0) - ref_preds_log.sum(axis=0)
        sad_out['logSAD'][si] = _clip_float16(log_sad)
    if 'sqrtSAD' in wanted:
        sqrt_sad = alt_preds_sqrt.sum(axis=0) - ref_preds_sqrt.sum(axis=0)
        sad_out['sqrtSAD'][si] = _clip_float16(sqrt_sad)

    # compare reference to alternative via max subtraction: the signed
    # difference at the position of largest absolute difference
    if 'SAX' in wanted:
        max_i = np.argmax(altref_adiff, axis=0)
        sax = altref_diff[max_i, np.arange(num_targets)]
        sad_out['SAX'][si] = sax.astype('float16')

    # L1 norm of difference vector
    if 'D1' in wanted:
        sad_out['D1'][si] = _clip_float16(altref_adiff.sum(axis=0))
    if 'logD1' in wanted:
        sad_out['logD1'][si] = _clip_float16(np.abs(altref_log_diff).sum(axis=0))
    if 'sqrtD1' in wanted:
        sad_out['sqrtD1'][si] = _clip_float16(np.abs(altref_sqrt_diff).sum(axis=0))

    # L2 norm of difference vector
    if 'D2' in wanted:
        sad_d2 = np.sqrt(np.power(altref_diff, 2).sum(axis=0))
        sad_out['D2'][si] = _clip_float16(sad_d2)
    if 'logD2' in wanted:
        log_d2 = np.sqrt(np.power(altref_log_diff, 2).sum(axis=0))
        sad_out['logD2'][si] = _clip_float16(log_d2)
    if 'sqrtD2' in wanted:
        sqrt_d2 = np.sqrt(np.power(altref_sqrt_diff, 2).sum(axis=0))
        sad_out['sqrtD2'][si] = _clip_float16(sqrt_d2)

    # divergence between normalized profiles
    if 'JS' in wanted:
        js_dist = _mean_rel_entr(ref_preds, alt_preds)
        sad_out['JS'][si] = js_dist.astype('float16')
    if 'logJS' in wanted:
        log_js_dist = _mean_rel_entr(ref_preds_log, alt_preds_log)
        sad_out['logJS'][si] = log_js_dist.astype('float16')

    # predictions
    if 'REF' in wanted:
        sad_out['REF'][si] = _clip_float16(ref_preds)
    if 'ALT' in wanted:
        sad_out['ALT'][si] = _clip_float16(alt_preds)
""" ################################################################################ @@ -53,7 +53,7 @@ def main(): sed_options.add_option( '-f', dest='genome_fasta', - default='%s/data/hg38.fa' % os.environ['BASENJIDIR'], + default='%s/assembly/ucsc/hg38.fa' % os.environ['HG38'], help='Genome FASTA for sequences [Default: %default]', ) sed_options.add_option( @@ -125,6 +125,19 @@ def main(): type='int', help='Number of cross-fold rounds [Default:%default]', ) + fold_options.add_option( + '--folds', + dest='fold_subset', + default=1, + type='int', + help='Run a subset of folds [Default:%default]', + ) + fold_options.add_option( + '--f_list', + dest='fold_subset_list', + default=None, + help='Run a subset of folds (encoded as comma-separated string) [Default:%default]', + ) fold_options.add_option( '-d', dest='data_head', @@ -161,7 +174,7 @@ def main(): dest='processes', default=None, type='int', - help='Number of processes, passed by multi script. (Unused, but needs to appear as dummy.)', + help='Number of processes, passed by multi script.', ) fold_options.add_option( '-q', @@ -182,21 +195,16 @@ def main(): ####################################################### # prep work - # count folds - num_folds = 0 - fold0_dir = '%s/f%dc0' % (exp_dir, num_folds) - model_file = '%s/train/model_best.h5' % fold0_dir - if options.data_head is not None: - model_file = '%s/train/model%d_best.h5' % (fold0_dir, options.data_head) - while os.path.isfile(model_file): - num_folds += 1 - fold0_dir = '%s/f%dc0' % (exp_dir, num_folds) - model_file = '%s/train/model_best.h5' % fold0_dir - if options.data_head is not None: - model_file = '%s/train/model%d_best.h5' % (fold0_dir, options.data_head) - print('Found %d folds' % num_folds) - if num_folds == 0: - exit(1) + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + 
fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] # extract output subdirectory name gtex_out_dir = options.out_dir @@ -219,7 +227,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) name = '%s-f%dc%d' % (options.name, fi, ci) @@ -293,7 +301,7 @@ def main(): # collect output for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_out_dir = '%s/f%dc%d/%s' % (exp_dir, fi, ci, gtex_out_dir) # collect negatives @@ -311,7 +319,7 @@ def main(): # split study/tissue variants for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_out_dir = '%s/f%dc%d/%s' % (exp_dir, fi, ci, gtex_out_dir) print(it_out_dir) @@ -341,7 +349,7 @@ def main(): sed_pos_files = [] sed_neg_files = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) it_out_dir = '%s/%s' % (it_dir, gtex_out_dir) @@ -372,7 +380,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) it_out_dir = '%s/%s' % (it_dir, gtex_out_dir) diff --git a/src/scripts/borzoi_bench_ipaqtl_folds.py b/src/scripts/borzoi_bench_ipaqtl_folds.py index 55bbd10..9ddc9af 100755 --- a/src/scripts/borzoi_bench_ipaqtl_folds.py +++ b/src/scripts/borzoi_bench_ipaqtl_folds.py @@ -21,6 +21,7 @@ import numpy as np import slurm +import util """ borzoi_bench_ipaqtl_folds.py @@ -50,7 +51,9 @@ def main(): help="GTF for gene definition [Default %default]", ) sed_options.add_option( - "--apafile", dest="apa_file", default="polyadb_human_v3.csv.gz" + "--apafile", + dest="apa_file", + default="polyadb_human_v3.csv.gz" ) sed_options.add_option( "-o", @@ -77,7 +80,7 @@ def main(): dest="cov_min", default=100, type="float", - help="Coverage pseudocount [Default: %default]", + 
help="Coverage minimum for pA site [Default: %default]", ) sed_options.add_option( "--rc", @@ -106,6 +109,18 @@ def main(): type="str", help="File specifying target indexes and labels in table format", ) + sed_options.add_option( + '-u', + dest='untransform_old', + default=False, + action='store_true' + ) + sed_options.add_option( + '--no_untransform', + dest='no_untransform', + default=False, + action='store_true' + ) parser.add_option_group(sed_options) # classify @@ -128,6 +143,19 @@ def main(): type="int", help="Number of cross-fold rounds [Default:%default]", ) + fold_options.add_option( + "--folds", + dest="fold_subset", + default=1, + type="int", + help="Run a subset of folds [Default:%default]", + ) + fold_options.add_option( + "--f_list", + dest="fold_subset_list", + default=None, + help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", + ) fold_options.add_option( "-d", dest="data_head", @@ -187,21 +215,16 @@ def main(): ####################################################### # prep work - # count folds - num_folds = 0 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - while os.path.isfile(model_file): - num_folds += 1 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - print("Found %d folds" % num_folds) - if num_folds == 0: - exit(1) + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] sed_stats = options.sed_stats.split(",") @@ -220,7 +243,7 @@ def 
main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) name = "%s-f%dc%d" % (options.name, fi, ci) @@ -280,7 +303,7 @@ def main(): # split study/tissue variants for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) it_out_dir = "%s/%s" % (it_dir, options.out_dir) @@ -310,7 +333,7 @@ def main(): sed_pos_files = [] sed_neg_files = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) it_out_dir = "%s/%s" % (it_dir, options.out_dir) @@ -334,66 +357,88 @@ def main(): ensemble_sed_h5(ens_neg_file, sed_neg_files, sed_stats) ################################################################ - # fit classifiers + # (optionally) fit classifiers - cmd_base = "westminster_classify.py -i 100 -p 2 -r 44 -s --stat COVR" - cmd_base += " --msl %d" % options.msl + fit_classifiers = False # this analysis was ultimately not used in the manuscript + run_local = True - jobs = [] - for ci in range(options.crosses): - for fi in range(num_folds): - it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) - it_out_dir = "%s/%s" % (it_dir, options.out_dir) + if fit_classifiers: + if run_local: + cmd_base = "" + else: + cmd_base = ". 
/home/drk/anaconda3/etc/profile.d/conda.sh;" + cmd_base += " conda activate %s;" % options.conda_env + cmd_base += " echo $HOSTNAME;" - for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): - tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] - sed_pos = "%s/%s_pos/sed.h5" % (it_out_dir, tissue) - sed_neg = "%s/%s_neg/sed.h5" % (it_out_dir, tissue) - class_out_dir = "%s/%s_class" % (it_out_dir, tissue) - - if not options.restart or not os.path.isfile( - "%s/stats.txt" % class_out_dir - ): - cmd_class = "%s -o %s %s %s" % ( - cmd_base, - class_out_dir, - sed_pos, - sed_neg, - ) - j = slurm.Job( - cmd_class, - tissue, - "%s.out" % class_out_dir, - "%s.err" % class_out_dir, - queue="standard", - cpu=2, - mem=22000, - time="1-0:0:0", - ) - jobs.append(j) + cmd_base += " basenji_bench_classify.py -i 100 -p 2 -r 44 -s --stat COVR" + cmd_base += " --msl %d" % options.msl - # ensemble - for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): - tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] - sed_pos = "%s/%s_pos/sed.h5" % (sqtl_dir, tissue) - sed_neg = "%s/%s_neg/sed.h5" % (sqtl_dir, tissue) - class_out_dir = "%s/%s_class" % (sqtl_dir, tissue) - - if not options.restart or not os.path.isfile("%s/stats.txt" % class_out_dir): - cmd_class = "%s -o %s %s %s" % (cmd_base, class_out_dir, sed_pos, sed_neg) - j = slurm.Job( - cmd_class, - tissue, - "%s.out" % class_out_dir, - "%s.err" % class_out_dir, - queue="standard", - cpu=2, - mem=22000, - time="1-0:0:0", - ) - jobs.append(j) + jobs = [] + for ci in range(options.crosses): + for fi in fold_index: + it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) + it_out_dir = "%s/%s" % (it_dir, options.out_dir) + + for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): + tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] + sed_pos = "%s/%s_pos/sed.h5" % (it_out_dir, tissue) + sed_neg = "%s/%s_neg/sed.h5" % (it_out_dir, tissue) + class_out_dir = "%s/%s_class" % 
(it_out_dir, tissue) + + if not options.restart or not os.path.isfile( + "%s/stats.txt" % class_out_dir + ): + cmd_class = "%s -o %s %s %s" % ( + cmd_base, + class_out_dir, + sed_pos, + sed_neg, + ) + j = slurm.Job( + cmd_class, + tissue, + "%s.out" % class_out_dir, + "%s.err" % class_out_dir, + "%s.sb" % class_out_dir, + queue="standard", + cpu=2, + mem=22000, + time="1-0:0:0", + ) + if run_local: + jobs.append(cmd_class) + else: + jobs.append(j) + + # ensemble + for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): + tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] + sed_pos = "%s/%s_pos/sed.h5" % (sqtl_dir, tissue) + sed_neg = "%s/%s_neg/sed.h5" % (sqtl_dir, tissue) + class_out_dir = "%s/%s_class" % (sqtl_dir, tissue) + + if not options.restart or not os.path.isfile("%s/stats.txt" % class_out_dir): + cmd_class = "%s -o %s %s %s" % (cmd_base, class_out_dir, sed_pos, sed_neg) + j = slurm.Job( + cmd_class, + tissue, + "%s.out" % class_out_dir, + "%s.err" % class_out_dir, + "%s.sb" % class_out_dir, + queue="standard", + cpu=2, + mem=22000, + time="1-0:0:0", + ) + if run_local: + jobs.append(cmd_class) + else: + jobs.append(j) - slurm.multi_run(jobs, verbose=True) + if run_local: + util.exec_par(jobs, 6, verbose=True) + else: + slurm.multi_run(jobs, verbose=True) def complete_h5(h5_file, sed_stats): diff --git a/src/scripts/borzoi_bench_paqtl_folds.py b/src/scripts/borzoi_bench_paqtl_folds.py index 2f04909..9fc97c5 100755 --- a/src/scripts/borzoi_bench_paqtl_folds.py +++ b/src/scripts/borzoi_bench_paqtl_folds.py @@ -21,6 +21,7 @@ import numpy as np import slurm +import util """ borzoi_bench_paqtl_folds.py @@ -50,7 +51,9 @@ def main(): help="GTF for gene definition [Default %default]", ) sed_options.add_option( - "--apafile", dest="apa_file", default="polyadb_human_v3.csv.gz" + "--apafile", + dest="apa_file", + default="polyadb_human_v3.csv.gz" ) sed_options.add_option( "-o", @@ -77,7 +80,7 @@ def main(): dest="cov_min", default=100, 
type="float", - help="Coverage pseudocount [Default: %default]", + help="Coverage minimum for pA site [Default: %default]", ) sed_options.add_option( "--rc", @@ -106,6 +109,18 @@ def main(): type="str", help="File specifying target indexes and labels in table format", ) + sed_options.add_option( + '-u', + dest='untransform_old', + default=False, + action='store_true' + ) + sed_options.add_option( + '--no_untransform', + dest='no_untransform', + default=False, + action='store_true' + ) parser.add_option_group(sed_options) # classify @@ -128,6 +143,19 @@ def main(): type="int", help="Number of cross-fold rounds [Default:%default]", ) + fold_options.add_option( + "--folds", + dest="fold_subset", + default=1, + type="int", + help="Run a subset of folds [Default:%default]", + ) + fold_options.add_option( + "--f_list", + dest="fold_subset_list", + default=None, + help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", + ) fold_options.add_option( "-d", dest="data_head", @@ -187,21 +215,16 @@ def main(): ####################################################### # prep work - # count folds - num_folds = 0 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - while os.path.isfile(model_file): - num_folds += 1 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - print("Found %d folds" % num_folds) - if num_folds == 0: - exit(1) + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] 
sed_stats = options.sed_stats.split(",") @@ -220,7 +243,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) name = "%s-f%dc%d" % (options.name, fi, ci) @@ -280,7 +303,7 @@ def main(): # split study/tissue variants for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) it_out_dir = "%s/%s" % (it_dir, options.out_dir) @@ -310,7 +333,7 @@ def main(): sed_pos_files = [] sed_neg_files = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) it_out_dir = "%s/%s" % (it_dir, options.out_dir) @@ -334,66 +357,88 @@ def main(): ensemble_sed_h5(ens_neg_file, sed_neg_files, sed_stats) ################################################################ - # fit classifiers + # (optionally) fit classifiers - cmd_base = "westminster_classify.py -i 100 -p 2 -r 44 -s --stat COVR" - cmd_base += " --msl %d" % options.msl + fit_classifiers = False # this analysis was ultimately not used in the manuscript + run_local = True - jobs = [] - for ci in range(options.crosses): - for fi in range(num_folds): - it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) - it_out_dir = "%s/%s" % (it_dir, options.out_dir) + if fit_classifiers: + if run_local: + cmd_base = "" + else: + cmd_base = ". 
/home/drk/anaconda3/etc/profile.d/conda.sh;" + cmd_base += " conda activate %s;" % options.conda_env + cmd_base += " echo $HOSTNAME;" - for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): - tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] - sed_pos = "%s/%s_pos/sed.h5" % (it_out_dir, tissue) - sed_neg = "%s/%s_neg/sed.h5" % (it_out_dir, tissue) - class_out_dir = "%s/%s_class" % (it_out_dir, tissue) - - if not options.restart or not os.path.isfile( - "%s/stats.txt" % class_out_dir - ): - cmd_class = "%s -o %s %s %s" % ( - cmd_base, - class_out_dir, - sed_pos, - sed_neg, - ) - j = slurm.Job( - cmd_class, - tissue, - "%s.out" % class_out_dir, - "%s.err" % class_out_dir, - queue="standard", - cpu=2, - mem=22000, - time="1-0:0:0", - ) - jobs.append(j) + cmd_base += " basenji_bench_classify.py -i 100 -p 2 -r 44 -s --stat COVR" + cmd_base += " --msl %d" % options.msl - # ensemble - for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): - tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] - sed_pos = "%s/%s_pos/sed.h5" % (sqtl_dir, tissue) - sed_neg = "%s/%s_neg/sed.h5" % (sqtl_dir, tissue) - class_out_dir = "%s/%s_class" % (sqtl_dir, tissue) - - if not options.restart or not os.path.isfile("%s/stats.txt" % class_out_dir): - cmd_class = "%s -o %s %s %s" % (cmd_base, class_out_dir, sed_pos, sed_neg) - j = slurm.Job( - cmd_class, - tissue, - "%s.out" % class_out_dir, - "%s.err" % class_out_dir, - queue="standard", - cpu=2, - mem=22000, - time="1-0:0:0", - ) - jobs.append(j) + jobs = [] + for ci in range(options.crosses): + for fi in fold_index: + it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) + it_out_dir = "%s/%s" % (it_dir, options.out_dir) + + for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): + tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] + sed_pos = "%s/%s_pos/sed.h5" % (it_out_dir, tissue) + sed_neg = "%s/%s_neg/sed.h5" % (it_out_dir, tissue) + class_out_dir = "%s/%s_class" % 
(it_out_dir, tissue) + + if not options.restart or not os.path.isfile( + "%s/stats.txt" % class_out_dir + ): + cmd_class = "%s -o %s %s %s" % ( + cmd_base, + class_out_dir, + sed_pos, + sed_neg, + ) + j = slurm.Job( + cmd_class, + tissue, + "%s.out" % class_out_dir, + "%s.err" % class_out_dir, + "%s.sb" % class_out_dir, + queue="standard", + cpu=2, + mem=22000, + time="1-0:0:0", + ) + if run_local: + jobs.append(cmd_class) + else: + jobs.append(j) + + # ensemble + for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): + tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] + sed_pos = "%s/%s_pos/sed.h5" % (sqtl_dir, tissue) + sed_neg = "%s/%s_neg/sed.h5" % (sqtl_dir, tissue) + class_out_dir = "%s/%s_class" % (sqtl_dir, tissue) + + if not options.restart or not os.path.isfile("%s/stats.txt" % class_out_dir): + cmd_class = "%s -o %s %s %s" % (cmd_base, class_out_dir, sed_pos, sed_neg) + j = slurm.Job( + cmd_class, + tissue, + "%s.out" % class_out_dir, + "%s.err" % class_out_dir, + "%s.sb" % class_out_dir, + queue="standard", + cpu=2, + mem=22000, + time="1-0:0:0", + ) + if run_local: + jobs.append(cmd_class) + else: + jobs.append(j) - slurm.multi_run(jobs, verbose=True) + if run_local: + util.exec_par(jobs, 6, verbose=True) + else: + slurm.multi_run(jobs, verbose=True) def complete_h5(h5_file, sed_stats): diff --git a/src/scripts/borzoi_bench_sqtl_folds.py b/src/scripts/borzoi_bench_sqtl_folds.py index c71e724..6c74788 100755 --- a/src/scripts/borzoi_bench_sqtl_folds.py +++ b/src/scripts/borzoi_bench_sqtl_folds.py @@ -132,6 +132,19 @@ def main(): type="int", help="Number of cross-fold rounds [Default:%default]", ) + fold_options.add_option( + '--folds', + dest='fold_subset', + default=1, + type='int', + help='Run a subset of folds [Default:%default]', + ) + fold_options.add_option( + '--f_list', + dest='fold_subset_list', + default=None, + help='Run a subset of folds (encoded as comma-separated string) [Default:%default]', + ) 
fold_options.add_option( "-d", dest="data_head", @@ -172,7 +185,9 @@ def main(): help="Restart a partially completed job [Default: %default]", ) fold_options.add_option( - "--vcf", dest="vcf_dir", default="/home/drk/seqnn/data/qtl_cat/sqtl_pip90" + "--vcf", + dest="vcf_dir", + default="/home/drk/seqnn/data/qtl_cat/sqtl_pip90" ) parser.add_option_group(fold_options) @@ -187,21 +202,16 @@ def main(): ####################################################### # prep work - # count folds - num_folds = 0 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - while os.path.isfile(model_file): - num_folds += 1 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - print("Found %d folds" % num_folds) - if num_folds == 0: - exit(1) + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] sed_stats = options.sed_stats.split(",") @@ -220,7 +230,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) name = "%s-f%dc%d" % (options.name, fi, ci) @@ -282,7 +292,7 @@ def main(): # split study/tissue variants for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) it_out_dir = "%s/%s" % (it_dir, options.out_dir) print(it_out_dir) @@ -313,7 +323,7 @@ def main(): sed_pos_files = [] sed_neg_files = [] for ci in range(options.crosses): - for fi in 
range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) it_out_dir = "%s/%s" % (it_dir, options.out_dir) @@ -337,85 +347,88 @@ def main(): ensemble_sed_h5(ens_neg_file, sed_neg_files, sed_stats) ################################################################ - # fit classifiers + # (optionally) fit classifiers + fit_classifiers = False # this analysis was ultimately not used in the manuscript run_local = True - if run_local: - cmd_base = "" - else: - cmd_base = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd_base += " conda activate %s;" % options.conda_env - cmd_base += " echo $HOSTNAME;" - cmd_base += " westminster_classify.py -i 100 -p 2 -r 44 -s --stat nDi" - cmd_base += " --msl %d" % options.msl + if fit_classifiers: + if run_local: + cmd_base = "" + else: + cmd_base = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" + cmd_base += " conda activate %s;" % options.conda_env + cmd_base += " echo $HOSTNAME;" - jobs = [] - for ci in range(options.crosses): - for fi in range(num_folds): - it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) - it_out_dir = "%s/%s" % (it_dir, options.out_dir) + cmd_base += " basenji_bench_classify.py -i 100 -p 2 -r 44 -s --stat nDi" + cmd_base += " --msl %d" % options.msl - for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): - tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] - sed_pos = "%s/%s_pos/sed.h5" % (it_out_dir, tissue) - sed_neg = "%s/%s_neg/sed.h5" % (it_out_dir, tissue) - class_out_dir = "%s/%s_class" % (it_out_dir, tissue) - - if not options.restart or not os.path.isfile( - "%s/stats.txt" % class_out_dir - ): - cmd_class = "%s -o %s %s %s" % ( - cmd_base, - class_out_dir, - sed_pos, - sed_neg, - ) - j = slurm.Job( - cmd_class, - tissue, - "%s.out" % class_out_dir, - "%s.err" % class_out_dir, - "%s.sb" % class_out_dir, - queue="standard", - cpu=2, - mem=22000, - time="1-0:0:0", - ) - if run_local: - jobs.append(cmd_class) - else: - jobs.append(j) + jobs = [] + for ci in 
range(options.crosses): + for fi in fold_index: + it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) + it_out_dir = "%s/%s" % (it_dir, options.out_dir) - # ensemble - for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): - tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] - sed_pos = "%s/%s_pos/sed.h5" % (sqtl_dir, tissue) - sed_neg = "%s/%s_neg/sed.h5" % (sqtl_dir, tissue) - class_out_dir = "%s/%s_class" % (sqtl_dir, tissue) - - if not options.restart or not os.path.isfile("%s/stats.txt" % class_out_dir): - cmd_class = "%s -o %s %s %s" % (cmd_base, class_out_dir, sed_pos, sed_neg) - j = slurm.Job( - cmd_class, - tissue, - "%s.out" % class_out_dir, - "%s.err" % class_out_dir, - "%s.sb" % class_out_dir, - queue="standard", - cpu=2, - mem=22000, - time="1-0:0:0", - ) - if run_local: - jobs.append(cmd_class) - else: - jobs.append(j) + for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): + tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] + sed_pos = "%s/%s_pos/sed.h5" % (it_out_dir, tissue) + sed_neg = "%s/%s_neg/sed.h5" % (it_out_dir, tissue) + class_out_dir = "%s/%s_class" % (it_out_dir, tissue) + + if not options.restart or not os.path.isfile( + "%s/stats.txt" % class_out_dir + ): + cmd_class = "%s -o %s %s %s" % ( + cmd_base, + class_out_dir, + sed_pos, + sed_neg, + ) + j = slurm.Job( + cmd_class, + tissue, + "%s.out" % class_out_dir, + "%s.err" % class_out_dir, + "%s.sb" % class_out_dir, + queue="standard", + cpu=2, + mem=22000, + time="1-0:0:0", + ) + if run_local: + jobs.append(cmd_class) + else: + jobs.append(j) - if run_local: - util.exec_par(jobs, 6, verbose=True) - else: - slurm.multi_run(jobs, verbose=True) + # ensemble + for sqtl_pos_vcf in glob.glob("%s/*_pos.vcf" % options.vcf_dir): + tissue = os.path.splitext(os.path.split(sqtl_pos_vcf)[1])[0][:-4] + sed_pos = "%s/%s_pos/sed.h5" % (sqtl_dir, tissue) + sed_neg = "%s/%s_neg/sed.h5" % (sqtl_dir, tissue) + class_out_dir = "%s/%s_class" % (sqtl_dir, tissue) + + 
if not options.restart or not os.path.isfile("%s/stats.txt" % class_out_dir): + cmd_class = "%s -o %s %s %s" % (cmd_base, class_out_dir, sed_pos, sed_neg) + j = slurm.Job( + cmd_class, + tissue, + "%s.out" % class_out_dir, + "%s.err" % class_out_dir, + "%s.sb" % class_out_dir, + queue="standard", + cpu=2, + mem=22000, + time="1-0:0:0", + ) + if run_local: + jobs.append(cmd_class) + else: + jobs.append(j) + + if run_local: + util.exec_par(jobs, 6, verbose=True) + else: + slurm.multi_run(jobs, verbose=True) def complete_h5(h5_file, sed_stats): diff --git a/src/scripts/borzoi_bench_trip_folds.py b/src/scripts/borzoi_bench_trip_folds.py index d6eb4c1..7a8e3ca 100755 --- a/src/scripts/borzoi_bench_trip_folds.py +++ b/src/scripts/borzoi_bench_trip_folds.py @@ -98,6 +98,19 @@ def main(): type="int", help="Number of cross-fold rounds [Default:%default]", ) + fold_options.add_option( + "--folds", + dest="fold_subset", + default=1, + type="int", + help="Run a subset of folds [Default:%default]", + ) + fold_options.add_option( + "--f_list", + dest="fold_subset_list", + default=None, + help="Run a subset of folds (encoded as comma-separated string) [Default:%default]", + ) fold_options.add_option( "-d", dest="data_head", @@ -156,21 +169,16 @@ def main(): ####################################################### # prep work - # count folds - num_folds = 0 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - while os.path.isfile(model_file): - num_folds += 1 - fold0_dir = "%s/f%dc0" % (exp_dir, num_folds) - model_file = "%s/train/model_best.h5" % fold0_dir - if options.data_head is not None: - model_file = "%s/train/model%d_best.h5" % (fold0_dir, options.data_head) - print("Found %d folds" % num_folds) - if num_folds == 0: - exit(1) + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = 
options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] ################################################################ # TRIP prediction jobs @@ -183,7 +191,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = "%s/f%dc%d" % (exp_dir, fi, ci) name = "%s-f%dc%d" % (options.name, fi, ci) diff --git a/src/scripts/borzoi_gtex_coef.py b/src/scripts/borzoi_gtex_coef.py index 680d60b..b44d548 100644 --- a/src/scripts/borzoi_gtex_coef.py +++ b/src/scripts/borzoi_gtex_coef.py @@ -18,7 +18,7 @@ borzoi_gtex_coef.py Evaluate concordance of variant effect prediction sign classifcation -and coefficient correlations. +and coefficient correlations (gene-specific). ''' ################################################################################ @@ -57,7 +57,7 @@ def main(): parser.add_option( '-s', dest='snp_stat', - default='logSAD', + default='logSED', help='SNP statistic. 
[Default: %(default)s]', ) parser.add_option( diff --git a/src/scripts/borzoi_satg_gene_gpu.py b/src/scripts/borzoi_satg_gene_gpu.py index ae4f48d..fd22b45 100755 --- a/src/scripts/borzoi_satg_gene_gpu.py +++ b/src/scripts/borzoi_satg_gene_gpu.py @@ -60,8 +60,8 @@ def main(): parser.add_option( "--rc", dest="rc", - default=0, - type="int", + default=False, + action="store_true", help="Ensemble forward and reverse complement predictions [Default: %default]", ) parser.add_option( @@ -69,7 +69,21 @@ def main(): dest="folds", default="0", type="str", - help="Model folds to use in ensemble [Default: %default]", + help="Model folds to use in ensemble (comma-separated list) [Default: %default]", + ) + parser.add_option( + '-c', + dest='crosses', + default=1, + type='int', + help='Number of cross-fold rounds [Default:%default]', + ) + parser.add_option( + "--head", + dest="head_i", + default=0, + type="int", + help="Model head index [Default: %default]", ) parser.add_option( "--shifts", @@ -81,50 +95,50 @@ def main(): parser.add_option( "--span", dest="span", - default=0, - type="int", + default=False, + action="store_true", help="Aggregate entire gene span [Default: %default]", ) parser.add_option( - "--smoothgrad", - dest="smooth_grad", - default=0, - type="int", - help="Run smoothgrad [Default: %default]", + "--clip_soft", + dest="clip_soft", + default=None, + type="float", + help="Model clip_soft setting [Default: %default]", ) parser.add_option( - "--samples", - dest="n_samples", - default=5, - type="int", - help="Number of smoothgrad samples [Default: %default]", + "--track_scale", + dest="track_scale", + default=0.02, + type="float", + help="Target transform scale [Default: %default]", ) parser.add_option( - "--sampleprob", - dest="sample_prob", - default=0.875, + "--track_transform", + dest="track_transform", + default=0.75, type="float", - help="Probability of not mutating a position in smoothgrad [Default: %default]", + help="Target transform exponent [Default: 
%default]", ) parser.add_option( - "--clip_soft", - dest="clip_soft", - default=None, - type="float", - help="Model clip_soft setting [Default: %default]", + "--untransform_old", + dest="untransform_old", + default=False, + action="store_true", + help="Run gradients with old version of inverse transforms [Default: %default]", ) parser.add_option( - "--no_transform", - dest="no_transform", - default=0, - type="int", + "--no_untransform", + dest="no_untransform", + default=False, + action="store_true", help="Run gradients with no inverse transforms [Default: %default]", ) parser.add_option( "--get_preds", dest="get_preds", - default=0, - type="int", + default=False, + action="store_true", help="Store scalar predictions in addition to their gradients [Default: %default]", ) parser.add_option( @@ -214,7 +228,10 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore(model_folder + "/f0c0/model0_best.h5", 0, by_name=False) + seqnn_model.restore( + model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) seqnn_model.build_slice(targets_df.index, False) # seqnn_model.build_ensemble(options.rc, options.shifts) @@ -265,50 +282,215 @@ def main(): # loop over folds for fold_ix in options.folds: - print("-- Fold = " + str(fold_ix) + " --") - - # (re-)initialize HDF5 - scores_h5_file = "%s/scores_f%dc0.h5" % (options.out_dir, fold_ix) - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, "w") - scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) - scores_h5.create_dataset( - "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) - ) - if options.get_preds == 1: + for cross_ix in options.crosses: + + print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") + + # (re-)initialize HDF5 + scores_h5_file = "%s/scores_f%dc%d.h5" % (options.out_dir, fold_ix, 
cross_ix) + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, "w") + scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) scores_h5.create_dataset( - "preds", dtype="float32", shape=(num_genes, num_targets) + "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) ) - scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) - scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) - scores_h5.create_dataset("start", data=np.array(genes_start)) - scores_h5.create_dataset("end", data=np.array(genes_end)) - scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) - - # load model fold - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c0/model0_best.h5", 0, by_name=False - ) - seqnn_model.build_slice(targets_df.index, False) + if options.get_preds: + scores_h5.create_dataset( + "preds", dtype="float32", shape=(num_genes, num_targets) + ) + scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) + scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) + scores_h5.create_dataset("start", data=np.array(genes_start)) + scores_h5.create_dataset("end", data=np.array(genes_end)) + scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) + + # load model fold + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore( + model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) + + # optionally get (and store) scalar predictions before computing their gradients + if options.get_preds: + print(" - (prediction) - ", flush=True) + + for shift in options.shifts: + print("Processing shift %d" % shift, flush=True) + + for rev_comp in [False, True] if options.rc else [False]: + + if options.rc: + print( + "Fwd/rev = %s" % 
("fwd" if not rev_comp else "rev"), + flush=True, + ) + + seq_1hots = [] + gene_slices = [] + gene_targets = [] + + for gi, gene_id in enumerate(gene_list): + + if gi % 500 == 0: + print("Processing %d, %s" % (gi, gene_id), flush=True) + + gene = transcriptome.genes[gene_id] + + # make sequence + seq_1hot = make_seq_1hot( + genome_open, + genes_chr[gi], + genes_start[gi], + genes_end[gi], + seq_len, + ) + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + seq_out_len = model_stride * target_length + + # determine output positions + gene_slice = gene.output_slice( + seq_out_start, seq_out_len, model_stride, options.span + ) + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = ( + (targets_df.strand != "-") + if not rev_comp + else (targets_df.strand != "+") + ) + else: + gene_strand_mask = ( + (targets_df.strand != "+") + if not rev_comp + else (targets_df.strand != "-") + ) + + gene_target = np.array( + targets_df.index[gene_strand_mask].values + ) + + # accumulate data tensors + seq_1hots.append(seq_1hot[None, ...]) + gene_slices.append(gene_slice[None, ...]) + gene_targets.append(gene_target[None, ...]) + + if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: + + # concat sequences + seq_1hots = np.concatenate(seq_1hots, axis=0) + + # pad gene slices to same length (mark valid positions in mask tensor) + max_slice_len = int( + np.max( + [gene_slice.shape[1] for gene_slice in gene_slices] + ) + ) + + gene_masks = np.zeros( + (len(gene_slices), max_slice_len), dtype="float32" + ) + gene_slices_padded = np.zeros( + (len(gene_slices), max_slice_len), dtype="int32" + ) + for gii, gene_slice in enumerate(gene_slices): + for j in range(gene_slice.shape[1]): + gene_masks[gii, j] = 1.0 + gene_slices_padded[gii, j] = 
gene_slice[0, j] + + gene_slices = gene_slices_padded + + # concat gene-specific targets + gene_targets = np.concatenate(gene_targets, axis=0) + + # batch call count predictions + preds = predict_counts( + seqnn_model, + seq_1hots, + head_i=0, + target_slice=gene_targets, + pos_slice=gene_slices, + pos_mask=gene_masks, + chunk_size=buffer_size, + batch_size=1, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + untransform_old=options.untransform_old, + use_mean=False, + dtype="float32", + ) + + # save predictions + for gii, gene_slice in enumerate(gene_slices): + h5_gi = (gi // buffer_size) * buffer_size + gii + + # write to HDF5 + scores_h5["preds"][h5_gi, :] += preds[gii] / float( + len(options.shifts) + ) + + # clear sequence buffer + seq_1hots = [] + gene_slices = [] + gene_targets = [] + + # collect garbage + gc.collect() - track_scale = targets_df.iloc[0]["scale"] - track_transform = 3.0 / 4.0 + # optionally set pseudo count from predictions + pseudo_count = 0.0 + if options.pseudo_qtl is not None: + gene_preds = scores_h5["preds"][:] - # optionally get (and store) scalar predictions before computing their gradients - if options.get_preds == 1: - print(" - (prediction) - ", flush=True) + # filter on tissue + tissue_preds = None + + if tissue_genes is not None: + tissue_set = set(tissue_genes) + + # get subset of genes and predictions belonging to the pseudo count tissue + tissue_preds = [] + for gi, gene_id in enumerate(gene_list): + if gene_id.split(".")[0] in tissue_set: + tissue_preds.append(gene_preds[gi, 0]) + + tissue_preds = np.array(tissue_preds, dtype="float32") + else: + tissue_preds = np.array(gene_preds[:, 0], dtype="float32") + + print("tissue_preds.shape[0] = " + str(tissue_preds.shape[0])) + + print("np.min(tissue_preds) = " + str(np.min(tissue_preds))) + print("np.max(tissue_preds) = " + str(np.max(tissue_preds))) + + # set pseudo count based on quantile of predictions + pseudo_count 
= np.quantile(tissue_preds, q=options.pseudo_qtl) + + print("") + print("pseudo_count = " + str(round(pseudo_count, 6))) + + # compute gradients + print(" - (gradients) - ", flush=True) for shift in options.shifts: print("Processing shift %d" % shift, flush=True) - for rev_comp in [False, True] if options.rc == 1 else [False]: + for rev_comp in [False, True] if options.rc else [False]: - if options.rc == 1: + if options.rc: print( - "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), - flush=True, + "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True ) seq_1hots = [] @@ -338,7 +520,7 @@ def main(): # determine output positions gene_slice = gene.output_slice( - seq_out_start, seq_out_len, model_stride, options.span == 1 + seq_out_start, seq_out_len, model_stride, options.span ) if rev_comp: @@ -359,9 +541,7 @@ def main(): else (targets_df.strand != "-") ) - gene_target = np.array( - targets_df.index[gene_strand_mask].values - ) + gene_target = np.array(targets_df.index[gene_strand_mask].values) # accumulate data tensors seq_1hots.append(seq_1hot[None, ...]) @@ -375,9 +555,7 @@ def main(): # pad gene slices to same length (mark valid positions in mask tensor) max_slice_len = int( - np.max( - [gene_slice.shape[1] for gene_slice in gene_slices] - ) + np.max([gene_slice.shape[1] for gene_slice in gene_slices]) ) gene_masks = np.zeros( @@ -396,9 +574,8 @@ def main(): # concat gene-specific targets gene_targets = np.concatenate(gene_targets, axis=0) - # batch call count predictions - preds = predict_counts( - seqnn_model, + # batch call gradient computation + grads = seqnn_model.gradients( seq_1hots, head_i=0, target_slice=gene_targets, @@ -406,21 +583,32 @@ def main(): pos_mask=gene_masks, chunk_size=buffer_size, batch_size=1, - track_scale=track_scale, - track_transform=track_transform, + track_scale=options.track_scale, + track_transform=options.track_transform, clip_soft=options.clip_soft, + pseudo_count=pseudo_count, + 
untransform_old=options.untransform_old, + no_untransform=options.no_untransform, use_mean=False, - dtype="float32", + use_ratio=False, + use_logodds=False, + subtract_avg=True, + input_gate=False, + dtype="float16", ) - # save predictions + # undo augmentations and save gradients for gii, gene_slice in enumerate(gene_slices): + grad = unaugment_grads( + grads[gii, :, :, None], + fwdrc=(not rev_comp), + shift=shift, + ) + h5_gi = (gi // buffer_size) * buffer_size + gii # write to HDF5 - scores_h5["preds"][h5_gi, :] += preds[gii] / float( - len(options.shifts) - ) + scores_h5["grads"][h5_gi] += grad # clear sequence buffer seq_1hots = [] @@ -430,196 +618,22 @@ def main(): # collect garbage gc.collect() - # optionally set pseudo count from predictions - pseudo_count = 0.0 - if options.pseudo_qtl is not None: - gene_preds = scores_h5["preds"][:] - - # filter on tissue - tissue_preds = None - - if tissue_genes is not None: - tissue_set = set(tissue_genes) - - # get subset of genes and predictions belonging to the pseudo count tissue - tissue_preds = [] - for gi, gene_id in enumerate(gene_list): - if gene_id.split(".")[0] in tissue_set: - tissue_preds.append(gene_preds[gi, 0]) - - tissue_preds = np.array(tissue_preds, dtype="float32") - else: - tissue_preds = np.array(gene_preds[:, 0], dtype="float32") - - print("tissue_preds.shape[0] = " + str(tissue_preds.shape[0])) - - print("np.min(tissue_preds) = " + str(np.min(tissue_preds))) - print("np.max(tissue_preds) = " + str(np.max(tissue_preds))) - - # set pseudo count based on quantile of predictions - pseudo_count = np.quantile(tissue_preds, q=options.pseudo_qtl) - - print("") - print("pseudo_count = " + str(round(pseudo_count, 6))) - - # compute gradients - print(" - (gradients) - ", flush=True) - - for shift in options.shifts: - print("Processing shift %d" % shift, flush=True) - - for rev_comp in [False, True] if options.rc == 1 else [False]: - - if options.rc == 1: - print( - "Fwd/rev = %s" % ("fwd" if not rev_comp 
else "rev"), flush=True - ) - - seq_1hots = [] - gene_slices = [] - gene_targets = [] - - for gi, gene_id in enumerate(gene_list): - - if gi % 500 == 0: - print("Processing %d, %s" % (gi, gene_id), flush=True) - - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot( - genome_open, - genes_chr[gi], - genes_start[gi], - genes_end[gi], - seq_len, - ) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - seq_out_len = model_stride * target_length - - # determine output positions - gene_slice = gene.output_slice( - seq_out_start, seq_out_len, model_stride, options.span == 1 - ) - - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 + # save sequences and normalize gradients by total size of ensemble + for gi, gene_id in enumerate(gene_list): - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = ( - (targets_df.strand != "-") - if not rev_comp - else (targets_df.strand != "+") - ) - else: - gene_strand_mask = ( - (targets_df.strand != "+") - if not rev_comp - else (targets_df.strand != "-") - ) - - gene_target = np.array(targets_df.index[gene_strand_mask].values) - - # accumulate data tensors - seq_1hots.append(seq_1hot[None, ...]) - gene_slices.append(gene_slice[None, ...]) - gene_targets.append(gene_target[None, ...]) - - if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: - - # concat sequences - seq_1hots = np.concatenate(seq_1hots, axis=0) - - # pad gene slices to same length (mark valid positions in mask tensor) - max_slice_len = int( - np.max([gene_slice.shape[1] for gene_slice in gene_slices]) - ) - - gene_masks = np.zeros( - (len(gene_slices), max_slice_len), dtype="float32" - ) - gene_slices_padded = np.zeros( - (len(gene_slices), max_slice_len), dtype="int32" - ) - for gii, gene_slice in enumerate(gene_slices): - for j in 
range(gene_slice.shape[1]): - gene_masks[gii, j] = 1.0 - gene_slices_padded[gii, j] = gene_slice[0, j] - - gene_slices = gene_slices_padded - - # concat gene-specific targets - gene_targets = np.concatenate(gene_targets, axis=0) - - # batch call gradient computation - grads = seqnn_model.gradients( - seq_1hots, - head_i=0, - target_slice=gene_targets, - pos_slice=gene_slices, - pos_mask=gene_masks, - chunk_size=buffer_size - if options.smooth_grad != 1 - else buffer_size // options.n_samples, - batch_size=1, - track_scale=track_scale, - track_transform=track_transform, - clip_soft=options.clip_soft, - pseudo_count=pseudo_count, - no_transform=options.no_transform == 1, - use_mean=False, - use_ratio=False, - use_logodds=False, - subtract_avg=True, - input_gate=False, - smooth_grad=options.smooth_grad == 1, - n_samples=options.n_samples, - sample_prob=options.sample_prob, - dtype="float16", - ) - - # undo augmentations and save gradients - for gii, gene_slice in enumerate(gene_slices): - grad = unaugment_grads( - grads[gii, :, :, None], - fwdrc=(not rev_comp), - shift=shift, - ) - - h5_gi = (gi // buffer_size) * buffer_size + gii - - # write to HDF5 - scores_h5["grads"][h5_gi] += grad - - # clear sequence buffer - seq_1hots = [] - gene_slices = [] - gene_targets = [] - - # collect garbage - gc.collect() - - # save sequences and normalize gradients by total size of ensemble - for gi, gene_id in enumerate(gene_list): - - # re-make original sequence - seq_1hot = make_seq_1hot( - genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len - ) + # re-make original sequence + seq_1hot = make_seq_1hot( + genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len + ) - # write to HDF5 - scores_h5["seqs"][gi] = seq_1hot - scores_h5["grads"][gi] /= float( - (len(options.shifts) * (2 if options.rc == 1 else 1)) - ) + # write to HDF5 + scores_h5["seqs"][gi] = seq_1hot + scores_h5["grads"][gi] /= float( + (len(options.shifts) * (2 if options.rc else 1)) + ) - # 
collect garbage - gc.collect() + # collect garbage + gc.collect() # close files genome_open.close() @@ -682,6 +696,7 @@ def _count_func( track_scale=1.0, track_transform=1.0, clip_soft=None, + untransform_old=False, use_mean=False, ): @@ -690,16 +705,31 @@ def _count_func( model(seq_1hot, training=False), target_slice, axis=-1, batch_dims=1 ) - # undo scale - preds = preds / track_scale + if untransform_old: + # undo scale + preds = preds / track_scale - # undo soft_clip - if clip_soft is not None: - preds = tf.where(preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds) + # undo clip_soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds + ) + + # undo sqrt + preds = preds ** (1. / track_transform) + else: + # undo clip_soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft + 1) ** 2 + clip_soft - 1, preds + ) - # undo sqrt - preds = preds ** (1.0 / track_transform) + # undo sqrt + preds = -1 + (preds + 1) ** (1. 
/ track_transform) + # scale + preds = preds / track_scale + # aggregate over tracks (average) preds = tf.reduce_mean(preds, axis=-1) @@ -735,6 +765,7 @@ def predict_counts( track_scale=1.0, track_transform=1.0, clip_soft=None, + untransform_old=False, use_mean=False, dtype="float32", ): @@ -840,6 +871,7 @@ def predict_counts( track_scale, track_transform, clip_soft, + untransform_old, use_mean, ) .numpy() diff --git a/src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py b/src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py index ac166df..6177ef0 100644 --- a/src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py +++ b/src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py @@ -35,10 +35,10 @@ import pygene import tensorflow as tf -from basenji import dna_io -from basenji import gene as bgene -from basenji import seqnn -from borzoi_sed import targets_prep_strand +from baskerville import dna_io +from baskerville import gene as bgene +from baskerville import seqnn +from baskerville.dataset import targets_prep_strand from scipy.ndimage import gaussian_filter1d @@ -48,188 +48,6 @@ Perform a windowed shuffle analysis for genes specified in a GTF file, targeting regions specified in a separate csv. ''' -# tf code for computing ISM scores on GPU -@tf.function -def _score_func(model, seq_1hot, target_slice, pos_slice, pos_mask=None, pos_slice_denom=None, pos_mask_denom=True, track_scale=1., track_transform=1., clip_soft=None, pseudo_count=0., no_transform=False, aggregate_tracks=None, use_mean=False, use_ratio=False, use_logodds=False) : - - # predict - preds = tf.gather(model(seq_1hot, training=False), target_slice, axis=-1, batch_dims=1) - - if not no_transform : - - # undo scale - preds = preds / track_scale - - # undo soft_clip - if clip_soft is not None : - preds = tf.where(preds > clip_soft, (preds - clip_soft)**2 + clip_soft, preds) - - # undo sqrt - preds = preds**(1. 
/ track_transform) - - if aggregate_tracks is not None : - preds = tf.reduce_mean(tf.reshape(preds, (preds.shape[0], preds.shape[1], preds.shape[2] // aggregate_tracks, aggregate_tracks)), axis=-1) - - # slice specified positions - preds_slice = tf.gather(preds, pos_slice, axis=1, batch_dims=1) - if pos_mask is not None : - preds_slice = preds_slice * pos_mask - - # slice denominator positions - if use_ratio and pos_slice_denom is not None: - preds_slice_denom = tf.gather(preds, pos_slice_denom, axis=1, batch_dims=1) - if pos_mask_denom is not None : - preds_slice_denom = preds_slice_denom * pos_mask_denom - - # aggregate over positions - if not use_mean : - preds_agg = tf.reduce_sum(preds_slice, axis=1) - if use_ratio and pos_slice_denom is not None: - preds_agg_denom = tf.reduce_sum(preds_slice_denom, axis=1) - else : - if pos_mask is not None : - preds_agg = tf.reduce_sum(preds_slice, axis=1) / tf.reduce_sum(pos_mask, axis=1) - else : - preds_agg = tf.reduce_mean(preds_slice, axis=1) - - if use_ratio and pos_slice_denom is not None: - if pos_mask_denom is not None : - preds_agg_denom = tf.reduce_sum(preds_slice_denom, axis=1) / tf.reduce_sum(pos_mask_denom, axis=1) - else : - preds_agg_denom = tf.reduce_mean(preds_slice_denom, axis=1) - - # compute final statistic - if no_transform : - score_ratios = preds_agg - elif not use_ratio : - score_ratios = tf.math.log(preds_agg + pseudo_count + 1e-6) - else : - if not use_logodds : - score_ratios = tf.math.log((preds_agg + pseudo_count) / (preds_agg_denom + pseudo_count) + 1e-6) - else : - score_ratios = tf.math.log(((preds_agg + pseudo_count) / (preds_agg_denom + pseudo_count)) / (1. 
- ((preds_agg + pseudo_count) / (preds_agg_denom + pseudo_count))) + 1e-6) - - return score_ratios - -def get_ism_shuffle(seqnn_model, seq_1hot_wt, ism_regions, head_i=None, target_slice=None, pos_slice=None, pos_mask=None, pos_slice_denom=None, pos_mask_denom=None, track_scale=1., track_transform=1., clip_soft=None, pseudo_count=0., no_transform=False, aggregate_tracks=None, use_mean=False, use_ratio=False, use_logodds=False, bases=[0, 1, 2, 3], window_size=5, n_samples=8, mononuc_shuffle=False, dinuc_shuffle=False) : - - # choose model - if seqnn_model.ensemble is not None: - model = seqnn_model.ensemble - elif head_i is not None: - model = seqnn_model.models[head_i] - else: - model = seqnn_model.model - - # verify tensor shape(s) - seq_1hot_wt = seq_1hot_wt.astype('float32') - target_slice = np.array(target_slice).astype('int32') - pos_slice = np.array(pos_slice).astype('int32') - - # convert constants to tf tensors - track_scale = tf.constant(track_scale, dtype=tf.float32) - track_transform = tf.constant(track_transform, dtype=tf.float32) - if clip_soft is not None : - clip_soft = tf.constant(clip_soft, dtype=tf.float32) - pseudo_count = tf.constant(pseudo_count, dtype=tf.float32) - - if pos_mask is not None : - pos_mask = np.array(pos_mask).astype('float32') - - if use_ratio and pos_slice_denom is not None : - pos_slice_denom = np.array(pos_slice_denom).astype('int32') - - if pos_mask_denom is not None : - pos_mask_denom = np.array(pos_mask_denom).astype('float32') - - if len(seq_1hot_wt.shape) < 3: - seq_1hot_wt = seq_1hot_wt[None, ...] - - if len(target_slice.shape) < 2: - target_slice = target_slice[None, ...] - - if len(pos_slice.shape) < 2: - pos_slice = pos_slice[None, ...] - - if pos_mask is not None and len(pos_mask.shape) < 2: - pos_mask = pos_mask[None, ...] - - if use_ratio and pos_slice_denom is not None and len(pos_slice_denom.shape) < 2: - pos_slice_denom = pos_slice_denom[None, ...] 
- - if pos_mask_denom is not None and len(pos_mask_denom.shape) < 2: - pos_mask_denom = pos_mask_denom[None, ...] - - # convert to tf tensors - seq_1hot_wt_tf = tf.convert_to_tensor(seq_1hot_wt, dtype=tf.float32) - target_slice = tf.convert_to_tensor(target_slice, dtype=tf.int32) - pos_slice = tf.convert_to_tensor(pos_slice, dtype=tf.int32) - - if pos_mask is not None : - pos_mask = tf.convert_to_tensor(pos_mask, dtype=tf.float32) - - if use_ratio and pos_slice_denom is not None : - pos_slice_denom = tf.convert_to_tensor(pos_slice_denom, dtype=tf.int32) - - if pos_mask_denom is not None : - pos_mask_denom = tf.convert_to_tensor(pos_mask_denom, dtype=tf.float32) - - # allocate ism shuffle result tensor - pred_shuffle = np.zeros((seq_1hot_wt.shape[1], n_samples, target_slice.shape[1] // (aggregate_tracks if aggregate_tracks is not None else 1))) - - # get wt pred - score_wt = _score_func(model, seq_1hot_wt_tf, target_slice, pos_slice, pos_mask, pos_slice_denom, pos_mask_denom, track_scale, track_transform, clip_soft, pseudo_count, no_transform, aggregate_tracks, use_mean, use_ratio, use_logodds).numpy() - - for ism_region_i, [ism_start, ism_end] in enumerate(ism_regions) : - for j in range(ism_start, ism_end) : - j_start = j - window_size // 2 - j_end = j + window_size // 2 + 1 - - pos_index = np.arange(j_end - j_start) + j_start - - for sample_ix in range(n_samples): - seq_1hot_mut = np.copy(seq_1hot_wt) - seq_1hot_mut[0, j_start:j_end, :] = 0. - - if not mononuc_shuffle and not dinuc_shuffle: - nt_index = np.random.choice(bases, size=(j_end - j_start,)).tolist() - seq_1hot_mut[0, pos_index, nt_index] = 1. 
- elif mononuc_shuffle: - shuffled_pos_index = np.copy(pos_index) - np.random.shuffle(shuffled_pos_index) - - seq_1hot_mut[0, shuffled_pos_index, :] = seq_1hot_wt[0, pos_index, :] - else: # dinuc-shuffle - shuffled_pos_index = [ - [pos_index[pos_j], pos_index[pos_j + 1]] - if pos_j + 1 < pos_index.shape[0] else [pos_index[pos_j]] - for pos_j in range(0, pos_index.shape[0], 2) - ] - - shuffled_shuffle_index = np.arange(len(shuffled_pos_index), dtype="int32") - np.random.shuffle(shuffled_shuffle_index) - - shuffled_pos_index_new = [] - for pos_tuple_i in range(len(shuffled_pos_index)): - shuffled_pos_index_new.extend( - shuffled_pos_index[shuffled_shuffle_index[pos_tuple_i]] - ) - - shuffled_pos_index = np.array(shuffled_pos_index_new, dtype="int32") - seq_1hot_mut[0, shuffled_pos_index, :] = seq_1hot_wt[0, pos_index, :] - - # convert to tf tensor - seq_1hot_mut_tf = tf.convert_to_tensor(seq_1hot_mut, dtype=tf.float32) - - # get mut pred - score_mut = _score_func(model, seq_1hot_mut_tf, target_slice, pos_slice, pos_mask, pos_slice_denom, pos_mask_denom, track_scale, track_transform, clip_soft, pseudo_count, no_transform, aggregate_tracks, use_mean, use_ratio, use_logodds).numpy() - - pred_shuffle[j, sample_ix, :] = score_wt - score_mut - - pred_ism = np.tile(np.mean(pred_shuffle, axis=1, keepdims=True), (1, 4, 1)) * seq_1hot_wt[0, ..., None] - - return pred_ism - ################################################################################ # main @@ -252,8 +70,8 @@ def main(): parser.add_option( "--rc", dest="rc", - default=0, - type="int", + default=False, + action="store_true", help="Ensemble forward and reverse complement predictions [Default: %default]", ) parser.add_option( @@ -261,7 +79,21 @@ def main(): dest="folds", default="0", type="str", - help="Model folds to use in ensemble [Default: %default]", + help="Model folds to use in ensemble (comma-separated list) [Default: %default]", + ) + parser.add_option( + '-c', + dest='crosses', + default=1, + 
type='int', + help='Number of cross-fold rounds [Default:%default]', + ) + parser.add_option( + "--head", + dest="head_i", + default=0, + type="int", + help="Model head index [Default: %default]", ) parser.add_option( "--shifts", @@ -273,8 +105,8 @@ def main(): parser.add_option( "--span", dest="span", - default=0, - type="int", + default=False, + action="store_true", help="Aggregate entire gene span [Default: %default]", ) parser.add_option( @@ -285,10 +117,31 @@ def main(): help="Model clip_soft setting [Default: %default]", ) parser.add_option( - "--no_transform", - dest="no_transform", - default=0, - type="int", + "--track_scale", + dest="track_scale", + default=0.02, + type="float", + help="Target transform scale [Default: %default]", + ) + parser.add_option( + "--track_transform", + dest="track_transform", + default=0.75, + type="float", + help="Target transform exponent [Default: %default]", + ) + parser.add_option( + "--untransform_old", + dest="untransform_old", + default=False, + action="store_true", + help="Run gradients with old version of inverse transforms [Default: %default]", + ) + parser.add_option( + "--no_untransform", + dest="no_untransform", + default=False, + action="store_true", help="Run gradients with no inverse transforms [Default: %default]", ) parser.add_option( @@ -343,15 +196,15 @@ def main(): parser.add_option( '--mononuc_shuffle', dest='mononuc_shuffle', - default=0, - type='int', + default=False, + action="store_true", help='Mono-nucleotide shuffle [Default: %default]', ) parser.add_option( '--dinuc_shuffle', dest='dinuc_shuffle', - default=0, - type='int', + default=False, + action="store_true", help='Di-nucleotide shuffle [Default: %default]', ) (options, args) = parser.parse_args() @@ -398,7 +251,10 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore(model_folder + "/f0c0/model0_best.h5", 0, by_name=False) + seqnn_model.restore( + model_folder + "/f" + 
str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) seqnn_model.build_slice(targets_df.index, False) # seqnn_model.build_ensemble(options.rc, options.shifts) @@ -498,128 +354,131 @@ def main(): # loop over folds for fold_ix in options.folds : - print("-- Fold = " + str(fold_ix) + " --") - - # (re-)initialize HDF5 - scores_h5_file = '%s/ism_f%dc0.h5' % (options.out_dir, fold_ix) - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, 'w') - scores_h5.create_dataset('seqs', dtype='bool', - shape=(num_genes, seq_len, 4)) - scores_h5.create_dataset('isms', dtype='float16', - shape=(num_genes, seq_len, 4, num_targets // (options.aggregate_tracks if options.aggregate_tracks is not None else 1))) - scores_h5.create_dataset('gene', data=np.array(gene_list, dtype='S')) - scores_h5.create_dataset('chr', data=np.array(genes_chr, dtype='S')) - scores_h5.create_dataset('start', data=np.array(genes_start)) - scores_h5.create_dataset('end', data=np.array(genes_end)) - scores_h5.create_dataset('strand', data=np.array(genes_strand, dtype='S')) - - # load model fold - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore(model_folder + "/f" + str(fold_ix) + "c0/model0_best.h5", 0, by_name=False) - seqnn_model.build_slice(targets_df.index, False) - - track_scale = targets_df.iloc[0]['scale'] - track_transform = 3. / 4. 
- - for shift in options.shifts : - print('Processing shift %d' % shift, flush=True) - - for rev_comp in ([False, True] if options.rc == 1 else [False]) : - - if options.rc == 1 : - print('Fwd/rev = %s' % ('fwd' if not rev_comp else 'rev'), flush=True) - - for gi, gene_id in enumerate(gene_list): - - if gi % 5 == 0 : - print('Processing %d, %s' % (gi, gene_id), flush=True) - - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot(genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride*model_crop - seq_out_len = model_stride*target_length - - # determine output positions - gene_slice = gene.output_slice(seq_out_start, seq_out_len, model_stride, options.span == 1) - - # get ism window regions - gene_ism_regions = genes_ism_regions[gi] - - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 - - gene_ism_regions = [] - for [genes_ism_start_orig, gene_ism_end_orig] in genes_ism_regions[gi] : - gene_ism_start = seq_len - gene_ism_end_orig - 1 - gene_ism_end = seq_len - genes_ism_start_orig - 1 - - gene_ism_regions.append([gene_ism_start, gene_ism_end]) - - # slice relevant strand targets - if genes_strand[gi] == '+': - gene_strand_mask = (targets_df.strand != '-') if not rev_comp else (targets_df.strand != '+') - else: - gene_strand_mask = (targets_df.strand != '+') if not rev_comp else (targets_df.strand != '-') - - gene_target = np.array(targets_df.index[gene_strand_mask].values) - - # broadcast to singleton batch - seq_1hot = seq_1hot[None, ...] - gene_slice = gene_slice[None, ...] - gene_target = gene_target[None, ...] 
- - # ism computation - ism = get_ism_shuffle( - seqnn_model, - seq_1hot, - gene_ism_regions, - head_i=0, - target_slice=gene_target, - pos_slice=gene_slice, - track_scale=track_scale, - track_transform=track_transform, - clip_soft=options.clip_soft, - pseudo_count=pseudo_count, - no_transform=options.no_transform == 1, - aggregate_tracks=options.aggregate_tracks, - use_mean=False, - use_ratio=False, - use_logodds=False, - window_size=options.window_size, - n_samples=options.n_samples, - mononuc_shuffle=options.mononuc_shuffle == 1, - dinuc_shuffle=options.dinuc_shuffle == 1, - ) - - # undo augmentations and save ism - ism = unaugment_grads(ism, fwdrc=(not rev_comp), shift=shift) - - # write to HDF5 - scores_h5['isms'][gi] += ism[:, ...] - - # collect garbage - gc.collect() - - # save sequences and normalize isms by total size of ensemble - for gi, gene_id in enumerate(gene_list): - - # re-make original sequence - seq_1hot = make_seq_1hot(genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len) + for cross_ix in options.crosses: - # write to HDF5 - scores_h5['seqs'][gi] = seq_1hot[:, ...] 
- scores_h5['isms'][gi] /= float((len(options.shifts) * (2 if options.rc == 1 else 1))) - - # collect garbage - gc.collect() + print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") + + # (re-)initialize HDF5 + scores_h5_file = '%s/ism_f%dc%d.h5' % (options.out_dir, fold_ix, cross_ix) + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, 'w') + scores_h5.create_dataset('seqs', dtype='bool', + shape=(num_genes, seq_len, 4)) + scores_h5.create_dataset('isms', dtype='float16', + shape=(num_genes, seq_len, 4, num_targets // (options.aggregate_tracks if options.aggregate_tracks is not None else 1))) + scores_h5.create_dataset('gene', data=np.array(gene_list, dtype='S')) + scores_h5.create_dataset('chr', data=np.array(genes_chr, dtype='S')) + scores_h5.create_dataset('start', data=np.array(genes_start)) + scores_h5.create_dataset('end', data=np.array(genes_end)) + scores_h5.create_dataset('strand', data=np.array(genes_strand, dtype='S')) + + # load model fold + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore( + model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) + + for shift in options.shifts : + print('Processing shift %d' % shift, flush=True) + + for rev_comp in ([False, True] if options.rc else [False]) : + + if options.rc : + print('Fwd/rev = %s' % ('fwd' if not rev_comp else 'rev'), flush=True) + + for gi, gene_id in enumerate(gene_list): + + if gi % 5 == 0 : + print('Processing %d, %s' % (gi, gene_id), flush=True) + + gene = transcriptome.genes[gene_id] + + # make sequence + seq_1hot = make_seq_1hot(genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len) + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride*model_crop + seq_out_len = model_stride*target_length + 
+ # determine output positions + gene_slice = gene.output_slice(seq_out_start, seq_out_len, model_stride, options.span) + + # get ism window regions + gene_ism_regions = genes_ism_regions[gi] + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + + gene_ism_regions = [] + for [genes_ism_start_orig, gene_ism_end_orig] in genes_ism_regions[gi] : + gene_ism_start = seq_len - gene_ism_end_orig - 1 + gene_ism_end = seq_len - genes_ism_start_orig - 1 + + gene_ism_regions.append([gene_ism_start, gene_ism_end]) + + # slice relevant strand targets + if genes_strand[gi] == '+': + gene_strand_mask = (targets_df.strand != '-') if not rev_comp else (targets_df.strand != '+') + else: + gene_strand_mask = (targets_df.strand != '+') if not rev_comp else (targets_df.strand != '-') + + gene_target = np.array(targets_df.index[gene_strand_mask].values) + + # broadcast to singleton batch + seq_1hot = seq_1hot[None, ...] + gene_slice = gene_slice[None, ...] + gene_target = gene_target[None, ...] + + # ism computation + ism = get_ism_shuffle( + seqnn_model, + seq_1hot, + gene_ism_regions, + head_i=0, + target_slice=gene_target, + pos_slice=gene_slice, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + pseudo_count=pseudo_count, + untransform_old=options.untransform_old, + no_untransform=options.no_untransform, + aggregate_tracks=options.aggregate_tracks, + use_mean=False, + use_ratio=False, + use_logodds=False, + window_size=options.window_size, + n_samples=options.n_samples, + mononuc_shuffle=options.mononuc_shuffle, + dinuc_shuffle=options.dinuc_shuffle, + ) + + # undo augmentations and save ism + ism = unaugment_grads(ism, fwdrc=(not rev_comp), shift=shift) + + # write to HDF5 + scores_h5['isms'][gi] += ism[:, ...] 
+ + # collect garbage + gc.collect() + + # save sequences and normalize isms by total size of ensemble + for gi, gene_id in enumerate(gene_list): + + # re-make original sequence + seq_1hot = make_seq_1hot(genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len) + + # write to HDF5 + scores_h5['seqs'][gi] = seq_1hot[:, ...] + scores_h5['isms'][gi] /= float((len(options.shifts) * (2 if options.rc else 1))) + + # collect garbage + gc.collect() # close files genome_open.close() @@ -670,6 +529,205 @@ def make_seq_1hot(genome_open, chrm, start, end, seq_len): seq_1hot = dna_io.dna_1hot(seq_dna) return seq_1hot + +# tf code for computing ISM scores on GPU +@tf.function +def _score_func(model, seq_1hot, target_slice, pos_slice, pos_mask=None, pos_slice_denom=None, pos_mask_denom=True, track_scale=1., track_transform=1., clip_soft=None, pseudo_count=0., untransform_old=False, no_untransform=False, aggregate_tracks=None, use_mean=False, use_ratio=False, use_logodds=False) : + + # predict + preds = tf.gather(model(seq_1hot, training=False), target_slice, axis=-1, batch_dims=1) + + if not no_untransform: + if untransform_old: + # undo scale + preds = preds / track_scale + + # undo soft_clip + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds + ) + + # undo sqrt + preds = preds ** (1. / track_transform) + else: + # undo clip_soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft + 1) ** 2 + clip_soft - 1, preds + ) + + # undo sqrt + preds = -1 + (preds + 1) ** (1. 
/ track_transform) + + # scale + preds = preds / track_scale + + if aggregate_tracks is not None : + preds = tf.reduce_mean(tf.reshape(preds, (preds.shape[0], preds.shape[1], preds.shape[2] // aggregate_tracks, aggregate_tracks)), axis=-1) + + # slice specified positions + preds_slice = tf.gather(preds, pos_slice, axis=1, batch_dims=1) + if pos_mask is not None : + preds_slice = preds_slice * pos_mask + + # slice denominator positions + if use_ratio and pos_slice_denom is not None: + preds_slice_denom = tf.gather(preds, pos_slice_denom, axis=1, batch_dims=1) + if pos_mask_denom is not None : + preds_slice_denom = preds_slice_denom * pos_mask_denom + + # aggregate over positions + if not use_mean : + preds_agg = tf.reduce_sum(preds_slice, axis=1) + if use_ratio and pos_slice_denom is not None: + preds_agg_denom = tf.reduce_sum(preds_slice_denom, axis=1) + else : + if pos_mask is not None : + preds_agg = tf.reduce_sum(preds_slice, axis=1) / tf.reduce_sum(pos_mask, axis=1) + else : + preds_agg = tf.reduce_mean(preds_slice, axis=1) + + if use_ratio and pos_slice_denom is not None: + if pos_mask_denom is not None : + preds_agg_denom = tf.reduce_sum(preds_slice_denom, axis=1) / tf.reduce_sum(pos_mask_denom, axis=1) + else : + preds_agg_denom = tf.reduce_mean(preds_slice_denom, axis=1) + + # compute final statistic + if no_untransform : + score_ratios = preds_agg + elif not use_ratio : + score_ratios = tf.math.log(preds_agg + pseudo_count + 1e-6) + else : + if not use_logodds : + score_ratios = tf.math.log((preds_agg + pseudo_count) / (preds_agg_denom + pseudo_count) + 1e-6) + else : + score_ratios = tf.math.log(((preds_agg + pseudo_count) / (preds_agg_denom + pseudo_count)) / (1. 
- ((preds_agg + pseudo_count) / (preds_agg_denom + pseudo_count))) + 1e-6) + + return score_ratios + + +def get_ism_shuffle(seqnn_model, seq_1hot_wt, ism_regions, head_i=None, target_slice=None, pos_slice=None, pos_mask=None, pos_slice_denom=None, pos_mask_denom=None, track_scale=1., track_transform=1., clip_soft=None, pseudo_count=0., untransform_old=False, no_untransform=False, aggregate_tracks=None, use_mean=False, use_ratio=False, use_logodds=False, bases=[0, 1, 2, 3], window_size=5, n_samples=8, mononuc_shuffle=False, dinuc_shuffle=False) : + + # choose model + if seqnn_model.ensemble is not None: + model = seqnn_model.ensemble + elif head_i is not None: + model = seqnn_model.models[head_i] + else: + model = seqnn_model.model + + # verify tensor shape(s) + seq_1hot_wt = seq_1hot_wt.astype('float32') + target_slice = np.array(target_slice).astype('int32') + pos_slice = np.array(pos_slice).astype('int32') + + # convert constants to tf tensors + track_scale = tf.constant(track_scale, dtype=tf.float32) + track_transform = tf.constant(track_transform, dtype=tf.float32) + if clip_soft is not None : + clip_soft = tf.constant(clip_soft, dtype=tf.float32) + pseudo_count = tf.constant(pseudo_count, dtype=tf.float32) + + if pos_mask is not None : + pos_mask = np.array(pos_mask).astype('float32') + + if use_ratio and pos_slice_denom is not None : + pos_slice_denom = np.array(pos_slice_denom).astype('int32') + + if pos_mask_denom is not None : + pos_mask_denom = np.array(pos_mask_denom).astype('float32') + + if len(seq_1hot_wt.shape) < 3: + seq_1hot_wt = seq_1hot_wt[None, ...] + + if len(target_slice.shape) < 2: + target_slice = target_slice[None, ...] + + if len(pos_slice.shape) < 2: + pos_slice = pos_slice[None, ...] + + if pos_mask is not None and len(pos_mask.shape) < 2: + pos_mask = pos_mask[None, ...] + + if use_ratio and pos_slice_denom is not None and len(pos_slice_denom.shape) < 2: + pos_slice_denom = pos_slice_denom[None, ...] 
+ + if pos_mask_denom is not None and len(pos_mask_denom.shape) < 2: + pos_mask_denom = pos_mask_denom[None, ...] + + # convert to tf tensors + seq_1hot_wt_tf = tf.convert_to_tensor(seq_1hot_wt, dtype=tf.float32) + target_slice = tf.convert_to_tensor(target_slice, dtype=tf.int32) + pos_slice = tf.convert_to_tensor(pos_slice, dtype=tf.int32) + + if pos_mask is not None : + pos_mask = tf.convert_to_tensor(pos_mask, dtype=tf.float32) + + if use_ratio and pos_slice_denom is not None : + pos_slice_denom = tf.convert_to_tensor(pos_slice_denom, dtype=tf.int32) + + if pos_mask_denom is not None : + pos_mask_denom = tf.convert_to_tensor(pos_mask_denom, dtype=tf.float32) + + # allocate ism shuffle result tensor + pred_shuffle = np.zeros((seq_1hot_wt.shape[1], n_samples, target_slice.shape[1] // (aggregate_tracks if aggregate_tracks is not None else 1))) + + # get wt pred + score_wt = _score_func(model, seq_1hot_wt_tf, target_slice, pos_slice, pos_mask, pos_slice_denom, pos_mask_denom, track_scale, track_transform, clip_soft, pseudo_count, untransform_old, no_untransform, aggregate_tracks, use_mean, use_ratio, use_logodds).numpy() + + for ism_region_i, [ism_start, ism_end] in enumerate(ism_regions) : + for j in range(ism_start, ism_end) : + j_start = j - window_size // 2 + j_end = j + window_size // 2 + 1 + + pos_index = np.arange(j_end - j_start) + j_start + + for sample_ix in range(n_samples): + seq_1hot_mut = np.copy(seq_1hot_wt) + seq_1hot_mut[0, j_start:j_end, :] = 0. + + if not mononuc_shuffle and not dinuc_shuffle: + nt_index = np.random.choice(bases, size=(j_end - j_start,)).tolist() + seq_1hot_mut[0, pos_index, nt_index] = 1. 
+ elif mononuc_shuffle: + shuffled_pos_index = np.copy(pos_index) + np.random.shuffle(shuffled_pos_index) + + seq_1hot_mut[0, shuffled_pos_index, :] = seq_1hot_wt[0, pos_index, :] + else: # dinuc-shuffle + shuffled_pos_index = [ + [pos_index[pos_j], pos_index[pos_j + 1]] + if pos_j + 1 < pos_index.shape[0] else [pos_index[pos_j]] + for pos_j in range(0, pos_index.shape[0], 2) + ] + + shuffled_shuffle_index = np.arange(len(shuffled_pos_index), dtype="int32") + np.random.shuffle(shuffled_shuffle_index) + + shuffled_pos_index_new = [] + for pos_tuple_i in range(len(shuffled_pos_index)): + shuffled_pos_index_new.extend( + shuffled_pos_index[shuffled_shuffle_index[pos_tuple_i]] + ) + + shuffled_pos_index = np.array(shuffled_pos_index_new, dtype="int32") + seq_1hot_mut[0, shuffled_pos_index, :] = seq_1hot_wt[0, pos_index, :] + + # convert to tf tensor + seq_1hot_mut_tf = tf.convert_to_tensor(seq_1hot_mut, dtype=tf.float32) + + # get mut pred + score_mut = _score_func(model, seq_1hot_mut_tf, target_slice, pos_slice, pos_mask, pos_slice_denom, pos_mask_denom, track_scale, track_transform, clip_soft, pseudo_count, untransform_old, no_untransform, aggregate_tracks, use_mean, use_ratio, use_logodds).numpy() + + pred_shuffle[j, sample_ix, :] = score_wt - score_mut + + pred_ism = np.tile(np.mean(pred_shuffle, axis=1, keepdims=True), (1, 4, 1)) * seq_1hot_wt[0, ..., None] + + return pred_ism + + ################################################################################ # __main__ # ############################################################################### diff --git a/src/scripts/borzoi_satg_gene_gpu_focused_ism.py b/src/scripts/borzoi_satg_gene_gpu_focused_ism.py index 3e7e46a..0f6cd08 100755 --- a/src/scripts/borzoi_satg_gene_gpu_focused_ism.py +++ b/src/scripts/borzoi_satg_gene_gpu_focused_ism.py @@ -58,8 +58,8 @@ def main(): parser.add_option( "--rc", dest="rc", - default=0, - type="int", + default=False, + action="store_true", help="Ensemble forward and reverse 
complement predictions [Default: %default]", ) parser.add_option( @@ -67,7 +67,21 @@ def main(): dest="folds", default="0", type="str", - help="Model folds to use in ensemble [Default: %default]", + help="Model folds to use in ensemble (comma-separated list) [Default: %default]", + ) + parser.add_option( + '-c', + dest='crosses', + default=1, + type='int', + help='Number of cross-fold rounds [Default:%default]', + ) + parser.add_option( + "--head", + dest="head_i", + default=0, + type="int", + help="Model head index [Default: %default]", ) parser.add_option( "--shifts", @@ -79,8 +93,8 @@ def main(): parser.add_option( "--span", dest="span", - default=0, - type="int", + default=False, + action="store_true", help="Aggregate entire gene span [Default: %default]", ) parser.add_option( @@ -91,10 +105,31 @@ def main(): help="Model clip_soft setting [Default: %default]", ) parser.add_option( - "--no_transform", - dest="no_transform", - default=0, - type="int", + "--track_scale", + dest="track_scale", + default=0.02, + type="float", + help="Target transform scale [Default: %default]", + ) + parser.add_option( + "--track_transform", + dest="track_transform", + default=0.75, + type="float", + help="Target transform exponent [Default: %default]", + ) + parser.add_option( + "--untransform_old", + dest="untransform_old", + default=False, + action="store_true", + help="Run gradients with old version of inverse transforms [Default: %default]", + ) + parser.add_option( + "--no_untransform", + dest="no_untransform", + default=False, + action="store_true", help="Run gradients with no inverse transforms [Default: %default]", ) parser.add_option( @@ -109,7 +144,7 @@ def main(): dest="aggregate_tracks", default=None, type="int", - help="Run gradients with no inverse transforms [Default: %default]", + help="Aggregate groups of tracks [Default: %default]", ) parser.add_option( "-t", @@ -231,7 +266,10 @@ def main(): # load first model fold to get parameters seqnn_model = 
seqnn.SeqNN(params_model) - seqnn_model.restore(model_folder + "/f0c0/model0_best.h5", 0, by_name=False) + seqnn_model.restore( + model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) seqnn_model.build_slice(targets_df.index, False) # seqnn_model.build_ensemble(options.rc, options.shifts) @@ -438,170 +476,171 @@ def main(): # loop over folds for fold_ix in options.folds: - print("-- Fold = " + str(fold_ix) + " --") - - # (re-)initialize HDF5 - scores_h5_file = "%s/ism_f%dc0.h5" % (options.out_dir, fold_ix) - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, "w") - scores_h5.create_dataset( - "seqs", dtype="bool", shape=(num_genes, options.ism_size, 4) - ) - scores_h5.create_dataset( - "isms", - dtype="float16", - shape=( - num_genes, - options.ism_size, - 4, - num_targets - // ( - options.aggregate_tracks - if options.aggregate_tracks is not None - else 1 + for cross_ix in options.crosses: + + print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") + + # (re-)initialize HDF5 + scores_h5_file = "%s/ism_f%dc%d.h5" % (options.out_dir, fold_ix, cross_ix) + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, "w") + scores_h5.create_dataset( + "seqs", dtype="bool", shape=(num_genes, options.ism_size, 4) + ) + scores_h5.create_dataset( + "isms", + dtype="float16", + shape=( + num_genes, + options.ism_size, + 4, + num_targets + // ( + options.aggregate_tracks + if options.aggregate_tracks is not None + else 1 + ), ), - ), - ) - scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) - scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) - scores_h5.create_dataset("start", data=np.array(genes_start)) - scores_h5.create_dataset("end", data=np.array(genes_end)) - scores_h5.create_dataset("ism_start", data=np.array(genes_ism_start)) - scores_h5.create_dataset("ism_end", 
data=np.array(genes_ism_end)) - scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) - - # load model fold - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c0/model0_best.h5", 0, by_name=False - ) - seqnn_model.build_slice(targets_df.index, False) + ) + scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) + scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) + scores_h5.create_dataset("start", data=np.array(genes_start)) + scores_h5.create_dataset("end", data=np.array(genes_end)) + scores_h5.create_dataset("ism_start", data=np.array(genes_ism_start)) + scores_h5.create_dataset("ism_end", data=np.array(genes_ism_end)) + scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) + + # load model fold + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore( + model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) - track_scale = targets_df.iloc[0]["scale"] - track_transform = 3.0 / 4.0 + for shift in options.shifts: + print("Processing shift %d" % shift, flush=True) - for shift in options.shifts: - print("Processing shift %d" % shift, flush=True) + for rev_comp in [False, True] if options.rc else [False]: - for rev_comp in [False, True] if options.rc == 1 else [False]: + if options.rc: + print( + "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True + ) - if options.rc == 1: - print( - "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True - ) + seq_1hots = [] + gene_slices = [] + gene_targets = [] - seq_1hots = [] - gene_slices = [] - gene_targets = [] + for gi, gene_id in enumerate(gene_list): - for gi, gene_id in enumerate(gene_list): + if gi % 50 == 0: + print("Processing %d, %s" % (gi, gene_id), flush=True) - if gi % 50 == 0: - print("Processing %d, %s" % (gi, gene_id), flush=True) + 
gene = transcriptome.genes[gene_id] - gene = transcriptome.genes[gene_id] + # make sequence + seq_1hot = make_seq_1hot( + genome_open, + genes_chr[gi], + genes_start[gi], + genes_end[gi], + seq_len, + ) + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - # make sequence - seq_1hot = make_seq_1hot( - genome_open, - genes_chr[gi], - genes_start[gi], - genes_end[gi], - seq_len, - ) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + seq_out_len = model_stride * target_length - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - seq_out_len = model_stride * target_length + # determine output positions + gene_slice = gene.output_slice( + seq_out_start, seq_out_len, model_stride, options.span + ) - # determine output positions - gene_slice = gene.output_slice( - seq_out_start, seq_out_len, model_stride, options.span == 1 - ) + # determine ism window + gene_ism_start = genes_ism_start[gi] + gene_ism_end = genes_ism_end[gi] + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + + gene_ism_start = seq_len - genes_ism_end[gi] - 1 + gene_ism_end = seq_len - genes_ism_start[gi] - 1 + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = ( + (targets_df.strand != "-") + if not rev_comp + else (targets_df.strand != "+") + ) + else: + gene_strand_mask = ( + (targets_df.strand != "+") + if not rev_comp + else (targets_df.strand != "-") + ) + + gene_target = np.array(targets_df.index[gene_strand_mask].values) + + # broadcast to singleton batch + seq_1hot = seq_1hot[None, ...] + gene_slice = gene_slice[None, ...] + gene_target = gene_target[None, ...] 
+ + # ism computation + ism = get_ism( + seqnn_model, + seq_1hot, + gene_ism_start, + gene_ism_end, + head_i=0, + target_slice=gene_target, + pos_slice=gene_slice, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + pseudo_count=pseudo_count, + untransform_old=options.untransform_old, + no_untransform=options.no_untransform, + aggregate_tracks=options.aggregate_tracks, + use_mean=False, + use_ratio=False, + use_logodds=False, + ) - # determine ism window - gene_ism_start = genes_ism_start[gi] - gene_ism_end = genes_ism_end[gi] + # undo augmentations and save ism + ism = unaugment_grads(ism, fwdrc=(not rev_comp), shift=shift) - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 + # write to HDF5 + scores_h5["isms"][gi] += ism[ + genes_ism_start[gi] : genes_ism_end[gi], ... + ] - gene_ism_start = seq_len - genes_ism_end[gi] - 1 - gene_ism_end = seq_len - genes_ism_start[gi] - 1 + # collect garbage + gc.collect() - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = ( - (targets_df.strand != "-") - if not rev_comp - else (targets_df.strand != "+") - ) - else: - gene_strand_mask = ( - (targets_df.strand != "+") - if not rev_comp - else (targets_df.strand != "-") - ) + # save sequences and normalize isms by total size of ensemble + for gi, gene_id in enumerate(gene_list): - gene_target = np.array(targets_df.index[gene_strand_mask].values) - - # broadcast to singleton batch - seq_1hot = seq_1hot[None, ...] - gene_slice = gene_slice[None, ...] - gene_target = gene_target[None, ...] 
- - # ism computation - ism = get_ism( - seqnn_model, - seq_1hot, - gene_ism_start, - gene_ism_end, - head_i=0, - target_slice=gene_target, - pos_slice=gene_slice, - track_scale=track_scale, - track_transform=track_transform, - clip_soft=options.clip_soft, - pseudo_count=pseudo_count, - no_transform=options.no_transform == 1, - aggregate_tracks=options.aggregate_tracks, - use_mean=False, - use_ratio=False, - use_logodds=False, - ) - - # undo augmentations and save ism - ism = unaugment_grads(ism, fwdrc=(not rev_comp), shift=shift) - - # write to HDF5 - scores_h5["isms"][gi] += ism[ - genes_ism_start[gi] : genes_ism_end[gi], ... - ] - - # collect garbage - gc.collect() - - # save sequences and normalize isms by total size of ensemble - for gi, gene_id in enumerate(gene_list): - - # re-make original sequence - seq_1hot = make_seq_1hot( - genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len - ) + # re-make original sequence + seq_1hot = make_seq_1hot( + genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len + ) - # write to HDF5 - scores_h5["seqs"][gi] = seq_1hot[ - genes_ism_start[gi] : genes_ism_end[gi], ... - ] - scores_h5["isms"][gi] /= float( - (len(options.shifts) * (2 if options.rc == 1 else 1)) - ) + # write to HDF5 + scores_h5["seqs"][gi] = seq_1hot[ + genes_ism_start[gi] : genes_ism_end[gi], ... 
+ ] + scores_h5["isms"][gi] /= float( + (len(options.shifts) * (2 if options.rc else 1)) + ) - # collect garbage - gc.collect() + # collect garbage + gc.collect() # close files genome_open.close() @@ -667,7 +706,8 @@ def _score_func( track_transform=1.0, clip_soft=None, pseudo_count=0.0, - no_transform=False, + untransform_old=False, + no_untransform=False, aggregate_tracks=None, use_mean=False, use_ratio=False, @@ -679,19 +719,31 @@ def _score_func( model(seq_1hot, training=False), target_slice, axis=-1, batch_dims=1 ) - if not no_transform: + if not no_untransform: + if untransform_old: + # undo scale + preds = preds / track_scale - # undo scale - preds = preds / track_scale + # undo soft_clip + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds + ) - # undo soft_clip - if clip_soft is not None: - preds = tf.where( - preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds - ) + # undo sqrt + preds = preds ** (1. / track_transform) + else: + # undo clip_soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft + 1) ** 2 + clip_soft - 1, preds + ) + + # undo sqrt + preds = -1 + (preds + 1) ** (1. 
/ track_transform) - # undo sqrt - preds = preds ** (1.0 / track_transform) + # scale + preds = preds / track_scale if aggregate_tracks is not None: preds = tf.reduce_mean( @@ -740,7 +792,7 @@ def _score_func( preds_agg_denom = tf.reduce_mean(preds_slice_denom, axis=1) # compute final statistic - if no_transform: + if no_untransform: score_ratios = preds_agg elif not use_ratio: score_ratios = tf.math.log(preds_agg + pseudo_count + 1e-6) @@ -777,7 +829,8 @@ def get_ism( track_transform=1.0, clip_soft=None, pseudo_count=0.0, - no_transform=False, + untransform_old=False, + no_untransform=False, aggregate_tracks=None, use_mean=False, use_ratio=False, @@ -849,7 +902,7 @@ def get_ism( # allocate ism result tensor pred_ism = np.zeros( ( - 524288, + seq_1hot_wt.shape[1], 4, target_slice.shape[1] // (aggregate_tracks if aggregate_tracks is not None else 1), @@ -869,7 +922,8 @@ def get_ism( track_transform, clip_soft, pseudo_count, - no_transform, + untransform_old, + no_untransform, aggregate_tracks, use_mean, use_ratio, @@ -899,7 +953,8 @@ def get_ism( track_transform, clip_soft, pseudo_count, - no_transform, + untransform_old, + no_untransform, aggregate_tracks, use_mean, use_ratio, diff --git a/src/scripts/borzoi_satg_polya_gpu.py b/src/scripts/borzoi_satg_polya_gpu.py index 08f0287..2b4b8e0 100755 --- a/src/scripts/borzoi_satg_polya_gpu.py +++ b/src/scripts/borzoi_satg_polya_gpu.py @@ -56,8 +56,8 @@ def main(): parser.add_option( "--rc", dest="rc", - default=0, - type="int", + default=False, + action="store_true", help="Ensemble forward and reverse complement predictions [Default: %default]", ) parser.add_option( @@ -65,7 +65,21 @@ def main(): dest="folds", default="0", type="str", - help="Model folds to use in ensemble [Default: %default]", + help="Model folds to use in ensemble (comma-separated list) [Default: %default]", + ) + parser.add_option( + '-c', + dest='crosses', + default=1, + type='int', + help='Number of cross-fold rounds [Default:%default]', + ) + 
parser.add_option( + "--head", + dest="head_i", + default=0, + type="int", + help="Model head index [Default: %default]", ) parser.add_option( "--shifts", @@ -77,37 +91,37 @@ def main(): parser.add_option( "--span", dest="span", - default=0, - type="int", + default=False, + action="store_true", help="Aggregate entire gene span [Default: %default]", ) parser.add_option( - "--smoothgrad", - dest="smooth_grad", - default=0, - type="int", - help="Run smoothgrad [Default: %default]", + "--clip_soft", + dest="clip_soft", + default=None, + type="float", + help="Model clip_soft setting [Default: %default]", ) parser.add_option( - "--samples", - dest="n_samples", - default=5, - type="int", - help="Number of smoothgrad samples [Default: %default]", + "--track_scale", + dest="track_scale", + default=0.02, + type="float", + help="Target transform scale [Default: %default]", ) parser.add_option( - "--sampleprob", - dest="sample_prob", - default=0.875, + "--track_transform", + dest="track_transform", + default=0.75, type="float", - help="Probability of not mutating a position in smoothgrad [Default: %default]", + help="Target transform exponent [Default: %default]", ) parser.add_option( - "--clip_soft", - dest="clip_soft", - default=None, - type="float", - help="Model clip_soft setting [Default: %default]", + "--untransform_old", + dest="untransform_old", + default=False, + action="store_true", + help="Run gradients with old version of inverse transforms [Default: %default]", ) parser.add_option( "-t", @@ -165,7 +179,10 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore(model_folder + "/f0c0/model0_best.h5", 0, by_name=False) + seqnn_model.restore( + model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) seqnn_model.build_slice(targets_df.index, False) # seqnn_model.build_ensemble(options.rc, options.shifts) @@ -270,290 +287,286 @@ def main(): # 
loop over folds for fold_ix in options.folds: - print("-- Fold = " + str(fold_ix) + " --") - - # (re-)initialize HDF5 - scores_h5_file = "%s/scores_f%dc0.h5" % (options.out_dir, fold_ix) - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, "w") - scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) - scores_h5.create_dataset( - "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) - ) - scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) - scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) - scores_h5.create_dataset("start", data=np.array(genes_start)) - scores_h5.create_dataset("end", data=np.array(genes_end)) - scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) - - # load model fold - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c0/model0_best.h5", 0, by_name=False - ) - seqnn_model.build_slice(targets_df.index, False) - - track_scale = targets_df.iloc[0]["scale"] - track_transform = 3.0 / 4.0 - - for shift in options.shifts: - print("Processing shift %d" % shift, flush=True) - - for rev_comp in [False, True] if options.rc == 1 else [False]: - - if options.rc == 1: - print( - "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True - ) - - seq_1hots = [] - gene_slices = [] - gene_slices_denom = [] - gene_targets = [] - - for gi, gene_id in enumerate(gene_list): - - if gi % 500 == 0: - print("Processing %d, %s" % (gi, gene_id), flush=True) - - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot( - genome_open, - genes_chr[gi], - genes_start[gi], - genes_end[gi], - seq_len, - ) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - - # get apa dataframe - gene_apa_df = apa_df.query( - "Chromosome == '" - + genes_chr[gi] - + "' and ((End > " - + str(genes_start[gi] - pas_ext) - + " and End <= " - + str(genes_end[gi] + 
pas_ext) - + ") or (Start < " - + str(genes_end[gi] + pas_ext) - + " and Start >= " - + str(genes_start[gi] - pas_ext) - + ")) and pas_strand == '" - + str(genes_strand[gi]) - + "'" - ).sort_values(by=["gene", "site_num"], ascending=True) - - gene_slice = None - gene_slice_denom = None - - if len(gene_apa_df) > 0: - # get distal-most PAS position - pas_start = gene_apa_df.iloc[-1]["Start"] - pas_end = gene_apa_df.iloc[-1]["End"] - pas_strand = gene_apa_df.iloc[-1]["pas_strand"] - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - - # get relative pas positions - pas_seq_start = max(0, pas_start - seq_out_start) - pas_seq_end = max(0, pas_end - seq_out_start) - - # determine output positions - - # upstream coverage (before PAS) - bin_start = None - bin_end = None - if pas_strand == "+": - bin_end = int(np.round(pas_seq_start / model_stride)) + 1 - bin_start = bin_end - 3 - 1 - else: - bin_start = int(np.round(pas_seq_end / model_stride)) - bin_end = bin_start + 3 + 1 - - # clip right boundaries - bin_max = int( - (seq_len - 2.0 * model_stride * model_crop) / model_stride - ) - bin_start = max(min(bin_start, bin_max), 0) - bin_end = max(min(bin_end, bin_max), 0) + for cross_ix in options.crosses: + + print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") + + # (re-)initialize HDF5 + scores_h5_file = "%s/scores_f%dc%d.h5" % (options.out_dir, fold_ix, cross_ix) + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, "w") + scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) + scores_h5.create_dataset( + "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) + ) + scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) + scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) + scores_h5.create_dataset("start", data=np.array(genes_start)) + scores_h5.create_dataset("end", data=np.array(genes_end)) 
+ scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) + + # load model fold + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore( + model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) - gene_slice = np.arange(bin_start, bin_end) + for shift in options.shifts: + print("Processing shift %d" % shift, flush=True) - # downstream coverage (after PAS) - bin_start = None - bin_end = None - if pas_strand == "+": - bin_start = int(np.round(pas_seq_end / model_stride)) + 1 - bin_end = bin_start + 3 + 1 + 1 - else: - bin_end = ( - int(np.round(pas_seq_start / model_stride)) + 1 - 1 - ) - bin_start = bin_end - 3 - 1 - 1 + for rev_comp in [False, True] if options.rc else [False]: - # clip right boundaries - bin_max = int( - (seq_len - 2.0 * model_stride * model_crop) / model_stride - ) - bin_start = max(min(bin_start, bin_max), 0) - bin_end = max(min(bin_end, bin_max), 0) - - gene_slice_denom = np.arange(bin_start, bin_end) - - else: - gene_slice = np.array([0]) - gene_slice_denom = np.array([0]) - - if gene_slice.shape[0] == 0 or gene_slice_denom.shape[0] == 0: - gene_slice = np.array([0]) - gene_slice_denom = np.array([0]) - - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 - gene_slice_denom = target_length - gene_slice_denom - 1 - - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = ( - (targets_df.strand != "-") - if not rev_comp - else (targets_df.strand != "+") - ) - else: - gene_strand_mask = ( - (targets_df.strand != "+") - if not rev_comp - else (targets_df.strand != "-") + if options.rc: + print( + "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True ) - gene_target = np.array(targets_df.index[gene_strand_mask].values) + seq_1hots = [] + gene_slices = [] + gene_slices_denom = [] + gene_targets = [] - # accumulate 
data tensors - seq_1hots.append(seq_1hot[None, ...]) - gene_slices.append(gene_slice[None, ...]) - gene_slices_denom.append(gene_slice_denom[None, ...]) - gene_targets.append(gene_target[None, ...]) + for gi, gene_id in enumerate(gene_list): - if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: + if gi % 500 == 0: + print("Processing %d, %s" % (gi, gene_id), flush=True) - # concat sequences - seq_1hots = np.concatenate(seq_1hots, axis=0) + gene = transcriptome.genes[gene_id] - # pad gene slices to same length (mark valid positions in mask tensor) - max_slice_len = int( - np.max([gene_slice.shape[1] for gene_slice in gene_slices]) + # make sequence + seq_1hot = make_seq_1hot( + genome_open, + genes_chr[gi], + genes_start[gi], + genes_end[gi], + seq_len, ) - max_slice_denom_len = int( - np.max( - [ - gene_slice_denom.shape[1] - for gene_slice_denom in gene_slices_denom - ] + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + + # get apa dataframe + gene_apa_df = apa_df.query( + "Chromosome == '" + + genes_chr[gi] + + "' and ((End > " + + str(genes_start[gi] - pas_ext) + + " and End <= " + + str(genes_end[gi] + pas_ext) + + ") or (Start < " + + str(genes_end[gi] + pas_ext) + + " and Start >= " + + str(genes_start[gi] - pas_ext) + + ")) and pas_strand == '" + + str(genes_strand[gi]) + + "'" + ).sort_values(by=["gene", "site_num"], ascending=True) + + gene_slice = None + gene_slice_denom = None + + if len(gene_apa_df) > 0: + # get distal-most PAS position + pas_start = gene_apa_df.iloc[-1]["Start"] + pas_end = gene_apa_df.iloc[-1]["End"] + pas_strand = gene_apa_df.iloc[-1]["pas_strand"] + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + + # get relative pas positions + pas_seq_start = max(0, pas_start - seq_out_start) + pas_seq_end = max(0, pas_end - seq_out_start) + + # determine output positions + + # upstream coverage (before PAS) + bin_start = None + bin_end = None + if pas_strand == "+": + bin_end = 
int(np.round(pas_seq_start / model_stride)) + 1 + bin_start = bin_end - 3 - 1 + else: + bin_start = int(np.round(pas_seq_end / model_stride)) + bin_end = bin_start + 3 + 1 + + # clip right boundaries + bin_max = int( + (seq_len - 2.0 * model_stride * model_crop) / model_stride ) - ) + bin_start = max(min(bin_start, bin_max), 0) + bin_end = max(min(bin_end, bin_max), 0) + + gene_slice = np.arange(bin_start, bin_end) + + # downstream coverage (after PAS) + bin_start = None + bin_end = None + if pas_strand == "+": + bin_start = int(np.round(pas_seq_end / model_stride)) + 1 + bin_end = bin_start + 3 + 1 + 1 + else: + bin_end = ( + int(np.round(pas_seq_start / model_stride)) + 1 - 1 + ) + bin_start = bin_end - 3 - 1 - 1 + + # clip right boundaries + bin_max = int( + (seq_len - 2.0 * model_stride * model_crop) / model_stride + ) + bin_start = max(min(bin_start, bin_max), 0) + bin_end = max(min(bin_end, bin_max), 0) - gene_masks = np.zeros( - (len(gene_slices), max_slice_len), dtype="float32" - ) - gene_slices_padded = np.zeros( - (len(gene_slices), max_slice_len), dtype="int32" - ) - for gii, gene_slice in enumerate(gene_slices): - for j in range(gene_slice.shape[1]): - gene_masks[gii, j] = 1.0 - gene_slices_padded[gii, j] = gene_slice[0, j] + gene_slice_denom = np.arange(bin_start, bin_end) - gene_slices = gene_slices_padded + else: + gene_slice = np.array([0]) + gene_slice_denom = np.array([0]) + + if gene_slice.shape[0] == 0 or gene_slice_denom.shape[0] == 0: + gene_slice = np.array([0]) + gene_slice_denom = np.array([0]) + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + gene_slice_denom = target_length - gene_slice_denom - 1 + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = ( + (targets_df.strand != "-") + if not rev_comp + else (targets_df.strand != "+") + ) + else: + gene_strand_mask = ( + (targets_df.strand != "+") + if not rev_comp + else (targets_df.strand != "-") + ) - 
gene_masks_denom = np.zeros( - (len(gene_slices_denom), max_slice_denom_len), - dtype="float32", - ) - gene_slices_denom_padded = np.zeros( - (len(gene_slices_denom), max_slice_denom_len), dtype="int32" - ) - for gii, gene_slice_denom in enumerate(gene_slices_denom): - for j in range(gene_slice_denom.shape[1]): - gene_masks_denom[gii, j] = 1.0 - gene_slices_denom_padded[gii, j] = gene_slice_denom[ - 0, j - ] - - gene_slices_denom = gene_slices_denom_padded - - # concat gene-specific targets - gene_targets = np.concatenate(gene_targets, axis=0) - - # batch call gradient computation - grads = seqnn_model.gradients( - seq_1hots, - head_i=0, - target_slice=gene_targets, - pos_slice=gene_slices, - pos_mask=gene_masks, - pos_slice_denom=gene_slices_denom, - pos_mask_denom=gene_masks_denom, - chunk_size=buffer_size - if options.smooth_grad != 1 - else buffer_size // options.n_samples, - batch_size=1, - track_scale=track_scale, - track_transform=track_transform, - clip_soft=options.clip_soft, - use_mean=True, - use_ratio=True, - use_logodds=False, - subtract_avg=True, - input_gate=False, - smooth_grad=options.smooth_grad == 1, - n_samples=options.n_samples, - sample_prob=options.sample_prob, - dtype="float16", - ) + gene_target = np.array(targets_df.index[gene_strand_mask].values) - # undo augmentations and save gradients - for gii, gene_slice in enumerate(gene_slices): - grad = unaugment_grads( - grads[gii, :, :, None], - fwdrc=(not rev_comp), - shift=shift, + # accumulate data tensors + seq_1hots.append(seq_1hot[None, ...]) + gene_slices.append(gene_slice[None, ...]) + gene_slices_denom.append(gene_slice_denom[None, ...]) + gene_targets.append(gene_target[None, ...]) + + if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: + + # concat sequences + seq_1hots = np.concatenate(seq_1hots, axis=0) + + # pad gene slices to same length (mark valid positions in mask tensor) + max_slice_len = int( + np.max([gene_slice.shape[1] for gene_slice in gene_slices]) + ) + 
max_slice_denom_len = int( + np.max( + [ + gene_slice_denom.shape[1] + for gene_slice_denom in gene_slices_denom + ] + ) ) - h5_gi = (gi // buffer_size) * buffer_size + gii + gene_masks = np.zeros( + (len(gene_slices), max_slice_len), dtype="float32" + ) + gene_slices_padded = np.zeros( + (len(gene_slices), max_slice_len), dtype="int32" + ) + for gii, gene_slice in enumerate(gene_slices): + for j in range(gene_slice.shape[1]): + gene_masks[gii, j] = 1.0 + gene_slices_padded[gii, j] = gene_slice[0, j] - # write to HDF5 - scores_h5["grads"][h5_gi] += grad + gene_slices = gene_slices_padded - # clear sequence buffer - seq_1hots = [] - gene_slices = [] - gene_slices_denom = [] - gene_targets = [] + gene_masks_denom = np.zeros( + (len(gene_slices_denom), max_slice_denom_len), + dtype="float32", + ) + gene_slices_denom_padded = np.zeros( + (len(gene_slices_denom), max_slice_denom_len), dtype="int32" + ) + for gii, gene_slice_denom in enumerate(gene_slices_denom): + for j in range(gene_slice_denom.shape[1]): + gene_masks_denom[gii, j] = 1.0 + gene_slices_denom_padded[gii, j] = gene_slice_denom[ + 0, j + ] + + gene_slices_denom = gene_slices_denom_padded + + # concat gene-specific targets + gene_targets = np.concatenate(gene_targets, axis=0) + + # batch call gradient computation + grads = seqnn_model.gradients( + seq_1hots, + head_i=0, + target_slice=gene_targets, + pos_slice=gene_slices, + pos_mask=gene_masks, + pos_slice_denom=gene_slices_denom, + pos_mask_denom=gene_masks_denom, + chunk_size=buffer_size, + batch_size=1, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + untransform_old=options.untransform_old, + use_mean=True, + use_ratio=True, + use_logodds=False, + subtract_avg=True, + input_gate=False, + dtype="float16", + ) - # collect garbage - gc.collect() + # undo augmentations and save gradients + for gii, gene_slice in enumerate(gene_slices): + grad = unaugment_grads( + grads[gii, :, :, None], + 
fwdrc=(not rev_comp), + shift=shift, + ) - # save sequences and normalize gradients by total size of ensemble - for gi, gene_id in enumerate(gene_list): + h5_gi = (gi // buffer_size) * buffer_size + gii - # re-make original sequence - seq_1hot = make_seq_1hot( - genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len - ) + # write to HDF5 + scores_h5["grads"][h5_gi] += grad - # write to HDF5 - scores_h5["seqs"][gi] = seq_1hot - scores_h5["grads"][gi] /= float( - (len(options.shifts) * (2 if options.rc == 1 else 1)) - ) + # clear sequence buffer + seq_1hots = [] + gene_slices = [] + gene_slices_denom = [] + gene_targets = [] + + # collect garbage + gc.collect() + + # save sequences and normalize gradients by total size of ensemble + for gi, gene_id in enumerate(gene_list): + + # re-make original sequence + seq_1hot = make_seq_1hot( + genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len + ) + + # write to HDF5 + scores_h5["seqs"][gi] = seq_1hot + scores_h5["grads"][gi] /= float( + (len(options.shifts) * (2 if options.rc else 1)) + ) - # collect garbage - gc.collect() + # collect garbage + gc.collect() # close files genome_open.close() diff --git a/src/scripts/borzoi_satg_splice_gpu.py b/src/scripts/borzoi_satg_splice_gpu.py index 7dafe23..8d01451 100755 --- a/src/scripts/borzoi_satg_splice_gpu.py +++ b/src/scripts/borzoi_satg_splice_gpu.py @@ -56,8 +56,8 @@ def main(): parser.add_option( "--rc", dest="rc", - default=0, - type="int", + default=False, + action="store_true", help="Ensemble forward and reverse complement predictions [Default: %default]", ) parser.add_option( @@ -65,7 +65,21 @@ def main(): dest="folds", default="0", type="str", - help="Model folds to use in ensemble [Default: %default]", + help="Model folds to use in ensemble (comma-separated list) [Default: %default]", + ) + parser.add_option( + '-c', + dest='crosses', + default=1, + type='int', + help='Number of cross-fold rounds [Default:%default]', + ) + parser.add_option( + 
"--head", + dest="head_i", + default=0, + type="int", + help="Model head index [Default: %default]", ) parser.add_option( "--shifts", @@ -77,37 +91,37 @@ def main(): parser.add_option( "--span", dest="span", - default=0, - type="int", + default=False, + action="store_true", help="Aggregate entire gene span [Default: %default]", ) parser.add_option( - "--smoothgrad", - dest="smooth_grad", - default=0, - type="int", - help="Run smoothgrad [Default: %default]", + "--clip_soft", + dest="clip_soft", + default=None, + type="float", + help="Model clip_soft setting [Default: %default]", ) parser.add_option( - "--samples", - dest="n_samples", - default=5, - type="int", - help="Number of smoothgrad samples [Default: %default]", + "--track_scale", + dest="track_scale", + default=0.02, + type="float", + help="Target transform scale [Default: %default]", ) parser.add_option( - "--sampleprob", - dest="sample_prob", - default=0.875, + "--track_transform", + dest="track_transform", + default=0.75, type="float", - help="Probability of not mutating a position in smoothgrad [Default: %default]", + help="Target transform exponent [Default: %default]", ) parser.add_option( - "--clip_soft", - dest="clip_soft", - default=None, - type="float", - help="Model clip_soft setting [Default: %default]", + "--untransform_old", + dest="untransform_old", + default=False, + action="store_true", + help="Run gradients with old version of inverse transforms [Default: %default]", ) parser.add_option( "-t", @@ -166,7 +180,10 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore(model_folder + "/f0c0/model0_best.h5", 0, by_name=False) + seqnn_model.restore( + model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) seqnn_model.build_slice(targets_df.index, False) # seqnn_model.build_ensemble(options.rc, options.shifts) @@ -242,347 +259,343 @@ def main(): # loop over folds for 
fold_ix in options.folds: - print("-- Fold = " + str(fold_ix) + " --") - - # (re-)initialize HDF5 - scores_h5_file = "%s/scores_f%dc0.h5" % (options.out_dir, fold_ix) - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, "w") - scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) - scores_h5.create_dataset( - "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) - ) - scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) - scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) - scores_h5.create_dataset("start", data=np.array(genes_start)) - scores_h5.create_dataset("end", data=np.array(genes_end)) - scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) - - # load model fold - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c0/model0_best.h5", 0, by_name=False - ) - seqnn_model.build_slice(targets_df.index, False) - - track_scale = targets_df.iloc[0]["scale"] - track_transform = 3.0 / 4.0 - - for shift in options.shifts: - print("Processing shift %d" % shift, flush=True) - - for rev_comp in [False, True] if options.rc == 1 else [False]: - - if options.rc == 1: - print( - "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True - ) - - seq_1hots = [] - gene_slices = [] - gene_slices_denom = [] - gene_targets = [] - - for gi, gene_id in enumerate(gene_list): - - if gi % 500 == 0: - print("Processing %d, %s" % (gi, gene_id), flush=True) - - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot( - genome_open, - genes_chr[gi], - genes_start[gi], - genes_end[gi], - seq_len, - ) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - - # get splice dataframe - gene_splice_df = splice_df.query( - "Chromosome == '" - + genes_chr[gi] - + "' and ((End > " - + str(genes_start[gi]) - + " and End <= " - + str(genes_end[gi]) - + ") or (Start < " - 
+ str(genes_end[gi]) - + " and Start >= " - + str(genes_start[gi]) - + ")) and Strand == '" - + str(genes_strand[gi]) - + "'" - ).sort_values(by=["Chromosome", "Start"], ascending=True) - - gene_slice = None - gene_slice_denom = None - - if len(gene_splice_df) > 0: - - # get random splice junction (donor or acceptor) - rand_ix = np.random.randint(len(gene_splice_df)) - - # get splice junction position - splice_start = gene_splice_df.iloc[rand_ix]["Start"] - splice_end = gene_splice_df.iloc[rand_ix]["End"] - splice_strand = gene_splice_df.iloc[rand_ix]["Strand"] - donor_or_acceptor = gene_splice_df.iloc[rand_ix]["feature"] - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - - # get relative splice positions - splice_seq_start = max(0, splice_start - seq_out_start) - splice_seq_end = max(0, splice_end - seq_out_start) - - # determine output positions - - if donor_or_acceptor == "donor": - - # upstream coverage (before donor) - bin_start = None - bin_end = None - if splice_strand == "+": - bin_end = ( - int(np.round(splice_seq_start / model_stride)) + 1 - ) - bin_start = bin_end - 3 - else: - bin_start = int(np.round(splice_seq_end / model_stride)) - bin_end = bin_start + 3 - - # clip right boundaries - bin_max = int( - (seq_len - 2.0 * model_stride * model_crop) - / model_stride - ) - bin_start = max(min(bin_start, bin_max), 0) - bin_end = max(min(bin_end, bin_max), 0) + for cross_ix in options.crosses: + + print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") + + # (re-)initialize HDF5 + scores_h5_file = "%s/scores_f%dc%d.h5" % (options.out_dir, fold_ix, cross_ix) + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, "w") + scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) + scores_h5.create_dataset( + "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) + ) + scores_h5.create_dataset("gene", 
data=np.array(gene_list, dtype="S")) + scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) + scores_h5.create_dataset("start", data=np.array(genes_start)) + scores_h5.create_dataset("end", data=np.array(genes_end)) + scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) + + # load model fold + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore( + model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) - gene_slice = np.arange(bin_start, bin_end) + for shift in options.shifts: + print("Processing shift %d" % shift, flush=True) - # downstream coverage (after donor) - bin_start = None - bin_end = None - if splice_strand == "+": - bin_start = ( - int(np.round(splice_seq_end / model_stride)) + 1 - ) - bin_end = bin_start + 3 - else: - bin_end = int(np.round(splice_seq_start / model_stride)) - bin_start = bin_end - 3 - - # clip right boundaries - bin_max = int( - (seq_len - 2.0 * model_stride * model_crop) - / model_stride - ) - bin_start = max(min(bin_start, bin_max), 0) - bin_end = max(min(bin_end, bin_max), 0) - - gene_slice_denom = np.arange(bin_start, bin_end) - - elif donor_or_acceptor == "acceptor": - - # downstream coverage (after acceptor) - bin_start = None - bin_end = None - if splice_strand == "+": - bin_start = int(np.round(splice_seq_end / model_stride)) - bin_end = bin_start + 3 - else: - bin_end = ( - int(np.round(splice_seq_start / model_stride)) + 1 - ) - bin_start = bin_end - 3 + for rev_comp in [False, True] if options.rc else [False]: - # clip right boundaries - bin_max = int( - (seq_len - 2.0 * model_stride * model_crop) - / model_stride - ) - bin_start = max(min(bin_start, bin_max), 0) - bin_end = max(min(bin_end, bin_max), 0) - - gene_slice = np.arange(bin_start, bin_end) - - # upstream coverage (before acceptor) - bin_start = None - bin_end = None - if splice_strand == "+": 
- bin_end = int(np.round(splice_seq_start / model_stride)) - bin_start = bin_end - 3 - else: - bin_start = ( - int(np.round(splice_seq_end / model_stride)) + 1 - ) - bin_end = bin_start + 3 - - # clip right boundaries - bin_max = int( - (seq_len - 2.0 * model_stride * model_crop) - / model_stride - ) - bin_start = max(min(bin_start, bin_max), 0) - bin_end = max(min(bin_end, bin_max), 0) - - gene_slice_denom = np.arange(bin_start, bin_end) - - else: - gene_slice = np.array([0]) - gene_slice_denom = np.array([0]) - - if gene_slice.shape[0] == 0 or gene_slice_denom.shape[0] == 0: - gene_slice = np.array([0]) - gene_slice_denom = np.array([0]) - - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 - gene_slice_denom = target_length - gene_slice_denom - 1 - - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = ( - (targets_df.strand != "-") - if not rev_comp - else (targets_df.strand != "+") - ) - else: - gene_strand_mask = ( - (targets_df.strand != "+") - if not rev_comp - else (targets_df.strand != "-") + if options.rc: + print( + "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True ) - gene_target = np.array(targets_df.index[gene_strand_mask].values) + seq_1hots = [] + gene_slices = [] + gene_slices_denom = [] + gene_targets = [] - # accumulate data tensors - seq_1hots.append(seq_1hot[None, ...]) - gene_slices.append(gene_slice[None, ...]) - gene_slices_denom.append(gene_slice_denom[None, ...]) - gene_targets.append(gene_target[None, ...]) + for gi, gene_id in enumerate(gene_list): - if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: + if gi % 500 == 0: + print("Processing %d, %s" % (gi, gene_id), flush=True) - # concat sequences - seq_1hots = np.concatenate(seq_1hots, axis=0) + gene = transcriptome.genes[gene_id] - # pad gene slices to same length (mark valid positions in mask tensor) - max_slice_len = int( - np.max([gene_slice.shape[1] for gene_slice in gene_slices]) 
+ # make sequence + seq_1hot = make_seq_1hot( + genome_open, + genes_chr[gi], + genes_start[gi], + genes_end[gi], + seq_len, ) - max_slice_denom_len = int( - np.max( - [ - gene_slice_denom.shape[1] - for gene_slice_denom in gene_slices_denom - ] + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + + # get splice dataframe + gene_splice_df = splice_df.query( + "Chromosome == '" + + genes_chr[gi] + + "' and ((End > " + + str(genes_start[gi]) + + " and End <= " + + str(genes_end[gi]) + + ") or (Start < " + + str(genes_end[gi]) + + " and Start >= " + + str(genes_start[gi]) + + ")) and Strand == '" + + str(genes_strand[gi]) + + "'" + ).sort_values(by=["Chromosome", "Start"], ascending=True) + + gene_slice = None + gene_slice_denom = None + + if len(gene_splice_df) > 0: + + # get random splice junction (donor or acceptor) + rand_ix = np.random.randint(len(gene_splice_df)) + + # get splice junction position + splice_start = gene_splice_df.iloc[rand_ix]["Start"] + splice_end = gene_splice_df.iloc[rand_ix]["End"] + splice_strand = gene_splice_df.iloc[rand_ix]["Strand"] + donor_or_acceptor = gene_splice_df.iloc[rand_ix]["feature"] + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + + # get relative splice positions + splice_seq_start = max(0, splice_start - seq_out_start) + splice_seq_end = max(0, splice_end - seq_out_start) + + # determine output positions + + if donor_or_acceptor == "donor": + + # upstream coverage (before donor) + bin_start = None + bin_end = None + if splice_strand == "+": + bin_end = ( + int(np.round(splice_seq_start / model_stride)) + 1 + ) + bin_start = bin_end - 3 + else: + bin_start = int(np.round(splice_seq_end / model_stride)) + bin_end = bin_start + 3 + + # clip right boundaries + bin_max = int( + (seq_len - 2.0 * model_stride * model_crop) + / model_stride + ) + bin_start = max(min(bin_start, bin_max), 0) + bin_end = max(min(bin_end, bin_max), 0) + + gene_slice = np.arange(bin_start, 
bin_end) + + # downstream coverage (after donor) + bin_start = None + bin_end = None + if splice_strand == "+": + bin_start = ( + int(np.round(splice_seq_end / model_stride)) + 1 + ) + bin_end = bin_start + 3 + else: + bin_end = int(np.round(splice_seq_start / model_stride)) + bin_start = bin_end - 3 + + # clip right boundaries + bin_max = int( + (seq_len - 2.0 * model_stride * model_crop) + / model_stride + ) + bin_start = max(min(bin_start, bin_max), 0) + bin_end = max(min(bin_end, bin_max), 0) + + gene_slice_denom = np.arange(bin_start, bin_end) + + elif donor_or_acceptor == "acceptor": + + # downstream coverage (after acceptor) + bin_start = None + bin_end = None + if splice_strand == "+": + bin_start = int(np.round(splice_seq_end / model_stride)) + bin_end = bin_start + 3 + else: + bin_end = ( + int(np.round(splice_seq_start / model_stride)) + 1 + ) + bin_start = bin_end - 3 + + # clip right boundaries + bin_max = int( + (seq_len - 2.0 * model_stride * model_crop) + / model_stride + ) + bin_start = max(min(bin_start, bin_max), 0) + bin_end = max(min(bin_end, bin_max), 0) + + gene_slice = np.arange(bin_start, bin_end) + + # upstream coverage (before acceptor) + bin_start = None + bin_end = None + if splice_strand == "+": + bin_end = int(np.round(splice_seq_start / model_stride)) + bin_start = bin_end - 3 + else: + bin_start = ( + int(np.round(splice_seq_end / model_stride)) + 1 + ) + bin_end = bin_start + 3 + + # clip right boundaries + bin_max = int( + (seq_len - 2.0 * model_stride * model_crop) + / model_stride + ) + bin_start = max(min(bin_start, bin_max), 0) + bin_end = max(min(bin_end, bin_max), 0) + + gene_slice_denom = np.arange(bin_start, bin_end) + + else: + gene_slice = np.array([0]) + gene_slice_denom = np.array([0]) + + if gene_slice.shape[0] == 0 or gene_slice_denom.shape[0] == 0: + gene_slice = np.array([0]) + gene_slice_denom = np.array([0]) + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + 
gene_slice_denom = target_length - gene_slice_denom - 1 + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = ( + (targets_df.strand != "-") + if not rev_comp + else (targets_df.strand != "+") + ) + else: + gene_strand_mask = ( + (targets_df.strand != "+") + if not rev_comp + else (targets_df.strand != "-") ) - ) - gene_masks = np.zeros( - (len(gene_slices), max_slice_len), dtype="float32" - ) - gene_slices_padded = np.zeros( - (len(gene_slices), max_slice_len), dtype="int32" - ) - for gii, gene_slice in enumerate(gene_slices): - for j in range(gene_slice.shape[1]): - gene_masks[gii, j] = 1.0 - gene_slices_padded[gii, j] = gene_slice[0, j] + gene_target = np.array(targets_df.index[gene_strand_mask].values) - gene_slices = gene_slices_padded + # accumulate data tensors + seq_1hots.append(seq_1hot[None, ...]) + gene_slices.append(gene_slice[None, ...]) + gene_slices_denom.append(gene_slice_denom[None, ...]) + gene_targets.append(gene_target[None, ...]) - gene_masks_denom = np.zeros( - (len(gene_slices_denom), max_slice_denom_len), - dtype="float32", - ) - gene_slices_denom_padded = np.zeros( - (len(gene_slices_denom), max_slice_denom_len), dtype="int32" - ) - for gii, gene_slice_denom in enumerate(gene_slices_denom): - for j in range(gene_slice_denom.shape[1]): - gene_masks_denom[gii, j] = 1.0 - gene_slices_denom_padded[gii, j] = gene_slice_denom[ - 0, j - ] - - gene_slices_denom = gene_slices_denom_padded - - # concat gene-specific targets - gene_targets = np.concatenate(gene_targets, axis=0) - - # batch call gradient computation - grads = seqnn_model.gradients( - seq_1hots, - head_i=0, - target_slice=gene_targets, - pos_slice=gene_slices, - pos_mask=gene_masks, - pos_slice_denom=gene_slices_denom, - pos_mask_denom=gene_masks_denom, - chunk_size=buffer_size - if options.smooth_grad != 1 - else buffer_size // options.n_samples, - batch_size=1, - track_scale=track_scale, - track_transform=track_transform, - clip_soft=options.clip_soft, 
- use_mean=True, - use_ratio=True, - use_logodds=False, - subtract_avg=True, - input_gate=False, - smooth_grad=options.smooth_grad == 1, - n_samples=options.n_samples, - sample_prob=options.sample_prob, - dtype="float16", - ) + if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: - # undo augmentations and save gradients - for gii, gene_slice in enumerate(gene_slices): - grad = unaugment_grads( - grads[gii, :, :, None], - fwdrc=(not rev_comp), - shift=shift, + # concat sequences + seq_1hots = np.concatenate(seq_1hots, axis=0) + + # pad gene slices to same length (mark valid positions in mask tensor) + max_slice_len = int( + np.max([gene_slice.shape[1] for gene_slice in gene_slices]) + ) + max_slice_denom_len = int( + np.max( + [ + gene_slice_denom.shape[1] + for gene_slice_denom in gene_slices_denom + ] + ) ) - h5_gi = (gi // buffer_size) * buffer_size + gii + gene_masks = np.zeros( + (len(gene_slices), max_slice_len), dtype="float32" + ) + gene_slices_padded = np.zeros( + (len(gene_slices), max_slice_len), dtype="int32" + ) + for gii, gene_slice in enumerate(gene_slices): + for j in range(gene_slice.shape[1]): + gene_masks[gii, j] = 1.0 + gene_slices_padded[gii, j] = gene_slice[0, j] - # write to HDF5 - scores_h5["grads"][h5_gi] += grad + gene_slices = gene_slices_padded - # clear sequence buffer - seq_1hots = [] - gene_slices = [] - gene_slices_denom = [] - gene_targets = [] + gene_masks_denom = np.zeros( + (len(gene_slices_denom), max_slice_denom_len), + dtype="float32", + ) + gene_slices_denom_padded = np.zeros( + (len(gene_slices_denom), max_slice_denom_len), dtype="int32" + ) + for gii, gene_slice_denom in enumerate(gene_slices_denom): + for j in range(gene_slice_denom.shape[1]): + gene_masks_denom[gii, j] = 1.0 + gene_slices_denom_padded[gii, j] = gene_slice_denom[ + 0, j + ] + + gene_slices_denom = gene_slices_denom_padded + + # concat gene-specific targets + gene_targets = np.concatenate(gene_targets, axis=0) + + # batch call gradient computation 
+ grads = seqnn_model.gradients( + seq_1hots, + head_i=0, + target_slice=gene_targets, + pos_slice=gene_slices, + pos_mask=gene_masks, + pos_slice_denom=gene_slices_denom, + pos_mask_denom=gene_masks_denom, + chunk_size=buffer_size, + batch_size=1, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + untransform_old=options.untransform_old, + use_mean=True, + use_ratio=True, + use_logodds=False, + subtract_avg=True, + input_gate=False, + dtype="float16", + ) - # collect garbage - gc.collect() + # undo augmentations and save gradients + for gii, gene_slice in enumerate(gene_slices): + grad = unaugment_grads( + grads[gii, :, :, None], + fwdrc=(not rev_comp), + shift=shift, + ) - # save sequences and normalize gradients by total size of ensemble - for gi, gene_id in enumerate(gene_list): + h5_gi = (gi // buffer_size) * buffer_size + gii - # re-make original sequence - seq_1hot = make_seq_1hot( - genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len - ) + # write to HDF5 + scores_h5["grads"][h5_gi] += grad - # write to HDF5 - scores_h5["seqs"][gi] = seq_1hot - scores_h5["grads"][gi] /= float( - (len(options.shifts) * (2 if options.rc == 1 else 1)) - ) + # clear sequence buffer + seq_1hots = [] + gene_slices = [] + gene_slices_denom = [] + gene_targets = [] + + # collect garbage + gc.collect() + + # save sequences and normalize gradients by total size of ensemble + for gi, gene_id in enumerate(gene_list): + + # re-make original sequence + seq_1hot = make_seq_1hot( + genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len + ) + + # write to HDF5 + scores_h5["seqs"][gi] = seq_1hot + scores_h5["grads"][gi] /= float( + (len(options.shifts) * (2 if options.rc else 1)) + ) - # collect garbage - gc.collect() + # collect garbage + gc.collect() # close files genome_open.close() diff --git a/src/scripts/borzoi_sed_folds.py b/src/scripts/borzoi_sed_folds.py index eb0176b..e7f92a9 100644 --- 
a/src/scripts/borzoi_sed_folds.py +++ b/src/scripts/borzoi_sed_folds.py @@ -44,7 +44,7 @@ def main(): sed_options.add_option( '-f', dest='genome_fasta', - default='%s/data/hg38.fa' % os.environ['BASENJIDIR'], + default='%s/assembly/ucsc/hg38.fa' % os.environ['HG38'], help='Genome FASTA for sequences [Default: %default]', ) sed_options.add_option( @@ -87,23 +87,10 @@ def main(): action='store_true', help='Aggregate entire gene span [Default: %default]', ) - sed_options.add_option( - '-u', - dest='untransform_old', - default=False, - action='store_true', - help='Undo scale, clip_soft and sqrt transforms (old) [Default: %default]', - ) - sed_options.add_option( - '--no_untransform', - dest='no_untransform', - default=False, - action='store_true', - ) sed_options.add_option( '--stats', dest='sed_stats', - default='D2', + default='SED', help='Comma-separated list of stats to save. [Default: %default]', ) sed_options.add_option( @@ -127,7 +114,6 @@ def main(): ) parser.add_option_group(sed_options) - # cross-fold fold_options = OptionGroup(parser, 'cross-fold options') fold_options.add_option( @@ -137,6 +123,19 @@ def main(): type='int', help='Number of cross-fold rounds [Default:%default]', ) + fold_options.add_option( + '--folds', + dest='fold_subset', + default=1, + type='int', + help='Run a subset of folds [Default:%default]', + ) + fold_options.add_option( + '--f_list', + dest='fold_subset_list', + default=None, + help='Run a subset of folds (encoded as comma-separated string) [Default:%default]', + ) fold_options.add_option( '-d', dest='data_head', @@ -194,21 +193,16 @@ def main(): ####################################################### # prep work - # count folds - num_folds = 0 - fold0_dir = '%s/f%dc0' % (exp_dir, num_folds) - model_file = '%s/train/model_best.h5' % fold0_dir - if options.data_head is not None: - model_file = '%s/train/model%d_best.h5' % (fold0_dir, options.data_head) - while os.path.isfile(model_file): - num_folds += 1 - fold0_dir = '%s/f%dc0' % 
(exp_dir, num_folds) - model_file = '%s/train/model_best.h5' % fold0_dir - if options.data_head is not None: - model_file = '%s/train/model%d_best.h5' % (fold0_dir, options.data_head) - print('Found %d folds' % num_folds) - if num_folds == 0: - exit(1) + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] ################################################################ # SNP scores @@ -221,7 +215,7 @@ def main(): jobs = [] for ci in range(options.crosses): - for fi in range(num_folds): + for fi in fold_index: it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) name = '%s-f%dc%d' % (options.name, fi, ci) diff --git a/src/scripts/borzoi_sed_ipaqtl_cov.py b/src/scripts/borzoi_sed_ipaqtl_cov.py index 61eb282..9e08f94 100755 --- a/src/scripts/borzoi_sed_ipaqtl_cov.py +++ b/src/scripts/borzoi_sed_ipaqtl_cov.py @@ -27,6 +27,7 @@ import pysam from baskerville import gene as bgene +from baskerville import dataset from baskerville import seqnn from baskerville import stream from baskerville import vcf as bvcf @@ -38,11 +39,6 @@ relative to intronic polyadenylation sites in an annotation file. 
""" - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - - ################################################################################ # main ################################################################################ @@ -61,7 +57,11 @@ def main(): default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], help="GTF for gene definition [Default %default]", ) - parser.add_option("--apafile", dest="apa_file", default="polyadb_human_v3.csv.gz") + parser.add_option( + "--apafile", + dest="apa_file", + default="polyadb_human_v3.csv.gz" + ) parser.add_option( "-o", dest="out_dir", @@ -123,6 +123,18 @@ def main(): type="str", help="File specifying target indexes and labels in table format", ) + parser.add_option( + "-u", + dest="untransform_old", + default=False, + action="store_true", + ) + parser.add_option( + "--no_untransform", + dest="no_untransform", + default=False, + action="store_true", + ) (options, args) = parser.parse_args() if len(args) == 3: @@ -190,7 +202,14 @@ def main(): targets_df = pd.read_csv(options.targets_file, sep="\t", index_col=0) # prep strand - targets_strand_df = targets_prep_strand(targets_df) + targets_strand_df = dataset.targets_prep_strand(targets_df) + + # set strand pairs (using new indexing) + orig_new_index = dict(zip(targets_df.index, np.arange(targets_df.shape[0]))) + targets_strand_pair = np.array( + [orig_new_index[ti] for ti in targets_df.strand_pair] + ) + params_model["strand_pair"] = [targets_strand_pair] ################################################################# # setup model @@ -246,15 +265,6 @@ def main(): .reset_index(drop=True) ) - print( - "n intron sites = " + str(len(apa_df.query("site_type == 'Intron'"))), - flush=True, - ) - print( - "n utr3 sites = " + str(len(apa_df.query("site_type == '3\\' most exon'"))), - flush=True, - ) - apa_df["start_hg38"] = apa_df["position_hg38"] apa_df["end_hg38"] = apa_df["position_hg38"] + 1 @@ -350,13 +360,15 @@ def snp_gen(): 
alt_preds = preds_stream[pi] pi += 1 - # undo scale - ref_preds /= np.expand_dims(targets_df.scale, axis=0) - alt_preds /= np.expand_dims(targets_df.scale, axis=0) - - # undo sqrt - ref_preds = ref_preds ** (4 / 3) - alt_preds = alt_preds ** (4 / 3) + # untransform predictions + if options.targets_file is not None: + if not options.no_untransform: + if options.untransform_old: + ref_preds = dataset.untransform_preds1(ref_preds, targets_df, unscale=True, unclip=False) + alt_preds = dataset.untransform_preds1(alt_preds, targets_df, unscale=True, unclip=False) + else: + ref_preds = dataset.untransform_preds(ref_preds, targets_df, unscale=True, unclip=False) + alt_preds = dataset.untransform_preds(alt_preds, targets_df, unscale=True, unclip=False) # for each overlapping gene for gene_id, gene_slice_dup in snpseq_gene_slice[si]["bins"].items(): @@ -415,8 +427,7 @@ def snp_gen(): # for each overlapping PAS for pas_id, pas_slice in snpseq_apa_slice[si]["bins"].items(): if len(pas_slice) > len(set(pas_slice)): - print("WARNING: %d %s has overlapping bins" % (si, pas_id)) - eprint("WARNING: %d %s has overlapping bins" % (si, pas_id)) + print("WARNING: %d %s has overlapping bins" % (si, pas_id), flush=True) # slice pas positions ref_preds_pas = ref_preds[pas_slice] @@ -778,23 +789,6 @@ def make_snpseq_bedt(snps, seq_len): return snpseq_bedt -def targets_prep_strand(targets_df): - # attach strand - targets_strand = [] - for _, target in targets_df.iterrows(): - if target.strand_pair == target.name: - targets_strand.append(".") - else: - targets_strand.append(target.identifier[-1]) - targets_df["strand"] = targets_strand - - # collapse stranded - strand_mask = targets_df.strand != "-" - targets_strand_df = targets_df[strand_mask] - - return targets_strand_df - - def write_pct(sed_out, sed_stats): """Compute percentile values for each target and write to HDF5.""" diff --git a/src/scripts/borzoi_sed_paqtl_cov.py b/src/scripts/borzoi_sed_paqtl_cov.py index 8869f74..84b84cc 100755 
--- a/src/scripts/borzoi_sed_paqtl_cov.py +++ b/src/scripts/borzoi_sed_paqtl_cov.py @@ -27,6 +27,7 @@ import pysam from baskerville import gene as bgene +from baskerville import dataset from baskerville import seqnn from baskerville import stream from baskerville import vcf as bvcf @@ -38,11 +39,6 @@ relative to 3' UTR polyadenylation sites in an annotation file. """ - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - - ################################################################################ # main ################################################################################ @@ -61,7 +57,11 @@ def main(): default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], help="GTF for gene definition [Default %default]", ) - parser.add_option("--apafile", dest="apa_file", default="polyadb_human_v3.csv.gz") + parser.add_option( + "--apafile", + dest="apa_file", + default="polyadb_human_v3.csv.gz" + ) parser.add_option( "-o", dest="out_dir", @@ -123,6 +123,18 @@ def main(): type="str", help="File specifying target indexes and labels in table format", ) + parser.add_option( + "-u", + dest="untransform_old", + default=False, + action="store_true", + ) + parser.add_option( + "--no_untransform", + dest="no_untransform", + default=False, + action="store_true", + ) (options, args) = parser.parse_args() if len(args) == 3: @@ -190,7 +202,14 @@ def main(): targets_df = pd.read_csv(options.targets_file, sep="\t", index_col=0) # prep strand - targets_strand_df = targets_prep_strand(targets_df) + targets_strand_df = dataset.targets_prep_strand(targets_df) + + # set strand pairs (using new indexing) + orig_new_index = dict(zip(targets_df.index, np.arange(targets_df.shape[0]))) + targets_strand_pair = np.array( + [orig_new_index[ti] for ti in targets_df.strand_pair] + ) + params_model["strand_pair"] = [targets_strand_pair] ################################################################# # setup model @@ -360,13 +379,15 @@ def snp_gen(): 
alt_preds = preds_stream[pi] pi += 1 - # undo scale - ref_preds /= np.expand_dims(targets_df.scale, axis=0) - alt_preds /= np.expand_dims(targets_df.scale, axis=0) - - # undo sqrt - ref_preds = ref_preds ** (4 / 3) - alt_preds = alt_preds ** (4 / 3) + # untransform predictions + if options.targets_file is not None: + if not options.no_untransform: + if options.untransform_old: + ref_preds = dataset.untransform_preds1(ref_preds, targets_df, unscale=True, unclip=False) + alt_preds = dataset.untransform_preds1(alt_preds, targets_df, unscale=True, unclip=False) + else: + ref_preds = dataset.untransform_preds(ref_preds, targets_df, unscale=True, unclip=False) + alt_preds = dataset.untransform_preds(alt_preds, targets_df, unscale=True, unclip=False) # for each overlapping gene for gene_id, gene_slice_dup in snpseq_gene_slice[si]["bins"].items(): @@ -425,8 +446,7 @@ def snp_gen(): # for each overlapping PAS for pas_id, pas_slice in snpseq_apa_slice[si]["bins"].items(): if len(pas_slice) > len(set(pas_slice)): - print("WARNING: %d %s has overlapping bins" % (si, pas_id)) - eprint("WARNING: %d %s has overlapping bins" % (si, pas_id)) + print("WARNING: %d %s has overlapping bins" % (si, pas_id), flush=True) # slice pas positions ref_preds_pas = ref_preds[pas_slice] @@ -788,23 +808,6 @@ def make_snpseq_bedt(snps, seq_len): return snpseq_bedt -def targets_prep_strand(targets_df): - # attach strand - targets_strand = [] - for _, target in targets_df.iterrows(): - if target.strand_pair == target.name: - targets_strand.append(".") - else: - targets_strand.append(target.identifier[-1]) - targets_df["strand"] = targets_strand - - # collapse stranded - strand_mask = targets_df.strand != "-" - targets_strand_df = targets_df[strand_mask] - - return targets_strand_df - - def write_pct(sed_out, sed_stats): """Compute percentile values for each target and write to HDF5.""" From 970379f2dc53d7390878eb8d68394870ede815c5 Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Mon, 30 Sep 2024 
09:46:50 -0700 Subject: [PATCH 03/32] Revision updates. --- download_models.sh | 68 + env_vars.sh | 50 + examples/CD99_example.gtf | 156 + examples/CFHR2_example.gtf | 15 + examples/GCFC2_example.gtf | 126 + ...zoi_example_eqtl_chr10_116952944_T_C.ipynb | 166 +- ...i_example_ipaqtl_chr10_116664061_G_A.ipynb | 128 +- ...zoi_example_paqtl_chr1_236763042_A_G.ipynb | 128 +- ...rzoi_example_sqtl_chr9_135548708_G_C.ipynb | 126 +- examples/params.json | 87 + examples/params_pred.json | 58 +- examples/targets_gtex_liver.txt | 4 + examples/targets_mouse.txt | 2609 +++++++++++++++++ examples/targets_rna.txt | 1544 ++++++++++ pyproject.toml | 43 +- setup.cfg | 43 - .../{ => _archive}/borzoi_bench_crispr.py | 0 .../borzoi_bench_crispr_folds.py | 0 .../borzoi_bench_flowfish_folds.py | 0 .../borzoi_bench_gasperini_folds.py | 0 src/scripts/_archive/borzoi_satg_gene.py | 351 +++ .../{ => _archive}/borzoi_satg_gene_multi.py | 0 ...h_classify.py => borzoi_bench_classify.py} | 2 +- ...olds.py => borzoi_bench_gtex_folds_sad.py} | 27 +- ...olds.py => borzoi_bench_gtex_folds_sed.py} | 19 +- src/scripts/borzoi_bench_ipaqtl_folds.py | 13 +- src/scripts/borzoi_bench_paqtl_folds.py | 13 +- src/scripts/borzoi_bench_sqtl_folds.py | 12 +- src/scripts/borzoi_bench_trip_folds.py | 6 +- ...i_gtex_coef.py => borzoi_gtex_coef_sad.py} | 15 +- ...i_gtex_coef.py => borzoi_gtex_coef_sed.py} | 15 +- src/scripts/{basenji_sad.py => borzoi_sad.py} | 4 +- src/scripts/borzoi_sad_folds.py | 266 ++ src/scripts/borzoi_satg_gene.py | 821 +++++- ...=> borzoi_satg_gene_crispr_ism_shuffle.py} | 15 +- ...ism.py => borzoi_satg_gene_focused_ism.py} | 15 +- src/scripts/borzoi_satg_gene_gpu.py | 903 ------ ...satg_polya_gpu.py => borzoi_satg_polya.py} | 17 +- ...tg_splice_gpu.py => borzoi_satg_splice.py} | 17 +- src/scripts/borzoi_sed.py | 4 +- src/scripts/borzoi_sed_folds.py | 8 +- src/scripts/borzoi_sed_ipaqtl_cov.py | 7 +- src/scripts/borzoi_sed_paqtl_cov.py | 7 +- ...test_apa_polaydb.py => borzoi_test_apa.py} | 10 
+- ...ds_polaydb.py => borzoi_test_apa_folds.py} | 13 +- src/scripts/borzoi_test_exons.py | 22 +- src/scripts/borzoi_test_exons_folds.py | 36 +- src/scripts/borzoi_test_genes.py | 2 +- src/scripts/borzoi_test_genes_folds.py | 6 +- ...test_tss_gencode.py => borzoi_test_tss.py} | 10 +- ...ds_gencode.py => borzoi_test_tss_folds.py} | 13 +- src/scripts/borzoi_tfmodisco.py | 6 +- src/scripts/borzoi_tfmodisco_diff.py | 6 +- src/scripts/borzoi_trip.py | 4 +- src/scripts/bw_h5.py | 140 + src/scripts/data/qtl_data/README.md | 33 - src/scripts/data/qtl_data/download_finemap.py | 62 - src/scripts/data/qtl_data/download_sumstat.py | 56 - .../qtl_data/ipaqtl_make_negative_sets.py | 196 -- .../qtl_data/ipaqtl_make_positive_sets.py | 191 -- src/scripts/data/qtl_data/ipaqtl_vcfs.py | 234 -- .../data/qtl_data/make_expression_tables.py | 181 -- src/scripts/data/qtl_data/make_vcfs.py | 112 - .../data/qtl_data/merge_finemapping_tables.py | 102 - .../data/qtl_data/paqtl_make_negative_sets.py | 196 -- .../data/qtl_data/paqtl_make_positive_sets.py | 191 -- src/scripts/data/qtl_data/paqtl_vcfs.py | 234 -- .../data/qtl_data/sqtl_make_negative_sets.py | 195 -- .../data/qtl_data/sqtl_make_positive_sets.py | 190 -- src/scripts/data/qtl_data/sqtl_vcfs.py | 234 -- src/scripts/data/training_data/Makefile | 47 - src/scripts/data/training_data/README.md | 11 - src/scripts/idx_genome.py | 32 + src/scripts/pygene.py | 324 ++ src/scripts/slurm.py | 332 +++ src/scripts/util.py | 120 + src/scripts/w5_merge.py | 110 + src/scripts/w5_qc.py | 322 ++ 78 files changed, 7941 insertions(+), 3940 deletions(-) create mode 100755 download_models.sh create mode 100755 env_vars.sh create mode 100644 examples/CD99_example.gtf create mode 100644 examples/CFHR2_example.gtf create mode 100644 examples/GCFC2_example.gtf create mode 100644 examples/params.json create mode 100644 examples/targets_gtex_liver.txt create mode 100644 examples/targets_mouse.txt create mode 100644 examples/targets_rna.txt delete mode 100644 
setup.cfg rename src/scripts/{ => _archive}/borzoi_bench_crispr.py (100%) rename src/scripts/{ => _archive}/borzoi_bench_crispr_folds.py (100%) rename src/scripts/{ => _archive}/borzoi_bench_flowfish_folds.py (100%) rename src/scripts/{ => _archive}/borzoi_bench_gasperini_folds.py (100%) create mode 100755 src/scripts/_archive/borzoi_satg_gene.py rename src/scripts/{ => _archive}/borzoi_satg_gene_multi.py (100%) rename src/scripts/{basenji_bench_classify.py => borzoi_bench_classify.py} (99%) mode change 100644 => 100755 rename src/scripts/{basenji_bench_gtex_folds.py => borzoi_bench_gtex_folds_sad.py} (96%) mode change 100644 => 100755 rename src/scripts/{borzoi_bench_gtex_folds.py => borzoi_bench_gtex_folds_sed.py} (97%) mode change 100644 => 100755 rename src/scripts/{basenji_gtex_coef.py => borzoi_gtex_coef_sad.py} (96%) mode change 100644 => 100755 rename src/scripts/{borzoi_gtex_coef.py => borzoi_gtex_coef_sed.py} (97%) mode change 100644 => 100755 rename src/scripts/{basenji_sad.py => borzoi_sad.py} (99%) mode change 100644 => 100755 create mode 100755 src/scripts/borzoi_sad_folds.py rename src/scripts/{borzoi_satg_gene_gpu_crispr_ism_shuffle.py => borzoi_satg_gene_crispr_ism_shuffle.py} (98%) mode change 100644 => 100755 rename src/scripts/{borzoi_satg_gene_gpu_focused_ism.py => borzoi_satg_gene_focused_ism.py} (98%) delete mode 100755 src/scripts/borzoi_satg_gene_gpu.py rename src/scripts/{borzoi_satg_polya_gpu.py => borzoi_satg_polya.py} (98%) rename src/scripts/{borzoi_satg_splice_gpu.py => borzoi_satg_splice.py} (98%) mode change 100644 => 100755 src/scripts/borzoi_sed_folds.py rename src/scripts/{borzoi_test_apa_polaydb.py => borzoi_test_apa.py} (97%) rename src/scripts/{borzoi_test_apa_folds_polaydb.py => borzoi_test_apa_folds.py} (93%) rename src/scripts/{borzoi_test_tss_gencode.py => borzoi_test_tss.py} (96%) mode change 100644 => 100755 rename src/scripts/{borzoi_test_tss_folds_gencode.py => borzoi_test_tss_folds.py} (94%) mode change 100644 => 
100755 mode change 100644 => 100755 src/scripts/borzoi_tfmodisco.py mode change 100644 => 100755 src/scripts/borzoi_tfmodisco_diff.py create mode 100755 src/scripts/bw_h5.py delete mode 100644 src/scripts/data/qtl_data/README.md delete mode 100644 src/scripts/data/qtl_data/download_finemap.py delete mode 100644 src/scripts/data/qtl_data/download_sumstat.py delete mode 100644 src/scripts/data/qtl_data/ipaqtl_make_negative_sets.py delete mode 100644 src/scripts/data/qtl_data/ipaqtl_make_positive_sets.py delete mode 100644 src/scripts/data/qtl_data/ipaqtl_vcfs.py delete mode 100644 src/scripts/data/qtl_data/make_expression_tables.py delete mode 100644 src/scripts/data/qtl_data/make_vcfs.py delete mode 100644 src/scripts/data/qtl_data/merge_finemapping_tables.py delete mode 100644 src/scripts/data/qtl_data/paqtl_make_negative_sets.py delete mode 100644 src/scripts/data/qtl_data/paqtl_make_positive_sets.py delete mode 100644 src/scripts/data/qtl_data/paqtl_vcfs.py delete mode 100644 src/scripts/data/qtl_data/sqtl_make_negative_sets.py delete mode 100644 src/scripts/data/qtl_data/sqtl_make_positive_sets.py delete mode 100644 src/scripts/data/qtl_data/sqtl_vcfs.py delete mode 100644 src/scripts/data/training_data/Makefile delete mode 100644 src/scripts/data/training_data/README.md create mode 100755 src/scripts/idx_genome.py create mode 100755 src/scripts/pygene.py create mode 100755 src/scripts/slurm.py create mode 100755 src/scripts/util.py create mode 100755 src/scripts/w5_merge.py create mode 100755 src/scripts/w5_qc.py diff --git a/download_models.sh b/download_models.sh new file mode 100755 index 0000000..1ec7a26 --- /dev/null +++ b/download_models.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# download model weights (data fold 3, 4 replicates) +for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=","; set -- $rep; + mkdir -p "examples/saved_models/$1/train" + local_model="examples/saved_models/$1/train/model0_best.h5" + if [ -f "$local_model" ]; then + echo "$1 model already 
exists." + else + wget --progress=bar:force "https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5" -O "$local_model" + fi +done + +# download and uncompress annotation files +mkdir -p examples/hg38/genes/gencode41 +mkdir -p examples/hg38/genes/polyadb + +if [ -f examples/hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then + echo "Gene annotation already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > examples/hg38/genes/gencode41/gencode41_basic_nort.gtf +fi + +if [ -f examples/hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then + echo "Gene annotation (no read-through, protein-coding) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > examples/hg38/genes/gencode41/gencode41_basic_nort_protein.gtf +fi + +if [ -f examples/hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then + echo "Gene annotation (protein-coding) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > examples/hg38/genes/gencode41/gencode41_basic_protein.gtf +fi + +if [ -f examples/hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then + echo "TSS annotation already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > examples/hg38/genes/gencode41/gencode41_basic_tss2.bed +fi + +if [ -f examples/hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then + echo "Splice site annotation already exist." +else + wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O examples/hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz +fi + +if [ -f examples/hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then + echo "Splice site annotation already exist." 
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > examples/hg38/genes/gencode41/gencode41_basic_protein_splice.gff +fi + +if [ -f examples/hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then + echo "PolyA site annotation already exist." +else + wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O examples/hg38/genes/polyadb/polyadb_human_v3.csv.gz +fi + +# download and index hg38 genome +mkdir -p examples/hg38/assembly/ucsc + +if [ -f examples/hg38/assembly/ucsc/hg38.fa ]; then + echo "Human genome FASTA already exists." +else + wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > examples/hg38/assembly/ucsc/hg38.fa + python src/scripts/idx_genome.py examples/hg38/assembly/ucsc/hg38.fa +fi diff --git a/env_vars.sh b/env_vars.sh new file mode 100755 index 0000000..8d41e18 --- /dev/null +++ b/env_vars.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# set these variables before running the script +LOCAL_BORZOI_PATH="/home/jlinder/borzoi" +LOCAL_BASKERVILLE_PATH="/home/jlinder/baskerville" +LOCAL_WESTMINSTER_PATH="/home/jlinder/westminster" +LOCAL_USER="jlinder" + +# create env_vars sh scripts in local conda env +mkdir -p "$CONDA_PREFIX/etc/conda/activate.d" +mkdir -p "$CONDA_PREFIX/etc/conda/deactivate.d" + +file_vars_act="$CONDA_PREFIX/etc/conda/activate.d/env_vars.sh" +if ! [ -e $file_vars_act ]; then + echo '#!/bin/sh' > $file_vars_act +fi + +file_vars_deact="$CONDA_PREFIX/etc/conda/deactivate.d/env_vars.sh" +if ! 
[ -e $file_vars_deact ]; then + echo '#!/bin/sh' > $file_vars_deact +fi + +# append borzoi (and baskerville/westminster) env variable exports to /activate.d/env_vars.sh +echo "export BORZOI_DIR=$LOCAL_BORZOI_PATH" >> $file_vars_act +echo 'export PATH=$BORZOI_DIR/src/scripts:$PATH' >> $file_vars_act +echo 'export PYTHONPATH=$BORZOI_DIR/src/scripts:$PYTHONPATH' >> $file_vars_act + +echo "export BASKERVILLE_DIR=$LOCAL_BASKERVILLE_PATH" >> $file_vars_act +echo 'export PATH=$BASKERVILLE_DIR/src/baskerville/scripts:$PATH' >> $file_vars_act +echo 'export PYTHONPATH=$BASKERVILLE_DIR/src/baskerville/scripts:$PYTHONPATH' >> $file_vars_act + +echo "export WESTMINSTER_DIR=$LOCAL_WESTMINSTER_PATH" >> $file_vars_act +echo 'export PATH=$WESTMINSTER_DIR/src/westminster/scripts:$PATH' >> $file_vars_act +echo 'export PYTHONPATH=$WESTMINSTER_DIR/src/westminster/scripts:$PYTHONPATH' >> $file_vars_act + +echo 'export BORZOI_HG38=$BORZOI_DIR/examples/hg38' >> $file_vars_act +echo 'export BORZOI_MM10=$BORZOI_DIR/examples/mm10' >> $file_vars_act + +echo "export BORZOI_CONDA=/home/$LOCAL_USER/anaconda3/etc/profile.d/conda.sh" >> $file_vars_act + +# append borzoi env variable unsets to /deactivate.d/env_vars.sh +echo 'unset BASKERVILLE_DIR' >> $file_vars_deact +echo 'unset WESTMINSTER_DIR' >> $file_vars_deact +echo 'unset BORZOI_DIR' >> $file_vars_deact +echo 'unset BORZOI_HG38' >> $file_vars_deact +echo 'unset BORZOI_MM10' >> $file_vars_deact +echo 'unset BORZOI_CONDA' >> $file_vars_deact + +# finally activate env variables +source $file_vars_act diff --git a/examples/CD99_example.gtf b/examples/CD99_example.gtf new file mode 100644 index 0000000..7fc53a4 --- /dev/null +++ b/examples/CD99_example.gtf @@ -0,0 +1,156 @@ +chrX ENSEMBL transcript 2691187 2741300 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2691187 2691427 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 1; exon_id "ENSE00003729830.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2691361 2691427 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 1; exon_id "ENSE00003729830.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 1; exon_id "ENSE00003729830.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2714422 2714454 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 2; exon_id "ENSE00003535342.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2714422 2714454 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 2; exon_id "ENSE00003535342.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2717605 2717652 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 3; exon_id "ENSE00003474982.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2717605 2717652 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 3; exon_id "ENSE00003474982.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2719661 2719705 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 4; exon_id "ENSE00003586106.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2719661 2719705 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 4; exon_id "ENSE00003586106.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2720356 2720424 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 5; exon_id "ENSE00003578821.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2720356 2720424 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 5; exon_id "ENSE00003578821.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2722627 2722674 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 6; exon_id "ENSE00003552837.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2722627 2722674 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 6; exon_id "ENSE00003552837.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2723314 2723364 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 7; exon_id "ENSE00003612985.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2723314 2723364 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 7; exon_id "ENSE00003612985.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2726260 2726373 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 8; exon_id "ENSE00003785141.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2726260 2726373 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 8; exon_id "ENSE00003785141.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2733357 2733374 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 9; exon_id "ENSE00003715936.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL CDS 2733357 2733361 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 9; exon_id "ENSE00003715936.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL stop_codon 2733362 2733364 . 
+ 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 9; exon_id "ENSE00003715936.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2738203 2738256 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 10; exon_id "ENSE00003720441.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL exon 2740779 2741300 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 11; exon_id "ENSE00003713358.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL UTR 2691187 2691360 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 1; exon_id "ENSE00003729830.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL UTR 2733362 2733374 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 9; exon_id "ENSE00003715936.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL UTR 2738203 2738256 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 10; exon_id "ENSE00003720441.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX ENSEMBL UTR 2740779 2741300 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000611428.5"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-210"; exon_number 11; exon_id "ENSE00003713358.1"; level 3; protein_id "ENSP00000479999.1"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; +chrX HAVANA transcript 2691276 2741101 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2691276 2691427 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 1; exon_id "ENSE00001870387.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2691361 2691427 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 1; exon_id "ENSE00001870387.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 1; exon_id "ENSE00001870387.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2714422 2714454 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2714422 2714454 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2719661 2719705 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 3; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2719661 2719705 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 3; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2720356 2720424 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 4; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2720356 2720424 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 4; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2722627 2722674 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 5; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2722627 2722674 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 5; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2723314 2723364 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 6; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2723314 2723364 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 6; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2726260 2726373 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 7; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2726260 2726373 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 7; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2738200 2738256 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 8; exon_id "ENSE00001032174.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2738200 2738256 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 8; exon_id "ENSE00001032174.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA exon 2740779 2741101 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 9; exon_id "ENSE00001887497.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA CDS 2740779 2740801 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 9; exon_id "ENSE00001887497.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA stop_codon 2740802 2740804 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 9; exon_id "ENSE00001887497.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA UTR 2691276 2691360 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 1; exon_id "ENSE00001870387.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA UTR 2740802 2741101 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381187.8"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-204"; exon_number 9; exon_id "ENSE00001887497.1"; level 2; protein_id "ENSP00000370582.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS48071.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055626.1"; +chrX HAVANA transcript 2691280 2733667 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2691280 2691427 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 1; exon_id "ENSE00001487750.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2691361 2691427 . 
+ 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 1; exon_id "ENSE00001487750.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 1; exon_id "ENSE00001487750.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2714422 2714454 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2714422 2714454 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2717605 2717652 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2717605 2717652 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2719661 2719705 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2719661 2719705 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2720356 2720424 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2720356 2720424 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2722627 2722674 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2722627 2722674 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2723314 2723364 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2723314 2723364 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2726260 2726373 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2726260 2726373 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2727293 2727343 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 9; exon_id "ENSE00001487748.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2727293 2727343 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 9; exon_id "ENSE00001487748.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA exon 2733357 2733667 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 10; exon_id "ENSE00001487747.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA CDS 2733357 2733361 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 10; exon_id "ENSE00001487747.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA stop_codon 2733362 2733364 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 10; exon_id "ENSE00001487747.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA UTR 2691280 2691360 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 1; exon_id "ENSE00001487750.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA UTR 2733362 2733667 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381184.6"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-203"; exon_number 10; exon_id "ENSE00001487747.1"; level 2; protein_id "ENSP00000370579.1"; transcript_support_level "5"; hgnc_id "HGNC:7082"; tag "non_canonical_TEC"; tag "dotter_confirmed"; tag "basic"; tag "appris_alternative_2"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055627.1"; +chrX HAVANA transcript 2691295 2741309 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2691295 2691427 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 1; exon_id "ENSE00001487792.5"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2691361 2691427 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 1; exon_id "ENSE00001487792.5"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 1; exon_id "ENSE00001487792.5"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2714422 2714454 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2714422 2714454 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2717605 2717652 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2717605 2717652 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2719661 2719705 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2719661 2719705 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2720356 2720424 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2720356 2720424 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2722627 2722674 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2722627 2722674 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2723314 2723364 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2723314 2723364 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2726260 2726373 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2726260 2726373 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2738200 2738256 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 9; exon_id "ENSE00001032174.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2738200 2738256 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 9; exon_id "ENSE00001032174.1"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA exon 2740779 2741309 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 10; exon_id "ENSE00001487771.3"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA CDS 2740779 2740801 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 10; exon_id "ENSE00001487771.3"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA stop_codon 2740802 2740804 . 
+ 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 10; exon_id "ENSE00001487771.3"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA UTR 2691295 2691360 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 1; exon_id "ENSE00001487792.5"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA UTR 2740802 2741309 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381192.10"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-205"; exon_number 10; exon_id "ENSE00001487771.3"; level 2; protein_id "ENSP00000370588.3"; transcript_support_level "1"; hgnc_id "HGNC:7082"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_4"; tag "CCDS"; ccdsid "CCDS14119.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055624.3"; +chrX HAVANA transcript 2691310 2733672 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2691310 2691427 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 1; exon_id "ENSE00003331907.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2691361 2691427 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 1; exon_id "ENSE00003331907.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 1; exon_id "ENSE00003331907.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2714422 2714454 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2714422 2714454 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2717605 2717652 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2717605 2717652 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2719661 2719705 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2719661 2719705 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2720356 2720424 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2720356 2720424 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2722627 2722674 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2722627 2722674 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2723314 2723364 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2723314 2723364 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2726260 2726373 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2726260 2726373 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA exon 2733357 2733672 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 9; exon_id "ENSE00001862861.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA CDS 2733357 2733361 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 9; exon_id "ENSE00001862861.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA stop_codon 2733362 2733364 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 9; exon_id "ENSE00001862861.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA UTR 2691310 2691360 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 1; exon_id "ENSE00003331907.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA UTR 2733362 2733672 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000482405.7"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-208"; exon_number 9; exon_id "ENSE00001862861.1"; level 2; protein_id "ENSP00000494027.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS75947.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000087928.3"; +chrX HAVANA transcript 2691313 2741290 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2691313 2691427 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 1; exon_id "ENSE00003755750.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2691361 2691427 . 
+ 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 1; exon_id "ENSE00003755750.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 1; exon_id "ENSE00003755750.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2714422 2714454 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2714422 2714454 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 2; exon_id "ENSE00003535342.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2717605 2717652 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2717605 2717652 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 3; exon_id "ENSE00003474982.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2719661 2719705 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2719661 2719705 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 4; exon_id "ENSE00003586106.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2720356 2720424 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2720356 2720424 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 5; exon_id "ENSE00003578821.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2722627 2722674 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2722627 2722674 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 6; exon_id "ENSE00003552837.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2723314 2723364 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2723314 2723364 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 7; exon_id "ENSE00003612985.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2726260 2726373 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2726260 2726373 . 
+ 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 8; exon_id "ENSE00003785141.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2738203 2738256 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 9; exon_id "ENSE00003758767.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2738203 2738256 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 9; exon_id "ENSE00003758767.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA exon 2740779 2741290 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 10; exon_id "ENSE00003758362.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA CDS 2740779 2740801 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 10; exon_id "ENSE00003758362.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA stop_codon 2740802 2740804 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 10; exon_id "ENSE00003758362.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA UTR 2691313 2691360 . + . 
gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 1; exon_id "ENSE00003755750.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA UTR 2740802 2741290 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000624481.4"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-212"; exon_number 10; exon_id "ENSE00003758362.1"; level 2; protein_id "ENSP00000485427.1"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "NAGNAG_splice_site"; tag "basic"; tag "appris_alternative_2"; tag "CCDS"; ccdsid "CCDS83452.1"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000478890.1"; +chrX HAVANA transcript 2691361 2692253 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA exon 2691361 2691427 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 1; exon_id "ENSE00001902734.1"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA CDS 2691361 2691427 . 
+ 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 1; exon_id "ENSE00001902734.1"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA start_codon 2691361 2691363 . + 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 1; exon_id "ENSE00001902734.1"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA exon 2691788 2692253 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 2; exon_id "ENSE00001669439.3"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA CDS 2691788 2691948 . + 2 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 2; exon_id "ENSE00001669439.3"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA stop_codon 2691949 2691951 . 
+ 0 gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 2; exon_id "ENSE00001669439.3"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; +chrX HAVANA UTR 2691949 2692253 . + . gene_id "ENSG00000002586.20"; transcript_id "ENST00000381180.9"; gene_type "protein_coding"; gene_name "CD99"; transcript_type "protein_coding"; transcript_name "CD99-202"; exon_number 2; exon_id "ENSE00001669439.3"; level 2; protein_id "ENSP00000370573.3"; transcript_support_level "2"; hgnc_id "HGNC:7082"; tag "basic"; havana_gene "OTTHUMG00000021073.12"; havana_transcript "OTTHUMT00000055625.2"; diff --git a/examples/CFHR2_example.gtf b/examples/CFHR2_example.gtf new file mode 100644 index 0000000..82ccfff --- /dev/null +++ b/examples/CFHR2_example.gtf @@ -0,0 +1,15 @@ +chr1 HAVANA transcript 196943738 196959622 . + . gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA exon 196943738 196943938 . + . 
gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 1; exon_id "ENSE00001920108.3"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA CDS 196943881 196943938 . + 0 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 1; exon_id "ENSE00001920108.3"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA start_codon 196943881 196943883 . + 0 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 1; exon_id "ENSE00001920108.3"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA exon 196949455 196949649 . + . 
gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 2; exon_id "ENSE00003745979.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA CDS 196949455 196949649 . + 2 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 2; exon_id "ENSE00003745979.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA exon 196950852 196951028 . + . gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 3; exon_id "ENSE00003831930.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA CDS 196950852 196951028 . 
+ 2 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 3; exon_id "ENSE00003831930.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA exon 196957891 196958073 . + . gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 4; exon_id "ENSE00003836915.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA CDS 196957891 196958073 . + 2 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 4; exon_id "ENSE00003836915.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA exon 196958881 196959622 . + . 
gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 5; exon_id "ENSE00003843688.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA CDS 196958881 196959077 . + 2 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 5; exon_id "ENSE00003843688.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA stop_codon 196959078 196959080 . + 0 gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 5; exon_id "ENSE00003843688.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA UTR 196943738 196943880 . + . 
gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 1; exon_id "ENSE00001920108.3"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; +chr1 HAVANA UTR 196959078 196959622 . + . gene_id "ENSG00000080910.14"; transcript_id "ENST00000367415.8"; gene_type "protein_coding"; gene_name "CFHR2"; transcript_type "protein_coding"; transcript_name "CFHR2-201"; exon_number 5; exon_id "ENSE00003843688.1"; level 2; protein_id "ENSP00000356385.4"; transcript_support_level "1"; hgnc_id "HGNC:4890"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS30959.1"; havana_gene "OTTHUMG00000036518.4"; havana_transcript "OTTHUMT00000088815.4"; diff --git a/examples/GCFC2_example.gtf b/examples/GCFC2_example.gtf new file mode 100644 index 0000000..f271f76 --- /dev/null +++ b/examples/GCFC2_example.gtf @@ -0,0 +1,126 @@ +chr2 HAVANA transcript 75662705 75710915 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75710591 75710915 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 1; exon_id "ENSE00001261900.4"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75710591 75710855 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 1; exon_id "ENSE00001261900.4"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA start_codon 75710853 75710855 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 1; exon_id "ENSE00001261900.4"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75706523 75706651 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 2; exon_id "ENSE00003651464.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75706523 75706651 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 2; exon_id "ENSE00003651464.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75702199 75702423 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 3; exon_id "ENSE00003608878.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75702199 75702423 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 3; exon_id "ENSE00003608878.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75701190 75701287 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 4; exon_id "ENSE00003668748.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75701190 75701287 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 4; exon_id "ENSE00003668748.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75696200 75696315 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 5; exon_id "ENSE00003492242.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75696200 75696315 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 5; exon_id "ENSE00003492242.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75694241 75694427 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 6; exon_id "ENSE00003663933.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75694241 75694427 . 
- 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 6; exon_id "ENSE00003663933.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75691977 75692100 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 7; exon_id "ENSE00003535454.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75691977 75692100 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 7; exon_id "ENSE00003535454.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75690638 75690719 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 8; exon_id "ENSE00003556420.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75690638 75690719 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 8; exon_id "ENSE00003556420.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75689969 75690081 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 9; exon_id "ENSE00003593081.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75689969 75690081 . 
- 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 9; exon_id "ENSE00003593081.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75689026 75689225 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 10; exon_id "ENSE00003615519.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75689026 75689225 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 10; exon_id "ENSE00003615519.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75687827 75687977 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 11; exon_id "ENSE00003566359.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75687827 75687977 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 11; exon_id "ENSE00003566359.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75680193 75680314 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 12; exon_id "ENSE00003522865.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75680193 75680314 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 12; exon_id "ENSE00003522865.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75673444 75673520 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 13; exon_id "ENSE00003474620.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75673444 75673520 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 13; exon_id "ENSE00003474620.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75671950 75672016 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 14; exon_id "ENSE00003475605.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75671950 75672016 . - 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 14; exon_id "ENSE00003475605.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75670138 75670284 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 15; exon_id "ENSE00003563051.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75670138 75670284 . 
- 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 15; exon_id "ENSE00003563051.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75665929 75666053 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 16; exon_id "ENSE00003582023.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75665929 75666053 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 16; exon_id "ENSE00003582023.1"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA exon 75662705 75664783 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 17; exon_id "ENSE00003465078.2"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA CDS 75664669 75664783 . - 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 17; exon_id "ENSE00003465078.2"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA stop_codon 75664666 75664668 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 17; exon_id "ENSE00003465078.2"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA UTR 75710856 75710915 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 1; exon_id "ENSE00001261900.4"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 HAVANA UTR 75662705 75664668 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000321027.8"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-201"; exon_number 17; exon_id "ENSE00003465078.2"; level 2; protein_id "ENSP00000318690.3"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_2"; tag "CCDS"; ccdsid "CCDS1961.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000252255.3"; +chr2 ENSEMBL transcript 75662708 75710985 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75710591 75710985 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 1; exon_id "ENSE00002281229.2"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL CDS 75710591 75710855 . 
- 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 1; exon_id "ENSE00002281229.2"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL start_codon 75710853 75710855 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 1; exon_id "ENSE00002281229.2"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75706523 75706651 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 2; exon_id "ENSE00003651464.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL CDS 75706523 75706651 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 2; exon_id "ENSE00003651464.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75702199 75702423 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 3; exon_id "ENSE00003608878.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL CDS 75702199 75702423 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 3; exon_id "ENSE00003608878.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75701190 75701287 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 4; exon_id "ENSE00003668748.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL CDS 75701190 75701287 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 4; exon_id "ENSE00003668748.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75694241 75694427 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 5; exon_id "ENSE00003524832.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL CDS 75694383 75694427 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 5; exon_id "ENSE00003524832.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL stop_codon 75694380 75694382 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 5; exon_id "ENSE00003524832.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75691977 75692100 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 6; exon_id "ENSE00003693895.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75690638 75690719 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 7; exon_id "ENSE00003605616.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75689969 75690081 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 8; exon_id "ENSE00003479763.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75689026 75689225 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 9; exon_id "ENSE00003673268.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75687827 75687977 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 10; exon_id "ENSE00003597384.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75680193 75680314 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 11; exon_id "ENSE00003687660.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75673444 75673520 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 12; exon_id "ENSE00003551346.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75671950 75672016 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 13; exon_id "ENSE00003621594.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75670138 75670284 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 14; exon_id "ENSE00003659152.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75665929 75666053 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 15; exon_id "ENSE00003497772.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL exon 75662708 75664783 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 16; exon_id "ENSE00003721530.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75710856 75710985 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 1; exon_id "ENSE00002281229.2"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75694241 75694382 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 5; exon_id "ENSE00003524832.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75691977 75692100 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 6; exon_id "ENSE00003693895.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75690638 75690719 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 7; exon_id "ENSE00003605616.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75689969 75690081 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 8; exon_id "ENSE00003479763.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75689026 75689225 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 9; exon_id "ENSE00003673268.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75687827 75687977 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 10; exon_id "ENSE00003597384.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75680193 75680314 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 11; exon_id "ENSE00003687660.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75673444 75673520 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 12; exon_id "ENSE00003551346.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75671950 75672016 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 13; exon_id "ENSE00003621594.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75670138 75670284 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 14; exon_id "ENSE00003659152.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75665929 75666053 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 15; exon_id "ENSE00003497772.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 ENSEMBL UTR 75662708 75664783 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000541687.5"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-211"; exon_number 16; exon_id "ENSE00003721530.1"; level 3; protein_id "ENSP00000437767.1"; transcript_support_level "2"; hgnc_id "HGNC:1317"; tag "basic"; havana_gene "OTTHUMG00000129989.5"; +chr2 HAVANA transcript 75664419 75710892 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75710591 75710892 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 1; exon_id "ENSE00001910556.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75710591 75710855 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 1; exon_id "ENSE00001910556.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA start_codon 75710853 75710855 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 1; exon_id "ENSE00001910556.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75706523 75706651 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 2; exon_id "ENSE00003651464.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75706523 75706651 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 2; exon_id "ENSE00003651464.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75702313 75702423 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 3; exon_id "ENSE00001577517.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75702313 75702423 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 3; exon_id "ENSE00001577517.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75701190 75701287 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 4; exon_id "ENSE00003668748.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75701190 75701287 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 4; exon_id "ENSE00003668748.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75696200 75696315 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 5; exon_id "ENSE00003492242.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75696200 75696315 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 5; exon_id "ENSE00003492242.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75694241 75694427 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 6; exon_id "ENSE00003663933.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75694241 75694427 . 
- 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 6; exon_id "ENSE00003663933.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75691977 75692100 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 7; exon_id "ENSE00003535454.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75691977 75692100 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 7; exon_id "ENSE00003535454.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75690638 75690719 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 8; exon_id "ENSE00003556420.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75690638 75690719 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 8; exon_id "ENSE00003556420.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75689969 75690081 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 9; exon_id "ENSE00003593081.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75689969 75690081 . - 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 9; exon_id "ENSE00003593081.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75689026 75689225 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 10; exon_id "ENSE00003615519.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75689026 75689225 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 10; exon_id "ENSE00003615519.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75687827 75687977 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 11; exon_id "ENSE00003566359.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75687827 75687977 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 11; exon_id "ENSE00003566359.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75680193 75680314 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 12; exon_id "ENSE00003522865.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75680193 75680314 . 
- 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 12; exon_id "ENSE00003522865.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75673444 75673520 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 13; exon_id "ENSE00003474620.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75673444 75673520 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 13; exon_id "ENSE00003474620.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75671950 75672016 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 14; exon_id "ENSE00003475605.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75671950 75672016 . 
- 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 14; exon_id "ENSE00003475605.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75670138 75670284 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 15; exon_id "ENSE00003563051.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75670138 75670284 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 15; exon_id "ENSE00003563051.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75665929 75666053 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 16; exon_id "ENSE00003582023.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75665929 75666053 . 
- 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 16; exon_id "ENSE00003582023.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA exon 75664419 75664783 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 17; exon_id "ENSE00001837077.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA CDS 75664669 75664783 . - 1 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 17; exon_id "ENSE00001837077.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA stop_codon 75664666 75664668 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 17; exon_id "ENSE00001837077.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA UTR 75710856 75710892 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 1; exon_id "ENSE00001910556.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA UTR 75664419 75664668 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000409857.7"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-202"; exon_number 17; exon_id "ENSE00001837077.1"; level 1; protein_id "ENSP00000386552.3"; transcript_support_level "5"; hgnc_id "HGNC:1317"; tag "basic"; tag "appris_alternative_2"; tag "exp_conf"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328715.1"; +chr2 HAVANA transcript 75701796 75710899 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA exon 75710591 75710899 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 1; exon_id "ENSE00001905495.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA CDS 75710591 75710855 . 
- 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 1; exon_id "ENSE00001905495.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA start_codon 75710853 75710855 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 1; exon_id "ENSE00001905495.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA exon 75706523 75706651 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 2; exon_id "ENSE00003651464.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA CDS 75706523 75706651 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 2; exon_id "ENSE00003651464.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA exon 75701796 75702423 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 3; exon_id "ENSE00001956115.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA CDS 75702173 75702423 . - 2 gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 3; exon_id "ENSE00001956115.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA stop_codon 75702170 75702172 . - 0 gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 3; exon_id "ENSE00001956115.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA UTR 75710856 75710899 . - . gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 1; exon_id "ENSE00001905495.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; +chr2 HAVANA UTR 75701796 75702172 . - . 
gene_id "ENSG00000005436.14"; transcript_id "ENST00000470503.1"; gene_type "protein_coding"; gene_name "GCFC2"; transcript_type "protein_coding"; transcript_name "GCFC2-207"; exon_number 3; exon_id "ENSE00001956115.1"; level 2; protein_id "ENSP00000474481.1"; transcript_support_level "1"; hgnc_id "HGNC:1317"; tag "basic"; tag "CCDS"; ccdsid "CCDS62943.1"; havana_gene "OTTHUMG00000129989.5"; havana_transcript "OTTHUMT00000328714.2"; diff --git a/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb b/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb index 7e1e2e6..b66c8ae 100644 --- a/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb +++ b/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb @@ -10,9 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-26 11:08:24.099808: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-09-26 17:59:51.449884: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-09-26 17:59:51.449959: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-09-26 17:59:51.451178: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-09-26 17:59:51.459254: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-09-26 11:08:35.392354: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-09-26 17:59:53.076138: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -43,28 +46,25 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "a6315e46-79ce-4653-ba71-242e74516b47", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "bash: /home/jlinder/anaconda3/envs/borzoi_py39_2/lib/libtinfo.so.6: no version information available (required by bash)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "f0 model already exists.\n", - "f1 model already exists.\n", - "f2 model already exists.\n", - "f3 model already exists.\n", - "Annotation already exists.\n", - "Splice sites already exist.\n", - "PolyA sites already exist.\n", + "f3c0 model already exists.\n", + "f3c1 model already exists.\n", + "f3c2 model already exists.\n", + "f3c3 model already exists.\n", + "Gene annotation already exists.\n", + "Gene annotation (no read-through, protein-coding) already exists.\n", + "Gene annotation (protein-coding) already exists.\n", + "TSS annotation already exists.\n", + "Splice site annotation already exist.\n", + "Splice site annotation already exist.\n", + "PolyA site annotation already exist.\n", "Human genome FASTA already exists.\n" ] } @@ -72,77 +72,100 @@ "source": [ "%%bash\n", "\n", - "#Download model weights\n", - "for rep in f0 f1 f2 f3; do\n", - " mkdir -p \"saved_models/$rep/\"\n", - " local_model=\"saved_models/$rep/model0_best.h5\"\n", + "#Download model weights (data fold 3, 4 replicates)\n", + "for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=\",\"; set -- $rep; \n", + " mkdir -p \"saved_models/$1/train\"\n", + " local_model=\"saved_models/$1/train/model0_best.h5\"\n", " if [ -f \"$local_model\" ]; then\n", - " echo \"$rep model already exists.\"\n", + " echo \"$1 model already exists.\"\n", " else\n", - 
" wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$rep/model0_best.h5\" -O \"$local_model\"\n", + " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5\" -O \"$local_model\"\n", " fi\n", "done\n", "\n", "#Download and uncompress annotation files\n", - "if [ -f gencode41_basic_nort.gtf ]; then\n", - " echo \"Annotation already exists.\"\n", + "mkdir -p hg38/genes/gencode41\n", + "mkdir -p hg38/genes/polyadb\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then\n", + " echo \"Gene annotation already exists.\"\n", "else\n", - " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > gencode41_basic_nort.gtf\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort.gtf\n", "fi\n", - "if [ -f gencode41_basic_protein_splice.csv.gz ]; then\n", - " echo \"Splice sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then\n", + " echo \"Gene annotation (no read-through, protein-coding) already exists.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort_protein.gtf\n", "fi\n", - "if [ -f polyadb_human_v3.csv.gz ]; then\n", - " echo \"PolyA sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then\n", + " echo \"Gene annotation (protein-coding) already exists.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein.gtf\n", + "fi\n", + 
"\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then\n", + " echo \"TSS annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_tss2.bed\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein_splice.gff\n", + "fi\n", + "\n", + "if [ -f hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then\n", + " echo \"PolyA site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O hg38/genes/polyadb/polyadb_human_v3.csv.gz\n", "fi\n", "\n", "#Download and index hg38 genome\n", - "if [ -f hg38.fa ]; then\n", + "mkdir -p hg38/assembly/ucsc\n", + "\n", + "if [ -f hg38/assembly/ucsc/hg38.fa ]; then\n", " echo \"Human genome FASTA already exists.\"\n", "else\n", - " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38.fa\n", - "fi" + " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38/assembly/ucsc/hg38.fa\n", + "fi\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "id": "f3dfe8ad-5c40-44b1-aab6-58491694da5d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Faidx(\"hg38.fa\")" + "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" ] }, - "execution_count": 3, 
+ "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pyfaidx.Faidx('hg38.fa')" + "pyfaidx.Faidx('hg38/assembly/ucsc/hg38.fa')" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "id": "e5fbf3da", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-09-26 11:10:26.366636: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10372 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:81:00.0, compute capability: 6.1\n" - ] - } - ], + "outputs": [], "source": [ "#Model configuration\n", "\n", @@ -181,7 +204,7 @@ "models = []\n", "for rep_ix in range(n_reps) :\n", " \n", - " model_file = \"saved_models/f\" + str(rep_ix) + \"/model0_best.h5\"\n", + " model_file = \"saved_models/f3c\" + str(rep_ix) + \"/train/model0_best.h5\"\n", "\n", " seqnn_model = seqnn.SeqNN(params_model)\n", " seqnn_model.restore(model_file, 0)\n", @@ -195,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "6f010781", "metadata": { "scrolled": true @@ -212,25 +235,25 @@ "source": [ "#Initialize fasta sequence extractor\n", "\n", - "fasta_open = pysam.Fastafile('hg38.fa')\n", + "fasta_open = pysam.Fastafile('hg38/assembly/ucsc/hg38.fa')\n", "\n", "#Load splice site annotation\n", "\n", - "splice_df = pd.read_csv('gencode41_basic_protein_splice.csv.gz', sep='\\t', compression='gzip')\n", + "splice_df = pd.read_csv('hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz', sep='\\t', compression='gzip')\n", "\n", "print(\"len(splice_df) = \" + str(len(splice_df)))\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "id": "12df90e4", "metadata": {}, "outputs": [], "source": [ "#Load GTF (optional; needed to compute exon coverage attributions for example gene)\n", "\n", - "transcriptome = 
bgene.Transcriptome('gencode41_basic_nort.gtf')\n", + "transcriptome = bgene.Transcriptome('hg38/genes/gencode41/gencode41_basic_nort.gtf')\n", "\n", "search_gene = 'ENSG00000187164'\n", "center_pos = 116952944\n", @@ -256,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "073e4711", "metadata": {}, "outputs": [ @@ -280,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "id": "4ad40138", "metadata": { "scrolled": true @@ -290,8 +313,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-26 11:12:04.278470: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600\n", - "2023-09-26 11:12:20.024423: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-09-25 10:47:49.900745: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-09-25 10:47:52.112099: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-09-25 10:47:54.815324: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -299,8 +323,8 @@ "output_type": "stream", "text": [ "-- Counts --\n", - " - sum_wt = 134628.16\n", - " - sum_mut = 136860.81\n", + " - sum_wt = 138084.66\n", + " - sum_mut = 140380.31\n", " - max_y_wt = 1051.197\n", " - max_y_mut = 1060.9895\n", " -- (max_y = 1060.9895)\n" @@ -308,7 +332,7 @@ }, { "data": { - "image/png": "", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABKYAAAC+CAYAAAAP1AcDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA0iUlEQVR4nO3dd3xUVf7/8fckoUiVIiA1BAIhTIpEomQpSUSEKOAKGqULKOBPXdZ1UVmEgCxi2dVdWb/oV5oFRYp+I0WlDaI00URClTZAiIjSQoBAkjm/P/hyvwwJIX1SXs/Hg0eYe+4953PvTDR5c+65NmOMEQAAAAAAAFDCvDxdAAAAAAAAAComgikAAAAAAAB4BMEUAAAAAAAAPIJgCgAAAAAAAB5BMAUAAAAAAACPIJgCAAAAAACARxBMAQAAAAAAwCMIpgAAAAAAAOARBFMAAAAAAADwCIIpAADKgdDQUIWGhiowMFDe3t7W69jY2EL37XA4FBoaet32GTNmaPr06bn2MWPGDE2bNi3PY6alpWns2LFq3bq1goKCFBISokGDBungwYOKiYmxzs9msykoKEihoaHq0qWLJLltu/LnxIkTeR5bkjIzMzV58mQFBATIbrcrNDRUjz/+uE6fPp2vfkq7Bx98UBs3bnTbNnToUNWqVUvnzp1z226z2azz9/X1VWJiYrb+nE6nZs6c6bYtJiZGe/bsKdK6C2Lp0qV6/PHHPV0GAAC4ho+nCwAAAIV3JSRwOp0KDQ3NMTTIzMyUj0/R/q//woUL+uc//6mkpKRc93v88cfVrl07/b//9/9Uu3btXPc1xigmJkbt2rVTUlKSbrrpJrlcLi1atEj79+/X8uXLrX1tNpvWr1+vm2++2a2PnLblx4gRI3Ty5Elt3LhRderUkTFGixYt0smTJwvVb14Ux/uUky1btujkyZPq1KmTtS01NVVffPGFQkJCtHDhQg0bNixffV4JpkaPHm1tu/r98qT77rtPkyZN0t69e+Xv7+/pcgAAwP9ixhQAAOWYr6+vnnvuOYWHh2vo0KE6duyYoqKiFBYWpvbt2+vJJ5+Uy+Wy9n/llVesGUp33nmnzp8/79ZfamqqevTooSlTpkiSFi1apD/84Q+qXr26JGnTpk0KCwtTaGio7Ha7/uu//kuSVLlyZfXo0UPz58+/Yc2rV6+W0+nUjBkzdNNNN0mSvLy89NBDD6l79+4FvhbffPONWrVqpZMnT0qSnnzyST322GPZ9tu3b58WLlyoOXPmqE6dOpIuB2APPvig/Pz8JEmvvfaa2rdvr6CgIA0cOFBnzpzR+fPnVa9ePR07dszqKy4uTn/+858lSXv37tW9996rjh07Kjg4WDNmzLD2s9lsmjRpkjp27KgXXnhBSUlJ6ty5szp06KDAwEBNnTrV2vfs2bOKjY1VQECAunTpolGjRrkFSK+//rrCw8PVoUMH9ezZU4cOHcrxerzzzjsaMGCA27aPP/5Y3bt31zPPPKNZs2bl5/JKkkaPHq09e/YoNDRUffr0keQ+u2rq1Klq166dNZPt0KFDunDhgmJjYxUYGKiQkBD16NFDUvaZetu3b5evr6/1+quvvlLnzp0VFham8PBwrV271rrOf/jDHxQSEqKgoCBNmDDBOuahhx7Se++9l+/zAgAAxcgAAIBy4+DBg6Z27drW6xYtWpgRI0YYl8tljDHmwoUL5uzZs8YYYzIzM829995rPv74Y2OMMXPnzjUdO3Y0p0+fNsYYc/LkSZOZmWnWrl1rQkJCzOHDh02HDh3MvHnzrP6HDx9u3nrrLet1nz59zPz5863XJ0+etP4+b948069fP+t1SEiIOXr0aLZzeOWVV0yfPn3ydL6SzKlTp7Jts9vtJiQkxISEhJjIyEirbdq0aaZ3795mwYIFJiQkxFy4cCFbnwsWLDDBwcHXHXP58uUmICDAGvexxx4zo0ePtv7+2muvGWOMcblcxtfX12zbts1kZmaasLAws2vXLmO
MMefOnTNBQUFmy5YtVs2TJ0+2xkhNTTXp6enGGGPOnz9vQkNDzcaNG40xxjz77LNm6NChxuVymdTUVGO3283QoUONMcZ89NFHZuTIkSYzM9MYY8z7779vYmJicjwPPz8/k5SU5LatY8eOZsWKFebSpUumYcOGZvfu3W7X9co5t2jRwiQkJGTr88pn5WpX9j158qSpXbu2OX/+vHUNLly4YJYsWWJ69Ohh7X/ixIkc+0pKSjItWrQwxhizf/9+c+edd5ozZ84YY4zZu3evadSokUlPTzdPP/20mTZtWrb+jDFm3bp1JiwsLMfrAQAAPINb+QAAKOeGDRsmm80mSXK5XHruuef07bffyhij48ePy2636+GHH9bSpUs1evRo61a7K7OFJOnXX39V165d9d577+muu+6yticnJ6tnz57W66ioKL300kvau3evoqOj1blzZ6utUaNGSk5Otl7ndLthTtavX6+nnnpKaWlpGjBggDVb60bH5HTL3fPPP69evXrp8ccf15YtW1S1atU81XC1VatWKTY21up/zJgxevDBByVJjz76qEaOHKlnn31WDodD9erVU1BQkHbu3KkdO3bo4Ycftvo5e/asdu7cqY4dO0qShg8fbrVduHBBTzzxhBITE+Xl5aUjR44oMTFRd955p1avXq033nhDNptNNWvWVGxsrPbt2ydJ+vzzz/X9998rLCxMkpSVlXXd80hOTlbDhg2t10lJSfrll1/Uo0cPeXl5adCgQZo9e7ZeeeWVfF+jnNSqVUv+/v4aNGiQevTooXvvvVdNmzZVSEiIdu3apSeeeELdunVTTEzMDfv68ssvtW/fPnXt2tXa5uXlpcOHD6tr167661//qrS0NHXr1s1tlt21n0EAAOB53MoHAEA5V6NGDevv//znP3X8+HFt3rxZ27Zt04ABA5Senn7DPm6++WbZ7XYtXbpUxhhre7Vq1dyOHzt2rJYtW6Zbb71V48eP1xNPPGG1paenW7fm5ea2225TQkKCMjIyJEldunRRYmKiBg0apNTU1Dyd8/WcPXtWBw4cUPXq1fXbb7/luE+HDh20d+/ePC+YfiX0k6ROnTrJ5XJpy5Ytmjt3rh599FFJl9fNqlu3rhITE60/Bw8e1NChQ61jr36fxo8fr/r16yshIUE//fSTIiMjr/s+XT2+MUYvvPCCNUZSUtJ11/+69r2bNWuWzp49Kz8/P/n6+urjjz/W+++/r8zMzDxdhxvx9vbWpk2bNHbsWB0/flx33nmn1q9fLz8/P+3cuVM9e/bUd999J7vdrlOnTsnHx8ctWLu6VmOM7r77brfrefToUfn7+6tfv3767rvv1LZtW82YMUP33XefWx95+QwCAICSQzAFAEAFcurUKTVq1EhVq1bVsWPHtHDhQqutT58+mjlzps6cOSNJOn36tBUMVKlSRUuWLFFKSooee+wxa12q4OBgtyeu7dmzRy1bttRjjz2m8ePHa9OmTVbbrl27FBIScsMau3fvrmbNmulPf/qTLly4YG2/9ilxBTFixAgNHDhQn376qQYPHpxj+NS6dWv169dPI0aMsJ5CZ4zR4sWLdeDAAXXv3l2ffvqpFZK988471rpI0uVZU2+99ZaWLVtmreHUtm1b1apVS3PmzLH227dvn7Xe1bVOnTqlpk2bysfHR3v27NHKlSuttujoaM2bN0/GGKWlpenTTz+12u6//37NnDnT6jcjI0MJCQk5jnH1e3fp0iV9+OGH2rRpk5xOp5xOp44eParmzZtr2bJlN7yuV9SqVcv6/Fzr7Nmz+vXXX9WlSxe9+OKL6ty5sxISEpScnCybzaY+ffro9ddflzFGR44ckZ+fnw4dOmQFiB988IHV1z333KNVq1Zp27Zt1rYtW7ZIurzGVMOGDTVkyBC9+uqrBfoMAgCAksOtfAAAVCB/+tOf1L9/f7Vv316NGzd2u81p8ODBSklJUUREhHx8fFS9enWtWrXKaq9UqZLmz5+vkSNHauD
Agfrggw/Uv39/DR8+3Fqce8aMGVqzZo0qV64sb29v/eMf/7CO//LLL/XSSy9Zr0NDQ7V8+XI1btzYrUabzaYVK1ZowoQJstvtql69umrWrCk/Pz+98MILeTrPLl26yNvb23q9YMECrVy5UidPntSLL74oLy8vjRkzRkOGDNHSpUvdZh1J0uzZszV16lTdcccd8vHxkcvlUteuXXXXXXepV69e2r59uzp16iQvLy8FBwfr7bffdruOzZs3V79+/azbIX18fLR06VKNHTtWb7zxhrKyslS/fv3rLgY/YcIEDR48WPPmzVOrVq0UHR1ttU2cOFEjRoxQu3btVL9+fYWEhFi3FQ4cOFAnTpxQVFSUpMtP+Bs+fLhuu+22bGP0799fX331lbp3767PP/9cLVq0UEBAgNs+AwcO1KxZs9S3b988Xffg4GC1b99edrtdfn5+io+Pt9rOnDmj/v3769y5c7LZbPL399fQoUO1YcMGvfDCCzLGKDMzU4MHD1ZwcLAkady4cQoPD1fDhg3Vq1cvq6/WrVtr/vz5GjVqlM6fP69Lly7ptttu0/z587Vo0SJ9+OGHqly5slwul2bOnGkd9+WXX6p///55OhcAAFAybObq+fgAAAD5dO+99youLs5aKyknO3fu1KhRo7R+/foSrKx8ysjIUFZWlqpWrapz587pnnvu0VNPPaXY2Nh89ZOWlqaIiAht3LjReqpiefb7778rOjpaW7duVeXKlT1dDgAA+F8EUwAAoFD279+vXbt2ua3lc62vvvpKzZo1U2BgYAlWVj4dP35cvXr1UlZWltLT09W3b19Nnz4926yvvFi9erUaNmwou91eDJWWLps3b1ZWVpYiIiI8XQoAALgKwRQAAAAAAAA8gsXPAQAAAAAA4BEEUwAAAAAAAPAIgikAAAAAAAB4BMEUAAAAAAAAPMKnJAZxuVxKSUlRzZo1C/TEGAAAAAAAAJQNxhidPXtWjRs3lpdX7nOiSiSYSklJUbNmzUpiKAAAAAAAAJQCR44cUdOmTXPdp0SCqZo1a1oF1apVqySGBAAAAAAAgAekpqaqWbNmVh6UmxIJpq7cvlerVi2CKQAAAAAAgAogL8s5sfg5AAAAAAAAPIJgCgAAAAAAAB5RIrfyAQAAAAAAlHdZWVnKyMjwdBklplKlSvL29i5UHwRTAAAAAAAAhZSWlqbk5GQZYzxdSomx2Wxq2rSpatSoUeA+CKYAAAAAAAAKISsrS8nJyapWrZpuueWWPC36fTWn06nevXvriy++kK+vb/EUWcSMMfrtt9+UnJwsf3//As+cIpgCAAAAAAAohIyMDBljdMstt+imm27K9/GLFy/Wvn37tGTJEo0fP74YKiwet9xyi5xOpzIyMgocTLH4OQAAAAAAQBHI70ypKz755BO3r2VFQc/3agRTAArMMSzS0yUUvbg4T1cAAAAAoAI5cOCAtm/fLklKSkrSgQMHiqzvJUuWKCwsTKGhoQoICFB0dLRcLpciIyNVr149nTlzxtq3f//+mjt3riRp7ty5ql27tkJDQ9W+fXv16tVLhw8fLrK6rkYwBQBXcTgdni4BAAAAQAWyePFieXldjme8vLy0ZMmSIun3l19+0eOPP64lS5YoMTFRu3fv1uuvv27NcqpVq5amT59+3eOjoqKUmJioHTt2qE2bNvrzn/9cJHVdizWmAAAoK+LimNUHAABQRm3dulXvvfdetu0rVqywnuRnjNFbb72lffv2Zdtv5MiRuv322/M83q+//ipvb2/VrVvX2tahQwfr788995wmTpyop556So0bN861r3vuuUfjxo3L89j5QTAFABUJwUaZ5nA6FOnpIgAAAFAg+/fv17vvvitjjLy9va1ZUsYYt2AqJSVFs2fPliS5XC5lZWXJZrMpKioqX8FUcHCwOnfurBYtWqhbt26KiIjQgAED1KRJE0lSo0aNNGrUKE2aNEn//d//fd1+srKytHDhQoWFhRX01HPFrXwAUIF
wqyIAAADgGbGxsVq9erUaNGgg6fKT/DIyMpSZmem2X2ZmptUmSQ0aNNCaNWsUGxubr/G8vLy0ePFibdiwQT179tR3332n9u3bu83G+utf/6qlS5dq9+7d2Y5fu3atQkNDFRYWJpvNpn/84x/5PeU8YcYUAAAAAABACYiKitKOHTv06KOPaunSpTfcv1evXpozZ47q169f4DEDAgIUEBCgUaNGqWfPnoqPj7faatWqpeeee04vvPCCvL29s9X6+eefF3jcvGLGFAAAAAAAQAmpX7++4uPjNW3atFz3mzZtmuLj4wscSh09elTfffed9frUqVM6ePCgWrVq5bbfmDFjlJiYqB9++KFA4xQWwRQAAAAAAEAJstlsqlOnjvWEvJza69ate932vMjMzNSUKVPUpk0bhYaGqkuXLho6dKj69u3rtl+VKlU0ZcoUOZ3OAo9VGNzKBwAAAAAAUMI+/fRT2Ww2GWNy/Lpw4UKNGjWqwP23aNFCX331VY5tDofD7fXgwYM1ePBg6/WwYcM0bNiwAo+dH8yYAgAAAAAAKEEnTpzQunXr5HK55OPjoxo1aujFF19UjRo15OPjI5fLJYfDoZMnT3q61GJHMAUAAAAAAFCC4uPj5XK5JEkdO3bU9u3bNWXKFCUlJen222+XJGVlZbktVF5eEUwBAAAAAACUoBUrVsjLy0uTJk3SN998o+bNm0u6fPvd+vXrNXHiRHl5eWnFihUerrT4scYUAAAAAABACRo7dqz+8pe/6I477sjW5uPjo8mTJysmJkZZWVkeqK5kEUwBAAAAAACUoIiIiBvuk1NoVR5xKx8AACi8uDhPVwAAAIAyiGAKAAAUmsPp8HQJAAAAKIO4lQ8AAAAAAKAYxDniiqffyLz16+vrqypVquimm27ShQsX9Oijj+r555/P9ZhNmzbpsccek4+Pj6ZPn6577rmnCCq+PoIpAAAAAACAcmrBggUKDQ3V0aNHFRgYqOjoaIWHh193/3nz5mnAgAF64YUXSqQ+buUDAAAAAAAo55o0aaKAgAAdOnRIx44d00MPPaTw8HAFBQVpwoQJkqTp06drwYIFmjFjhkJDQ3X69Olir4sZUwAAAAAAAOXc7t27deLECUVGRmrQoEEaP368unXrpszMTN13331auHChnn/+ee3evVuhoaEaO3ZsidRFMAUAAAAAAFBOxcbGysvLS3v27NEbb7yhatWqafXq1fr111+tfdLS0rRnzx6P1EcwBQAAAAAAUE5dWWNq1apV6t27t6KjoyVdXuS8atWqHq6ONaZQ3OLiPF0BAAAAAAAVXvfu3TVmzBhNmDBBUVFRmj59utWWkpKi5ORkj9TFjCkAAAAAAIAK4MUXX1Tr1q21fPlyvfXWW7Lb7bLZbKpevbreeecdNW3atMRrIphCsXI4HYr0dBEAAAAAAHhAXGScR8d3Op1ur+vUqaMTJ05Iku64444cj5k7d24xV+WOW/kAAAAAAADgEQRTAAAAAAAA8AiCKaAsY3F5AAAAAEAZRjAFlGEOp8PTJQAAAAAAUGAEUwAAAAAAAPAIgikAAAAAAAB4BMEUAAAAAABAOXX27FnVqFFDI0aMsLbNnTtX999/vyTJ6XRq5syZHqpO8vHYyAAAAAAAAOVZcT2wKh/9LliwQGFhYVqyZIn+9a9/qUaNGm7tV4Kp0aNHF3GRecOMKQAAAAAAgHJq1qxZeu6559S1a1ctWLAgW/vo0aO1Z88ehYaGqk+fPiVeH8EUAKBgiutffwAAAAAUiZ07d+rIkSO65557NGLECM2aNSvbPjNnzlTbtm2VmJio+Pj4Eq+RYAq4gl+ygXxxOB2eLgEAAABALmbNmqUhQ4bI29tbMTExOnjwoHbt2uXpstywxhTwvxxOhyI9XQQAAAAAAEUgIyNDH3zwgSpVqqT58+dLks6fP69Zs2bJbrd7uLr/w4wpAAAAAACAciY+Pl5+fn46evSonE6nnE6nNm3apA8++EAZGRnWfrV
q1dKZM2c8VifBFAAAAAAAQDkza9YsDRw40G1bu3bt1KRJE509e9baFhwcrPbt28tut3tk8XNu5QMAAAAAACgOHlzLePny5Tlu//HHHyVJzzzzjCTJx8dHS5cuLbG6rsWMKQAAAAAAAHgEwRQAAACKhWNYpKdLAAAApRzBFAAAAAAAADyCYAoAAAAAAAAeQTAFAAAAAABQBIwxni6hRBXF+eb5qXz//ve/c21/+umnC10MAAAAAABAWVOpUiXZbDb99ttvuuWWW2Sz2TxdUrEzxui3336TzWZTpUqVCtxPnoOphISE67ZVhAsOAAAAAACQE29vbzVt2lTJyclyOp2eLqfE2Gw2NW3aVN7e3gXuI8/B1Jw5cwo8CAAAAAAAQHlWo0YN+fv7KyMjw9OllJhKlSoVKpSS8hFMXS0lJUXbt29Xenq6ta1Pnz6FKgQAAKAicwyLVORch6fLAAAAheDt7V3ooKaiyXcwNXv2bE2ZMkUnT56Uv7+/fvrpJ915550EUwAAAAAAAMiXfD+V74033lBCQoJatWqlH374QWvWrFGbNm2KozYAAAAAAACUY/kOpipXrqw6deooMzNTktS1a1clJibmvYO4uPwOCQAAAAAAgHIo37fyValSRcYYtWnTRm+++aZatGihtLS0PB/vcDoUmd9BAQAAAAAAUO7kO5iaOnWqUlNT9eqrr2r06NE6ffq03n777eKoDQAAAAAAAOVYvoOp6OhoSVLt2rW1cuXKIi8IAAAAAAAAFUO+g6nMzEwtXrxY+/fvt9aZkqSJEyfmvZO4ONaaAgAAAAAAqODyHUw9/PDDOnbsmMLDw+Xt7V2gQVlnCgAAAAAAAPkOppKSkrR7927ZbLbiqAcAAAAAAAAVhFd+D2jWrJkuXbpUHLUAAAAAAACgAsn3jKnWrVsrMjJSf/zjH1W1alVr+9NPP33jg19+Ob/DAQAAAAAAoJzK94ypixcvKiAgQLt27VJCQoISEhKUmJhYDKUBAAAUAA9YQVnE5xYAUEHle8bUnDlzCjzY+kPrVb1yvodEWcdTGAEAAHLFw4EAABVVvlOi999/P9u2m2++WWFhYWrSpEmRFIXyhR+0AAAlif/vAAAAlB35DqY++ugjffPNN+rcubNsNpu+/fZbhYeH6+eff9abb76phx56qGCVXJlRw8waACj9+G81AAAAgCKQ7zWmatSooYSEBK1cuVJff/21EhISVLduXW3YsEFTp04tcCEOp0MOp6PAxwMASg7/vQaKGGEvAACooPIdTP38888KCAiwXrdt21b79u2Tr6+vvLzy3R0AAECFR9gLAAAqqnwnSTVr1tT7778vY4yMMXr//fdVo0aN4qgNAACgdGOmEwAAQKHkO5iaM2eOZsyYoSpVqqhq1aqaMWOGZs2apXPnzum1114rfEX8gAcAAAAAAFAh5DuYatu2rbZs2aITJ07o999/15YtW9SuXTtVr15dd999d947uk4AxVR2AABQVvBzCwAAQOHk+al8e/fulb+/v7Zt25Zje3BwcL4G5lHOAAAAAAAAFVueg6mxY8dq2bJl6tu3r7XNZrPJGCObzaYDBw7kf/S4uP/7AwAAAAAAgAolz8HU0qVLJUkHDx6UJO3fv1/x8fFq3bq1evfuXagimAYPAAAAAABQ8eR5jam7775biYmJkqSUlBR17NhRX3/9tcaNG6dXXnmlQIM7nI58zZZyDIss0DgAAAAAAAAoffIcTB09elShoaGSpPnz56tbt25asWKFNmzYoI8++qjABdxwthS3+QEAAAAAAJRLeQ6mbrrpJuvvGzZsUExMjCSpTp068vHJ8x2B+cZtfgAAAAAAAOVTnoMpLy8vJScnKy0tTevWrVO3bt2stvPnzxdLcTliBhUAAAAAAEC5kOepTuPHj9dtt90mHx8fRUVFqU2bNpIuz57y9fUtnupyCKEcTocii2c0AAAAAAAAlKA8B1MPPPCAIiIi9Ouvvyo4ONja7uvrq3fffbd
YiuM2PgAAAAAAgPIrX4tDNWrUSI0aNXLb1rhx4yItCAAAAAAAABVDnteY8qi4OPfb+lhnCgAAAAAAoMwrncHUNcGTw+lwu62PW/wAAIDH8Q9lAAAAhVYqgymCJwAAUNrx8woAAEDhlcpgCh7Cv/wCAAAAAIASRDAFC//yCwAAAAAASlLpDaZuNHuH2T0AAAAAAABlWqkNpm40e4fZPQAAAAAAAGVbqQ2m8oRZUwAAAAAAAGVWmQ6mmDUFAAAAAABQdpXpYAoAAAAAAABlF8EUAAAAAAAAPIJg6mpxcaxbBQAAAAAAUELKXzBViGDJ4XSwbhUAAAAAAEAJKfPBlGNYpPtrpyPbNuQBM8UAAAAAAEAJK/PBFIoGM8UAAAAAAEBJK9fBFDOnAAAAAAAASq9yHUwBAAAAAIDSiwklKFfBlNsHmjWTAAAAAAAo/fj9vUIrV8HU1UpszaTy9g1U3s4HAAAAAFCqseZxxVY+gikPhinl7RuovJ0PAAAAAAAovcpFMJXXMMUxLJL7VwEAAAAAAEqJchFM5Sqvs6kKMuuqPN/2lt9zK8/XAgAAAAAAFIvyE0xdJxhxOB15miV19ayr3Pa/uq083/aW47nFxWW7zldmoTmcDikujhlpAAAAAAAgz8pNMFXiIVEFnCHkcDpyvc5X2ginAAAAAABAXpSbYOpGiiIsqSizpa647jWrgKEcAAAAAAAoehUmmMqmEOFKtsCmIgQ1V50jt+0BAAAAAICiUGGDqaKc8XS9vspTcHPtOVaEGWMAAAAAAKB4VchgqkCBkYdmRZWncAsAAAAAAOBqFTKYyi/rqXOFVUpv+bvhrYkFqJtADQAAAAAA3AjBlIo3RCmLC6Zz2x4AAAAAACgJFTqYKtZZPdeZZZTTmI5hkcwwKqV4XwAAAAAAKD4VOpjK1f8GSwUNJgo0y+h6t8yV0lsAAQAAAAAACoNg6nozmwp4+1qOs5/i4q4bcF17q9/Vx1tfPXgrHTOGAAAouIMHD6pNmzY6ePCgp0sBAAAolSp8MFWUoc91w6ciGKPYAiJmYwEAUGzmz5+vvXv36uOPP/Z0KQAAAKVShQ+mPCEvIdON9rl2ZtbVs6xu+JS9q/thYXMAAIrNJ5984vYVAAAA7gimikoJzDzKy+2AObYTPgEAUOIOHDig7du3S5KSkpJ04MABD1cEAABQ+hBMFZH8hj/F/SQ+1oYCAMCzFi9eLC+vyz9qeXl5acmSJR6uCAAAoPTx8XQBpVmZC3dYLwoAgBK3detWvffee9m2r1ixQsYYSZIxRm+99Zb27duXbb+RI0cWe40AAAClFcFUGXftU/0AAEDJ2r9/v959910ZY+Tt7W3NkjLGuAVTKSkpmj17tiTJ5XIpKytLNptNUVFRauix6gEAADyLW/nKmMLM4ipzM8AAACgDYmNjtXr1ajVo0ECSlJGRoYyMDGVmZrrtl5mZabVJUoMGDbRmzRrFxsaWeM0AAAClBcFUBZPTk/xwY1wrAEBuoqKitGPHDvXq1StP+/fq1Us7duxQZGRk8RYGAEBZwdI0FRbBVAVxbSBF0HJjV64R1woAkBf169dXfHy8pk2blut+06ZNU3x8vOrXr19ClQEAUPqxNE3FRTBVzhGqFB2uJQDgRmw2m+rUqSObzXbd9rp16163HQAAoKIhmEKxKctBzvVqz+tss7J87gCAwvn000+t4CmnrwsXLvRYbQAAAKUNwRRKxJVAp7QGNvmty6Pnwb3XAFBqnThxQuvWrZPL5ZKPj49q1KihF198UTVq1JCPj49cLpccDodOnjzp6VIBAABKBYIplLjrhjpxcW7hVU77lUS4ldf+c6rVrb4rAVIOQVJegjq3fa7qw+F0lNqADwAquvj4eLlcLklSx44dtX37dk2ZMkVJSUm6/fbbJUlZWVmKj4/3ZJkAAAClho+nC0DFdCVYifSNzHGRu2sDn8i57mHM1dvzO26kb6QV9DiGRea7j5z6vPqrtd3pkK5su0E
AdcWVWnLtK4fjAAClw4oVK+Tl5aUXX3xREyZMkI/P5R+1WrRoofXr1+ull17S1KlTtWLFCg0bNsyzxQIAAJQCBFPwqLw+eSG3NZ+uhFvXC5jyEvLcaJyS4unxAQCFM3bsWP3lL3/RHXfcka3Nx8dHkydPVkxMjLKysjxQHQAApVtRTBxA2VMiwZQxRpJ0PiOzJIYr95YN6Jyn/VJTU3XuUvm/5st+XnX564DO6jJzuSRp/eiYvB+fx+tZWqWmpv7fi5df1vpD6yXJuhY5evll6YUXCj32uUuZ7uOXsPWjY3I/zwLw9DkVt6I6vyv/bSnP16o0Ks2fz9JUW0nVktP3gd1uz7btWu3atcu2T3n9nipNn4vSjmsFoKK69nfWK7+f5fXn/Cu/+xX17wUonCv/T7uSB+XGZvKyVyElJyerWbNmxT0MAAAAAAAASokjR46oadOmue5TIsGUy+VSSkqKatasaT0uGQAAAAAAAOWPMUZnz55V48aN5eWV+3P3SiSYAgAAAAAAAK6Ve2wFAAAAAAAAFBOCKQAAAAAAAHgEwRQAAAAAAAA8gmAKAIAi4uvrq8TExDzv379/fzVu3Fg2m02nT592a9u8ebNCQkLUpk0bRUdH6+jRozn28f333ysiIkLVqlXT/fffn+c2SVq3bp06duyo9u3bKzAwUBs3bpQkXbhwQUOGDJHdbpfdblefPn3022+/Sbr8QJNnnnlGgYGBCg4OVlRUlPbt2ydJ+uqrrxQaGmr9ady4sTp06JBt3EmTJslms7ldq7179yoiIkJt2rRRx44dtWPHDqtt+fLl6tChg0JDQ2W32zVv3rzrXtNp06apbdu28vLy0ueff57ntjvuuMOq2263y2azadu2bZKk8+fP65FHHlHr1q3Vpk0bLVq0yDpu9uzZCgoKko+Pj9588023Pvfu3auoqCiFhoYqICBAf/nLX+Ryudz2OX78uBo2bOj2/uR2/SVp1qxZ8vf3V6tWrfTYY48pIyPjutfj2Wef1SeffCIp98/DZ599puDgYIWGhiowMFB/+9vfrMc73+hzNHXqVLVq1UqtWrXS3/72N2v73//+d7fPQ61atfTMM89IktasWaPw8HAFBgaqffv2GjdunHVtHA6HQkNDr3tORWnixIn66KOP8n3c0qVLFRkZWSQ1bNu2Tb169SqSvgAAKJMMAAAoEi1atDAJCQk33C8jI8MYY8zKlSvNr7/+aiSZU6dOWe1ZWVmmVatWZs2aNcYYY1577TXTv3//HPs6cuSI2bx5s5k5c6bp27dvntuOHj1qWrRoYXbu3GmMMSY9Pd2q4Y033jD9+vUzLpfLGGPMyJEjzV//+ldjjDGfffaZCQ8PN5cuXTLGGPPSSy+ZBx98MMfa7r33XvP666+7bdu8ebPp1atXtmsVFRVl5syZY4wxZuHCheb22283xhjjcrlMnTp1zE8//WSMMebgwYOmSpUqJjU1NccxN2/ebPbv32+6detmPvvsszy3XW3hwoXGbrdbrydPnmyGDh1qjDHmwIED5pZbbjG///67McaYxMREs3PnTjN48GDzxhtvuPXTt29f869//csYY8yFCxeM3W43y5Ytc9vn/vvvN8OHD3d7f3K7/gcOHDC33nqr+eWXX4zL5TK9e/c2M2bMyPE8kpOTTbt27ax+cvs8pKammqysLGOMMRcvXjQdO3Y0S5YsueFx69atM4GBgSYtLc2kp6ebsLAws3Tp0my1pKenm7p165qtW7caY4z58ccfzf79+61r84c//MF6/9euXWtCQkJyPKfS4osvvjDdunUrsv769u1rVq9eXWT9AQBQljBjCgCAfNq4caM6d+6skJAQBQcH63/+53+stiVLlqhTp05q2bKlpk6dam2PjIzU008/rU6dOqlHjx6SpO7du6tBgwbZ+v/hhx/k4+OjqKgoSdKoUaP0xRdfKD09Pdu+TZs2VXh4uKpUqZKvtrffflsDBgxQu3btJElVqlTRzTffLEm
y2Ww6f/68MjIylJmZqbS0NDVt2tRqu3jxotLT02WMUWpqqtV2tZSUFK1evVqDBw+2tp0/f15PPvmk3nnnHbd9jx8/rq1bt2rQoEGSpH79+unIkSPWTKyrZ5SlpqaqXr16OZ6TJIWHh8vPzy/fbVebNWuWRowYYb1esGCBRo8eLUlq2bKlIiMj9dlnn0mSQkJC1K5duxwfg2yz2XTmzBlJl2dBZWRk6NZbb3Ubp2XLlurSpUu24653/RctWqQ+ffqoUaNGstlsGj16tD7++OMcz2P27Nnq16+fbDabpNw/DzVr1rTOIT09XRcvXszTcQsWLNDgwYNVvXp1ValSRcOHD8+xns8//1zNmjVTWFiYJOm2226z3ouqVasqNDRUTqfT2j8zM9OaNRYWFmbNrnM4HLLb7Tm2Xe3nn39WmzZtJF1+XHXDhg01fvx4SdI333yj6OhoSdKwYcOsmW5xcXGKjY1V7969FRgYqOjoaJ08eVKSlJGRoSeeeEL+/v4KDw/X2rVr3cZ77bXX1L59ewUFBWngwIHW+96kSROlpKRIkh566CFFRERIki5evKh69erp4sWLkqRHHnkk2/cFAAAVBcEUAAD5cPLkSd1///16+eWX9dNPPykxMdEtWDh9+rQ2btyo77//Xq+99prbLXg///yzvvnmG61ZsybXMQ4fPqwWLVpYr2vWrKlatWpZv+DGxMRo69athTqPnTt36sKFC+revbtCQ0P11FNP6dy5c5IuB2E1a9ZUgwYN1LBhQ505c0ZPPvmkJKl3796KjIxUo0aNdOutt2r16tWaMmVKtv7nzp2rmJgYt+Bt3LhxGjNmjJo1a+a275EjR3TrrbfKx8dH0uVgpnnz5jp8+LBsNpsWLFigBx54QC1atFDnzp01b948Va5cuVDnfz1HjhzRunXrrJBMyv5++Pr66vDhwzfs680339TChQvVuHFjNW7cWEOGDNFtt90mSTp48KBmzpypv//979mOy+3656cWh8OhO+64I28nLmnDhg0KCgpSgwYNFB0drb59+97wmLzWc23Yd7Vjx45p0aJFuu+++6xtO3bs0NChQ7V9+3Y999xzevjhh61bC3Nru6JNmza6ePGiDh8+rG3btsnPz0+rV6+WJK1cuVLdu3fPsZbNmzdr7ty52rlzpxo0aGCFRe+++6727NmjHTt26Ntvv9WPP/5oHbNixQrNnj1b3333nZKSklS9enU9//zzkqS77rpLq1atksvl0k8//aQzZ84oNTVV3377rcLCwqywr1OnTlZ9AABUNARTAADkw8aNG9W2bVsrjPLy8lLdunWt9gEDBkiS6tevLz8/Px08eNBqGzRokCpVqlToGpYvX67bb7+9UH1kZmbqm2++0cKFC/X999/r1KlTmjRpkiTp66+/lsvl0rFjx/TLL7/o5ptv1sSJEyVJW7du1fbt23X06FGlpKTorrvusmYTXWGM0ezZs92CiJUrV+rQoUN69NFH813n1KlTtWTJEh06dMiahfX7778X6vyvZ+7cubrvvvtUv379Qvf19ttv65FHHlFKSooOHTqkjz76SCtXrpQxRsOHD9eMGTN00003ZTsut+ufH8nJyWrYsGGe94+IiFBSUpKOHDmiH374QevXr8/3mDk5dOiQvv32Ww0cODBbW2pqqnr37q1x48a5faZ9fX111113Sbo80+jYsWM6cuTIDduudiUUWrVqlQYPHqyLFy/q9OnTWrVqlXX8tXr27Kl69epJuhwW7d+/X5K0evVqDRkyRJUrV1blypU1fPhw65hVq1YpNjbWmnE4ZswYrVy5UtLlWZGrVq1SQkKCQkJCFB0dLYfDka2GRo0a6cSJEznOigQAoLwjmAIAoAhVrVrV+ru3t7cyMzOt1zVq1MhTH82bN9ehQ4es12fPntWZM2fUuHHjIquzefPmuvfee1WnTh1VqlRJjzzyiDZt2iTp8uyQP/7xj6pataoqV66sgQMHWrcuvf/++4qOjtbNN98
sLy8vDR06NNttTevWrVN6erruuecea9uaNWv0448/ytfXV76+vkpOTlZMTIy++OILNWvWTL/88ot1rYwxOnz4sJo3b67ExESlpKSoa9eukqSOHTuqadOmSkhI0KpVq6yFtXOaeZRfxhjNmTMn28yea98Pp9Op5s2b37C///znPxo6dKgkqUGDBoqJiZHD4VBqaqq2bdum2NhY+fr66tlnn9XXX39tBRW5Xf/81FKtWrUCBR233HKLYmJitHDhwhvum5d65syZo759+7oFuNLlz3XPnj3Vt29fa1H067HZbNathXltuxIKrVq1St27d1dUVJQ+++wz7d2797rBbm7fv9eOmVutV9ewevVqa5bW1TVdHUylp6fL29u72GYCAgBQmhFMAQCQDxEREdq7d681m8Tlclnr0BSVsLAwZWRkWGHEO++8o969e7v90lxYAwYM0Nq1a601blasWKGQkBBJkp+fn77++msZY2SM0bJly2S32622NWvW6NKlS5IuP53sStsVs2bN0rBhw+Tt7W1te/nll3X06FE5nU45nU41bdpUy5cvV+/evdWgQQN16NBBH374oSRp8eLFatq0qVq3bm2FVrt27ZIk7du3T/v371fbtm3VvXt3JSYmKjEx0e1pcAW1Zs0aZWZm6u6773bb/uCDD2rmzJmSLt+C53A4cnw63bX8/Pz05ZdfSpLOnTuntWvXym63q3bt2jpx4oR1LV5//XX16NHDupUrt+vfr18/xcfH69ixYzLGaObMmXr44YdzHD84OFh79uzJ07nv3r3beire2bNntWzZMgUHB9/wuAcffFAffPCBzp07p4sXL2r27Nlu9bhcrhzDvrS0NPXs2VM9e/bUhAkTsvXrdDqtz/+iRYvUsGFDa52t3Nqudtddd2n16tVyOp1q06aNunfvrsmTJ6tz585un8286N69uz788ENlZGTo0qVLmjNnjlvbp59+qtTUVEmXv1+vrCPXuHFj1a5dWzNnzrTCsaVLl8rpdLo9sXLXrl2y2+05rlUGAEC555El1wEAKMM2btxoIiIiTFBQkAkJCTHx8fHGmOxP5QsLCzNr1641xpgcnwQXExNjmjRpYiSZxo0buz3la8OGDSYoKMj4+/ubbt26mcOHD1ttvXr1Mt9//70xxpjdu3ebJk2amDp16piqVauaJk2amP/85z83bDPGmFdffdW0a9fO2O12Exsbaz2V78SJE6Zfv34mMDDQBAYGmgceeMD89ttvxpjLT1cbOXKkCQgIMEFBQebuu++2nq5mjDGnT5821apVc9uWk2uv1e7du82dd95p/P39TVhYmNm2bZvVNn/+fGO3201wcLCx2+3mo48+um6/L730kmnSpImpXLmyqVevnmnSpIk5fvz4DduMMeaRRx4xEydOzNZnWlqaeeihh4yfn5/x9/c3CxYssNrmzJljmjRpYqpVq2Zq165tmjRpYn788UdjzOUnz0VERJjg4GATEBBgxo0bZz0h72pz5sxxe9pdbtffGGPeffdd4+fnZ/z8/Mzw4cOtJyReKz4+3u1pjrl9HuLi4ky7du1McHCwad++vZk0aZJV640+R5MnTzYtW7Y0LVu2NM8//7xbDV999ZXx9fXNdt5Tp041Pj4+JiQkxPozdepUY8zlp/K1b9/eDBkyxNjtdtOhQwfrmubWlpOgoCDz6KOPGmOMOXv2rKlUqZL597//bbUPHTrUepripEmTzJ/+9Cer7a233rKexnjp0iUzZswY07p1a9OxY0fzzDPPuH2/vvrqqyYwMNDY7XYzYMAAc/r0aavtqaeeMi1btrRed+rUyTzwwANudcbFxZmXXnrpuucBAEB5ZjPmmtUiAQAAUOa5XC6Fh4fr888/z3FGUVnkcDg0duzYHJ/EV1ZdunRJt99+u9asWVMka5sBAFDWMF8YAACgHPLy8tI777wjp9Pp6VKQi4MHD2r69OmEUgCACosZUwAAAAAAAPAIZkwBAAAAAADAIwimAAA
AAAAA4BEEUwAAAAAAAPAIgikAAAAAAAB4BMEUAAAAAAAAPIJgCgAAAAAAAB5BMAUAAAAAAACPIJgCAAAAAACARxBMAQAAAAAAwCP+P/JWyzW5q+rNAAAAAElFTkSuQmCC", "text/plain": [ "
" ] @@ -320,8 +344,8 @@ "name": "stdout", "output_type": "stream", "text": [ - " - sum_wt = 75068.61\n", - " - sum_mut = 90392.24\n", + " - sum_wt = 77193.69\n", + " - sum_mut = 93064.84\n", " - max_y_wt = 696.2884\n", " - max_y_mut = 837.6076\n", " -- (max_y = 837.6076)\n" @@ -329,7 +353,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -341,8 +365,8 @@ "name": "stdout", "output_type": "stream", "text": [ - " - sum_wt = 44652.867\n", - " - sum_mut = 45602.734\n", + " - sum_wt = 45951.043\n", + " - sum_mut = 46931.766\n", " - max_y_wt = 425.3462\n", " - max_y_mut = 430.3084\n", " -- (max_y = 430.3084)\n" @@ -350,7 +374,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -362,8 +386,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 56.5 s, sys: 1.8 s, total: 58.3 s\n", - "Wall time: 1min 28s\n" + "CPU times: user 1min 6s, sys: 941 ms, total: 1min 7s\n", + "Wall time: 1min 18s\n" ] } ], @@ -900,7 +924,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.8.15" } }, "nbformat": 4, diff --git a/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb b/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb index 723fdbe..a412d73 100644 --- a/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb +++ b/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb @@ -10,9 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-27 09:37:40.216505: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-09-26 18:00:42.776653: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-09-26 18:00:42.776733: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-09-26 18:00:42.777952: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-09-26 18:00:42.787432: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-09-27 09:37:43.728456: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-09-26 18:00:44.230820: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -47,24 +50,21 @@ "id": "a8911e01", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "bash: /home/jlinder/anaconda3/envs/borzoi_py39_2/lib/libtinfo.so.6: no version information available (required by bash)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "f0 model already exists.\n", - "f1 model already exists.\n", - "f2 model already exists.\n", - "f3 model already exists.\n", - "Annotation already exists.\n", - "Splice sites already exist.\n", - "PolyA sites already exist.\n", + "f3c0 model already exists.\n", + "f3c1 model already exists.\n", + "f3c2 model already exists.\n", + "f3c3 model already exists.\n", + "Gene annotation already exists.\n", + "Gene annotation (no read-through, protein-coding) already exists.\n", + "Gene annotation (protein-coding) already exists.\n", + "TSS annotation already exists.\n", + "Splice site annotation already exist.\n", + "Splice site annotation already exist.\n", + "PolyA site annotation already exist.\n", "Human genome FASTA already exists.\n" ] } @@ -72,40 +72,71 @@ "source": [ "%%bash\n", "\n", - "#Download model weights\n", - "for rep in f0 f1 f2 f3; do\n", - " mkdir -p \"saved_models/$rep/\"\n", - " local_model=\"saved_models/$rep/model0_best.h5\"\n", + "#Download model weights (data fold 3, 4 replicates)\n", + "for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=\",\"; set -- $rep; \n", + " mkdir -p \"saved_models/$1/train\"\n", + " local_model=\"saved_models/$1/train/model0_best.h5\"\n", " if [ -f \"$local_model\" ]; then\n", - " echo \"$rep model already exists.\"\n", + " echo \"$1 model already exists.\"\n", " else\n", - " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$rep/model0_best.h5\" 
-O \"$local_model\"\n", + " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5\" -O \"$local_model\"\n", " fi\n", "done\n", "\n", "#Download and uncompress annotation files\n", - "if [ -f gencode41_basic_nort.gtf ]; then\n", - " echo \"Annotation already exists.\"\n", + "mkdir -p hg38/genes/gencode41\n", + "mkdir -p hg38/genes/polyadb\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then\n", + " echo \"Gene annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then\n", + " echo \"Gene annotation (no read-through, protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then\n", + " echo \"Gene annotation (protein-coding) already exists.\"\n", "else\n", - " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > gencode41_basic_nort.gtf\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein.gtf\n", "fi\n", - "if [ -f gencode41_basic_protein_splice.csv.gz ]; then\n", - " echo \"Splice sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then\n", + " echo \"TSS annotation already exists.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > 
hg38/genes/gencode41/gencode41_basic_tss2.bed\n", "fi\n", - "if [ -f polyadb_human_v3.csv.gz ]; then\n", - " echo \"PolyA sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then\n", + " echo \"Splice site annotation already exist.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz\n", + " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein_splice.gff\n", + "fi\n", + "\n", + "if [ -f hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then\n", + " echo \"PolyA site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O hg38/genes/polyadb/polyadb_human_v3.csv.gz\n", "fi\n", "\n", "#Download and index hg38 genome\n", - "if [ -f hg38.fa ]; then\n", + "mkdir -p hg38/assembly/ucsc\n", + "\n", + "if [ -f hg38/assembly/ucsc/hg38.fa ]; then\n", " echo \"Human genome FASTA already exists.\"\n", "else\n", - " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38.fa\n", - "fi" + " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38/assembly/ucsc/hg38.fa\n", + "fi\n" ] }, { @@ -117,7 +148,7 @@ { "data": { "text/plain": [ - "Faidx(\"hg38.fa\")" + "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" ] }, "execution_count": 3, @@ -126,7 +157,7 @@ } ], "source": [ - "pyfaidx.Faidx('hg38.fa')" + "pyfaidx.Faidx('hg38/assembly/ucsc/hg38.fa')" ] }, { @@ -139,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ 
- "2023-09-27 09:38:21.436402: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10372 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:81:00.0, compute capability: 6.1\n" + "2024-09-25 11:00:38.644220: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10232 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" ] } ], @@ -181,7 +212,7 @@ "models = []\n", "for rep_ix in range(n_reps) :\n", " \n", - " model_file = \"saved_models/f\" + str(rep_ix) + \"/model0_best.h5\"\n", + " model_file = \"saved_models/f3c\" + str(rep_ix) + \"/train/model0_best.h5\"\n", "\n", " seqnn_model = seqnn.SeqNN(params_model)\n", " seqnn_model.restore(model_file, 0)\n", @@ -210,11 +241,11 @@ "source": [ "#Initialize fasta sequence extractor\n", "\n", - "fasta_open = pysam.Fastafile('hg38.fa')\n", + "fasta_open = pysam.Fastafile('hg38/assembly/ucsc/hg38.fa')\n", "\n", "#Load splice site annotation\n", "\n", - "splice_df = pd.read_csv('gencode41_basic_protein_splice.csv.gz', sep='\\t', compression='gzip')\n", + "splice_df = pd.read_csv('hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz', sep='\\t', compression='gzip')\n", "\n", "print(\"len(splice_df) = \" + str(len(splice_df)))\n" ] @@ -255,8 +286,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-27 09:39:03.367207: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600\n", - "2023-09-27 09:39:03.874328: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-09-25 11:00:55.770144: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-09-25 11:00:55.859364: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child 
process: No such file or directory\n", + "2024-09-25 11:00:56.180294: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -271,7 +303,7 @@ }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKYAAAC+CAYAAAAP1AcDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1AklEQVR4nO3deVxVdf7H8fdlEfctTVMUQlFElqukqbkgkQum1i9LxyUzTa0px5qZSh8tZv4ay5pq8jdj/cbUFhs1nYZMKzfUXNIKXHLJDRVNM3EBRQXu9/eHeX4CF7ysFy6v5+PhI+75nvs9n3vuIS5vvt/vsRljjAAAAAAAAIAy5uXuAgAAAAAAAFA5EUwBAAAAAADALQimAAAAAAAA4BYEUwAAAAAAAHALgikAAAAAAAC4BcEUAAAAAAAA3IJgCgAAAAAAAG5BMAUAAAAAAAC3IJgCAAAAAACAWxBMAQDgAex2u+x2u0JDQ+Xt7W09Hjx4cLH7TkhIkN1uz7d95syZmj59eoF9zJw5U6+88orLx0xPT9fEiRPVsmVLhYeHKzIyUsOHD9ehQ4cUFxdnvT6bzabw8HDZ7XZ169ZNknJsu/bv9OnTLh9bkrKysvTSSy8pJCREYWFhstvtGjt2rM6ePVuofsq7+++/X5s2bcqxbeTIkapdu7YuXLiQY7vNZrNef2BgoJKSkvL0l5ycrFmzZuXYFhcXp71795Zo3UWxdOlSjR071t1lAACAXHzcXQAAACi+ayFBcnKy7Ha709AgKytLPj4l+6M/IyNDf/3rX7Vjx44C9xs7dqzatGmj3//+96pTp06B+xpjFBcXpzZt2mjHjh2qVq2aHA6HPv30Ux04cEDLli2z9rXZbFq/fr3q1q2bow9n2wpj9OjRSk1N1aZNm1SvXj0ZY/Tpp58qNTW1WP26ojTeJ2e2bNmi1NRUde7c2dp2/vx5ff7554qMjNSiRYv00EMPFarPa8HU+PHjrW3Xv1/udPfdd+vFF1/Uvn37FBwc7O5yAADAbxgxBQCABwsMDNQzzzyjjh07auTIkTpx4oR69uypqKgotW3bVo8//rgcDoe1/6uvvmqNUOrUqZMuXryYo7/z58+rV69emjp1qiTp008/1R133KEaNWpIkjZv3qyoqCjZ7XaFhYXpH//4hySpSpUq6tWrl+bPn3/DmletWqXk5GTNnDlT1apVkyR5eXnpgQceUGxsbJHPxbp169SiRQulpqZKkh5//HE98sgjefbbv3+/Fi1apDlz5qhevXqSrgZg999/v4KCgiRJM2bMUNu2bRUeHq5hw4bp3Llzunjxom666SadOHHC6mvKlCl68sknJUn79u1Tv3791KFDB0VERGjmzJnWfjabTS+++KI6dOigSZMmaceOHeratavat2+v0NBQTZs2zdo3LS1NgwcPVkhIiLp166Zx48blCJBef/11dezYUe3bt1efPn10+PBhp+fj3Xff1dChQ3Ns++STTxQbG6unnnpKs2fPLszplSSNHz9ee/fuld1u14ABAyTlHF01bdo0tWnTxhrJdvjwYWVkZGjw4MEKDQ1VZGSkevXqJSnvSL2dO3cqMDDQevzVV1+pa9euioqKUseOHbVmzRrrPN9xxx2KjIxUeHi4nnvuOes5DzzwgP75z38W+nUBAIBSZAAAgMc4dOiQqVOnjvU4ICDAjB492jgcDmOMMRkZGSYtLc0YY0xWVpbp16+f+eSTT
4wxxsydO9d06NDBnD171hhjTGpqqsnKyjJr1qwxkZGR5siRI6Z9+/Zm3rx5Vv8PP/yweeedd6zHAwYMMPPnz7cep6amWl/PmzfP3HfffdbjyMhIc+zYsTyv4dVXXzUDBgxw6fVKMmfOnMmzLSwszERGRprIyEgTHR1ttb3yyiumf//+ZsGCBSYyMtJkZGTk6XPBggUmIiIi32MuW7bMhISEWMd95JFHzPjx462vZ8yYYYwxxuFwmMDAQLN9+3aTlZVloqKizO7du40xxly4cMGEh4ebLVu2WDW/9NJL1jHOnz9vLl26ZIwx5uLFi8Zut5tNmzYZY4z505/+ZEaOHGkcDoc5f/68CQsLMyNHjjTGGPPxxx+bMWPGmKysLGOMMR988IGJi4tz+jqCgoLMjh07cmzr0KGDWb58ubly5Ypp1KiR2bNnT47zeu01BwQEmMTExDx9XrtWrndt39TUVFOnTh1z8eJF6xxkZGSYJUuWmF69eln7nz592mlfO3bsMAEBAcYYYw4cOGA6depkzp07Z4wxZt++faZx48bm0qVLZsKECeaVV17J058xxqxdu9ZERUU5PR8AAMA9mMoHAICHe+ihh2Sz2SRJDodDzzzzjL755hsZY/TLL78oLCxMQ4YM0dKlSzV+/Hhrqt210UKSdPLkSXXv3l3//Oc/deedd1rbU1JS1KdPH+txz5499fLLL2vfvn2KiYlR165drbbGjRsrJSXFeuxsuqEz69ev1xNPPKH09HQNHTrUGq11o+c4m3L37LPPqm/fvho7dqy2bNmiqlWrulTD9VauXKnBgwdb/T/66KO6//77JUmjRo3SmDFj9Kc//UkJCQm66aabFB4erl27dunHH3/UkCFDrH7S0tK0a9cudejQQZL08MMPW20ZGRl67LHHlJSUJC8vLx09elRJSUnq1KmTVq1apTfffFM2m021atXS4MGDtX//fknSZ599pq1btyoqKkqSlJ2dne/rSElJUaNGjazHO3bs0M8//6xevXrJy8tLw4cP1/vvv69XX3210OfImdq1ays4OFjDhw9Xr1691K9fP/n7+ysyMlK7d+/WY489ph49eiguLu6GfX355Zfav3+/unfvbm3z8vLSkSNH1L17d/35z39Wenq6evTokWOUXe5rEAAAuB9T+QAA8HA1a9a0vv7rX/+qX375Rd9++622b9+uoUOH6tKlSzfso27dugoLC9PSpUtljLG2V69ePcfzJ06cqC+++EK33HKLJk+erMcee8xqu3TpkjU1ryDt2rVTYmKiMjMzJUndunVTUlKShg8frvPnz7v0mvOTlpamgwcPqkaNGjp16pTTfdq3b699+/a5vGD6tdBPkjp37iyHw6EtW7Zo7ty5GjVqlKSr62bVr19fSUlJ1r9Dhw5p5MiR1nOvf58mT56sBg0aKDExUdu2bVN0dHS+79P1xzfGaNKkSdYxduzYke/6X7nfu9mzZystLU1BQUEKDAzUJ598og8++EBZWVkunYcb8fb21ubNmzVx4kT98ssv6tSpk9avX6+goCDt2rVLffr00YYNGxQWFqYzZ87Ix8cnR7B2fa3GGN111105zuexY8cUHBys++67Txs2bFDr1q01c+ZM3X333Tn6cOUaBAAAZYdgCgCASuTMmTNq3LixqlatqhMnTmjRokVW24ABAzRr1iydO3dOknT27FkrGPDz89OSJUt0/PhxPfLII9a6VBERETnuuLZ3717deuuteuSRRzR58mRt3rzZatu9e7ciIyNvWGNsbKyaNWumP/zhD8rIyLC2575LXFGMHj1aw4YN08KFCzVixAin4VPLli113333afTo0dZd6IwxWrx4sQ4ePKjY2FgtXLjQCsneffdda10k6eqoqXfeeUdffPGFtYZT69atVbt2bc2ZM8fab//+/dZ6V7mdOXNG/v7+8vHx0d69e7VixQqrLSYmRvPmzZMxRunp6Vq4cKHVds8992jWrFlWv5mZmUpMTHR6j
OvfuytXruijjz7S5s2blZycrOTkZB07dkzNmzfXF198ccPzek3t2rWt6ye3tLQ0nTx5Ut26ddPzzz+vrl27KjExUSkpKbLZbBowYIBef/11GWN09OhRBQUF6fDhw1aA+OGHH1p99e7dWytXrtT27dutbVu2bJF0dY2pRo0a6cEHH9Rrr71WpGsQAACUHabyAQBQifzhD3/QoEGD1LZtWzVp0iTHNKcRI0bo+PHj6tKli3x8fFSjRg2tXLnSavf19dX8+fM1ZswYDRs2TB9++KEGDRqkhx9+2Fqce+bMmVq9erWqVKkib29vvfHGG9bzv/zyS7388svWY7vdrmXLlqlJkyY5arTZbFq+fLmee+45hYWFqUaNGqpVq5aCgoI0adIkl15nt27d5O3tbT1esGCBVqxYodTUVD3//PPy8vLSo48+qgcffFBLly7NMepIkt5//31NmzZNt99+u3x8fORwONS9e3fdeeed6tu3r3bu3KnOnTvLy8tLERER+vvf/57jPDZv3lz33XefNR3Sx8dHS5cu1cSJE/Xmm28qOztbDRo0yHcx+Oeee04jRozQvHnz1KJFC8XExFhtL7zwgkaPHq02bdqoQYMGioyMtKYVDhs2TKdPn1bPnj0lXb3D38MPP6x27drlOcagQYP01VdfKTY2Vp999pkCAgIUEhKSY59hw4Zp9uzZGjhwoEvnPSIiQm3btlVYWJiCgoIUHx9vtZ07d06DBg3ShQsXZLPZFBwcrJEjR2rjxo2aNGmSjDHKysrSiBEjFBERIUl6+umn1bFjRzVq1Eh9+/a1+mrZsqXmz5+vcePG6eLFi7py5YratWun+fPn69NPP9VHH32kKlWqyOFwaNasWdbzvvzySw0aNMil1wIAAMqGzVw/Hh8AAKCQ+vXrpylTplhrJTmza9cujRs3TuvXry/DyjxTZmamsrOzVbVqVV24cEG9e/fWE088ocGDBxeqn/T0dHXp0kWbNm2y7qroyX799VfFxMTou+++U5UqVdxdDgAA+A3BFAAAKJYDBw5o9+7dOdbyye2rr75Ss2bNFBoaWoaVeaZffvlFffv2VXZ2ti5duqSBAwdq+vTpeUZ9uWLVqlVq1KiRwsLCSqHS8uXbb79Vdna2unTp4u5SAADAdQimAAAAAAAA4BYsfg4AAAAAAAC3IJgCAAAAAACAWxBMAQAAAAAAwC0IpgAAAAAAAOAWPmVxEIfDoePHj6tWrVpFumMMAAAAAAAAKgZjjNLS0tSkSRN5eRU8JqpMgqnjx4+rWbNmZXEoAAAAAAAAlANHjx6Vv79/gfuUSTBVq1Ytq6DatWuXxSEBAADcZqakNEm1JD1ejH6GSDovqbakf5XxsT1FWZ4Pzr37uPvcu/v4AFDenD9/Xs2aNbPyoIKUSTB1bfpe7dq1CaYAAIDHqyrpym//Lc4nH19d/bDmW4h+SurYnqIszwfn3n3cfe7dfXwAKK9cWc6Jxc8BAAAAAADgFgRTAAAAAAAAcIsymcoHAAAAAADg6bKzs5WZmenuMsqMr6+vvL29i9UHwRQAAAAAAEAxpaenKyUlRcYYd5dSZmw2m/z9/VWzZs0i90EwBQAAAMCzTZni2jYAKKLs7GylpKSoevXqatiwoUuLfl8vOTlZ/fv31+eff67AwMDSKbKEGWN06tQppaSkKDg4uMgjpwimAAAAAAAAiiEzM1PGGDVs2FDVqlUr9PMXL16s/fv3a8mSJZo8eXIpVFg6GjZsqOTkZGVmZhJMAQAAAKUi98gaRtoAAPJR2JFS1/zrX/+y/luRgqmivt7rcVc+AAAAAJXPlCk5/wGAmxw8eFA7d+6UJO3YsUMHDx4ssb6XLFmiqKgo2e12hYSEKCYmRg6HQ9HR0brpppt07tw5a99BgwZp7ty5kqS5c+eqTp06stvtatu2rfr27asjR46UWF3XI5gCAAAAAGcIrwCUgcWLF8vL62o84+XlpSVLlpRIvz///LPGjh2rJUuWKCkpS
Xv27NHrr79ujXKqXbu2pk+fnu/ze/bsqaSkJP34449q1aqVnnzyyRKpKzem8gEAAAAAwROAUvbdd9/pn//8Z57ty5cvt+7kZ4zRO++8o/379+fZb8yYMbrttttcPt7Jkyfl7e2t+vXrW9vat29vff3MM8/ohRde0BNPPKEmTZoU2Ffv3r319NNPu3zswiCYAgAAAAAAKGUHDhzQe++9J2OMvL29rVFSxpgcwdTx48f1/vvvS5IcDoeys7Nls9nUs2fPQgVTERER6tq1qwICAtSjRw916dJFQ4cOVdOmTSVJjRs31rhx4/Tiiy/qf//3f/PtJzs7W4sWLVJUVFRRX3qBCKYAAAC4lXzlxcLmAIAyMnjwYN18880aMmSITp8+rczMTKf7ZWVlWV97e3vr5ptv1oIFCxQdHV2o43l5eWnx4sXas2eP1q5dq+XLl+u///u/9d1331n7/PnPf1br1q21Z8+ePM9fs2aN7Ha7pKsjrd54441CHd9VBFMAAADANQRTAIBS1LNnT/34448aNWqUli5desP9+/btqzlz5qhBgwZFPmZISIhCQkI0btw49enTR/Hx8VZb7dq19cwzz2jSpEny9vbOU+tnn31W5OO6isXPAQAAAAAAykiDBg0UHx+vV155pcD9XnnlFcXHxxc5lDp27Jg2bNhgPT5z5owOHTqkFi1a5Njv0UcfVVJSkr7//vsiHae4GDEFAAAAAK5g2i+AEmKz2VSvXj3ZbDZrfanc7fXr17fuoFcUWVlZmjp1qg4dOqTq1asrKytLI0eO1MCBA/Xmm29a+/n5+Wnq1Kl68MEHi3ys4iCYAgAAAAAAKGMLFy60giln/120aJHGjRtX5P4DAgL01VdfOW1LSEjI8XjEiBEaMWKE9fihhx7SQw89VORjFwbBFAAAgDOuLIrNwtkAAKAITp8+rbVr18rhcMjHx0fVqlXTxIkT9dZbbykjI0NZWVlKSEhQamqq6tev7+5ySxXBFAAAAADPQkgMoJyLj4+Xw+GQJHXo0EH/+te/1Lx5c40ePVpDhgzR5s2blZ2drfj4+DIbueQuBFMAAACu4BddAABQQpYvXy4vLy89//zzeu655+TjczWeCQgI0Pr16/Xyyy9r2rRpWr58OcEUAAAA4BEIFwEA5cTEiRP1xz/+UbfffnueNh8fH7300kuKi4tTdna2G6orWwRTAAAAADxaQnJCnm3RgdFlXgcAXNOlS5cb7uMstPJEXu4uAAAAAAAAAJUTI6YAAACAAuQebRPtlioAAPBMBFMAAAAAAAClYErClNLpN9q1fgMDA+Xn56dq1aopIyNDo0aN0rPPPlvgczZv3qxHHnlEPj4+mj59unr37l0CFeePYAoAAAAAAMBDLViwQHa7XceOHVNoaKhiYmLUsWPHfPefN2+ehg4dqkmTJpVJfQRTAACg8uHubCgOZ9cP11SFk2eKppPF0F3ZBwAqiqZNmyokJESHDx9W8+bNNWHCBCUnJysjI0MDBw7UtGnTNH36dC1YsEDVqlXTggULlJCQoLp165ZqXQRTAAAAAAAAHm7Pnj06ffq0oqOjNXz4cE2ePFk9evRQVlaW7r77bi1atEjPPvus9uzZI7vdrokTJ5ZJXQRTAAAAACq93KOjAMBTDB48WF5eXtq7d6/efPNNVa9eXatWrdLJkyetfdLT07V371631EcwBQAAgEqLMAKF4ex6iS7zKgCgcK6tMbVy5Ur1799fMTExkq4ucl61alU3Vyd5ubsAAAAAAAAAlK7Y2Fg9+uijeu6559SzZ09Nnz7dajt+/LhSUlLcUhcjpgAAAAB4FEbCAYBzzz//vFq2bKlly5bpnXfeUVhYmGw2m2rUqKF3331X/v7+ZV4TwRQAAAAAAEApmBI9xa3HT05OzvG4Xr16On36tCTp9ttvd/qcuXPnlnJVOTGVDwAAAAAAAG5BMAUAAAAAAAC3YCofAADwbFOmuLsCAAAA5INgCgAAAJUCC2IDAFD+E
EwBAACUlN9GZ3WWdFmS33XbAAAAkBdrTAEAAAAAAMAtCKYAAAAAAAA8VFpammrWrKnRo0db2+bOnat77rlHkpScnKxZs2a5qTqm8gEAAACF4mytqugyrwIAUCGU1pT+QvS7YMECRUVFacmSJXr77bdVs2bNHO3Xgqnx48eXcJGuYcQUAAAAAACAh5o9e7aeeeYZde/eXQsWLMjTPn78eO3du1d2u10DBgwo8/oIpgAAAAAAADzQrl27dPToUfXu3VujR4/W7Nmz8+wza9YstW7dWklJSYqPjy/zGpnKBwAAUBHkHrLP3f4AAMANzJ49Ww8++KC8vb0VFxencePGaffu3e4uKweCKQAAAAAAAA+TmZmpDz/8UL6+vpo/f74k6eLFi5o9e7bCwsLcXN3/I5gCAACVntPFrAOjy7wOAACAkhIfH6+goCBt3rzZ2rZ7925FR0dr2rRp1rbatWvr3Llz7ihREmtMAQAAAAAAeJzZs2dr2LBhOba1adNGTZs2VVpamrUtIiJCbdu2VVhYmFsWP2fEFAAAAAAAQGlw45qQy5Ytc7r9hx9+kCQ99dRTkiQfHx8tXbq0zOrKjRFTAAAAAAAAcAtGTAEAABRR7rWpWJcKAACgcAimAAAAXOBsgXQAAAAUD8EUAACAEwRRAAAApY81pgAAAAAAAEqAMcbdJZSpkni9Lo+Y+tvf/lZg+4QJE4pdDAAAAFAhTZmizpIuS/K7bhsAoHLw9fWVzWbTqVOn1LBhQ9lsNneXVOqMMTp16pRsNpt8fX2L3I/LwVRiYmK+bZXhhAMAAJQrzkIPghAAANzC29tb/v7+SklJUXJysrvLKTM2m03+/v7y9vYuch8uB1Nz5swp8kEAAAAAoFQ4G60GAG5Qs2ZNBQcHKzMz092llBlfX99ihVJSERc/P378uHbu3KlLly5Z2wYMGFCsQgAAAAAAACoyb2/vYgc1lU2hg6n3339fU6dOVWpqqoKDg7Vt2zZ16tSJYAoAAFQYpXXHvWv9HpWULclbUudSORIAAIBnKPRd+d58800lJiaqRYsW+v7777V69Wq1atWqNGoDAAAAAACAByv0iKkqVaqoXr16ysrKkiR1795dEydOLOm6AAAAAOCGEpITcoxSvOzmegAAhVPoYMrPz0/GGLVq1UpvvfWWAgIClJ6eXhq1AQAA4De5px9GB0a7pQ4AAICSVOhgatq0aTp//rxee+01jR8/XmfPntXf//730qgNAAAAAAAAHqzQwVRMTIwkqU6dOlqxYkWJFwQAAAAAAIDKodDBVFZWlhYvXqwDBw5Y60xJ0gsvvFCihQEAAAAVhbN1jqLdWhEAABVDoYOpIUOG6MSJE+rYsaO8vb1LoyYAAIASk3ttJgAAAJQfhQ6mduzYoT179shms5VGPQAAAAAAAKgkvAr7hGbNmunKlSulUQsAAAAAAAAqkUKPmGrZsqWio6N17733qmrVqtb2CRMmlGhhAAAAAAAA8GyFDqYuX76skJAQ7d6929rGtD4AAAAAAAAUVqGDqTlz5pRGHQAAAAAAAKhkCh1MffDBB3m21a1bV1FRUWratGmJFAUAAFBkU6a4uwIAAAC4qNDB1Mcff6x169apa9eustls+uabb9SxY0f99NNPeuutt/TAAw+URp0AAAC4TkJyQp5t0WVeBQAAQPEUOpiqWbOmEhMTFRISIknau3evJk2apI0bN2rAgAEEUwAAAMX126ivIZIuS/JzZy0ACjZlijrruu9VRm0CQKF4FfYJP/30kxVKSVLr1q21f/9+BQYGysur0N0BAAAAAACgkip0klSrVi198MEHMsbIGKMPPvhANWvWLI3aAAAAAAAA4MEKHUzNmTNHM2fOlJ+fn6pWraqZM2dq9uzZunDhgmbMmFEaNQIAAAAAAMADFXqNqdatW2vLli1KS0uTdHUE1TV33XVXyVUGAAAAAAAAj+ZyMLVv3z4FBwdr+/btTtsjIiJKrCgAAAAAAAB4PpeDq
YkTJ+qLL77QwIEDrW02m03GGNlsNh08eLBUCgQAVBC570LEXYkAAAAA3IDLwdTSpUslSYcOHZIkHThwQPHx8WrZsqX69+9fOtUBAAAAQDmWkJygo5KyJXlL6uzmegCgonF58fO77rpLSUlJkqTjx4+rQ4cO+vrrr/X000/r1VdfLa36AAAAAAAA4KFcHjF17Ngx2e12SdL8+fPVo0cP/fvf/9aZM2fUo0cPPfPMM6VVIwAAQKWSkJwgSfpV/z8K4xY31gMAAFBaXA6mqlWrZn29ceNGxcXFSZLq1asnH59C39wPAFCRsX4UyguuRQAAgArN5UTJy8tLKSkpqlu3rtauXavp06dbbRcvXiyV4gAA5QS//AMAAAAoBS4HU5MnT1a7du3k4+Ojnj17qlWrVpKujp4KDAwsrfoAAAAAAADgoVwOpv7rv/5LXbp00cmTJxUREWFtDwwM1HvvvVcqxQEAAKAQpkxRZ0mXJfn99hgAAKA8K9TiUI0bN1bjxo1zbGvSpEmJFgQAAAAAAIDKgVXLAaAyYzQFAKCCmZIwJcfjaLdUAQAoKQRTAIASce329tdEu6UKAEBFljt0AgB4PoIpAAAAoAJxFt5Mic67DQCAioBgCgAAeJTco/dQeSU8FK2GkupL8nZ3MQAAwCmCKQBAofGLPwAAAICSQDAFAJVJERc7L1IQ5exYLLYOACim6LkJ7i4BAFCCCKYAoLwh0AHyx/cC4DFcWeicEAoAPB/BFABUBLl/GeeXcwBOJCQn6KikbF1dU6mzm+tB6XAW1kzRlJyPK+hi6ARRAFD5EEwBAHJg/SgAKF9yjyyKLsJzpIobVgEAPBvBFABURCU03Y8QCgAAAIA7EUwBgLsxLQ8AAABAJUUwBQCegnWoAAAAAFQwBFMAAABAOeHKneqccWnR8OgidV1qWOgcACARTAFAueNs3afowOjCd+RkxBRrSsHTcE2jMijLACd3MMaC6QCA0kYwBQAVQO5fvosUVJUxpwFbmVcBAAAAoDwjmAKAssQoJsB1rJOGSojpbQCAyoZgCgAqoBKb7gcAbuZsTaXKNH0s9+uPdksVAAC4D8EUAAAAAJQUV0Z7MiIUACwEUwDgISriOlRFlvsDPR/wgQqjqHed81Tlbepennqi3VEFAKAyIZgCgDJU6deTIlACUEqcBV7RZV5FwSpCjSg+V37WRzv7+cfPRACVFMEUAKDi4wM+4FFyBzjO1pyqiCOvytvoKJQzTAEEUEkRTAGAh/KY0Vl8CEcF50rIAs9U3hY2dyXMiy71KiC5OKrKk6fkA8B1CKYAAECF4TGBayXhUhDiZBRRwkPRhe4H5RvTGAvP6R14mRIPwAMRTAEAAKDQCItujKl7KGl5bnTCVHYAHoBgCgDgPnx4RiXkLNCpTNP7CGvcJ/e5zz0yDQBcxug9lCCCKQAAAJQrBChloyxDQgLJ0uHSdD9nCBFQTIzeQ0kimAIAAECFQ9ABOOc0rGIhdZQyp9ddUTtjNFalQzAFACgzef66VsQPyiXVD8o3Fjqv+EoqPCKEAoonz89Nt1SBSseFQMmlO1QWuxCUdwRTAFCa+AtPmSjRv9K5E8PgUY7lXhuLsAgAPIALnz2KOp2aPzDBVQRTAIByjQ81qAzyhD5uqQJApVHUP4QwxcrjFHmdMqAEEUwBQCkiVCkY58cF/BIAAChhLo00dvLzhimB5VwJLXzP5zOUNYIpAAAqEndP9yMoAwDPlHv6FuFEhePSe1YR73Lq7s8+lZizaZylMZWfYAoAADjHh74Sc+bnM/ro6Y80/LXhuverbXnai7p+R1liTSnAsxFElXOV+GeyS9MNK/H5KUmufB7JvU9JfD4gmAIAoJwo01t8l9QHOP6KeUPRcxP00fbDSk1J1YUZ8VJEgLtLAoASUVajKSodF6ZRVnZMKy2+kvqjWEn8f4BgCgAAdynCOg9Og6oSWlMCpWf1oV8kSWsOndJwgikAnoyfSQWqCCNkKyJPCUmLe
n248lrL87VHMAUAQAVS5FFVRfglwJW/zroUlFXyX1KOp2Xo0NmLkqSDZy/oeFqGmtSq5uaqAKB0uPSzowynYZXYtKMijhAuz2GAxytvnz9KcR23in6dEUwBAFAJldSHIZeCsko+3W/d4V9lk2Qk2SStP/yrBoc1y7FPRfyrLgAUVZ7RwGX5c6Ikp8lV8DDA0xVlMXhX/tjnymefyj71MuGhaF24kuXy/gRTAABUcEUe2VRKXJp+6IH2/pqmL/b9LElK1/8HUT8eS82x35I9x3QsLSPP8/sF36LWDWqVfqEumJIwJc+26DKvAkBl4fQX/TI8FnBNUa8PrqviIZgCAMBNyvJDjDs/MLlyNx1P+EB3LC1DS3/6+WogZZO8ZLPazHX/PX3xipbtO/HbYyOHuRpg2RvXLTfBFAC4W1Gm4FX06UxAZUUwBQAlpRJNTUIu3LIYkmJuvVn1qvrq5XW7de5yprKNcbrf9du9bFK9qr56oUeo7I3rllGlN8bUQgAAUFYIpgAAQJnzhBFSzrS7pZ7mDOygKRv2KCkl9Yb73960vp65I0R1qvqWQXX5yz11L9otVQBAAUpybSgA5QrBFACUED4cAZCkOlV99WRMmOJ3HtXiHw7lu9+Y9rdqaFgz2Wy2fPcpLmdrRU2JzrsNAMo7PmcBnotgCkDlU95uHYsKL89i326pAuWJzWZTjSoFf8yqVcWnVEMpKZ8pedEu7gcAAFAGCKYAeJYSWlA5z22LCaoAFNKW5FOy2STz2+Lm1+7SZ3R1bam1yac0oHWTMq+LO+4BAIDyhGAKQMVVimsNMAIGQHGkXcrU3hNnZSR526Qq3t4aFNpUn+46pivZ2co2UtLJszp/OVO1/cp2fSlGRwEAgPKEYAqA2zm7tW90YM5tbl9XgBFTKC6uoUol8ehpXbv3XkiD2nq+exs1qllVccG36OW1u7Tr1zQ5jLTx6Gn1adnYrbUCAAC4U9kGU3/5ixJ+3nTD3TzlL3nOftnOrSiv1ZV+S+pYnsLptIUinI+inkPes5xcOR9uD6JyKW/1oOLhGqpcth07LZukgZEBejwiQN5eV9eSalyzqv7Wt50+2H5YH247rG9TUgmmAABApVamwdT6w+tvuBBoUTkdcVGGv9QXNXgo6vPK+7HKm+gS6qcyn0NJJbZ+E1Be5P6eLrGfG4yOqvT6hPqrd9tmatWwtrxztXl72TTKHqjbm9aXwxinzwcAAKgsyiSYMr996LqYmeXS/ucnTcq5IfdjJy5cydv3+fPnb/i89ePj8mzrNmtZkZ4HlDRXrmFXcL2iMnLl+yf3z46ifs/l7ueLn1YWqR94Dv+6NZQtKeNKli7ks09AneqSnH+GuSZDkkOSl5RvP86eky3JuxDP8WRleT488dwX9f+LZf3Zw93n3t3HB4Dy5lr+Y1z4I5zNuLJXMaWkpKhZs2alfRgAAAAAAACUE0ePHpW/v3+B+5RJMOVwOHT8+HHVqlVLNputtA8HAAAAAAAANzHGKC0tTU2aNJGXl1eB+5ZJMAUAAAAAAADkVnBsBQAAAAAAAJQSgikAAAAAAAC4BcEUAAAAAAAA3IJgCgCAMhIYGKikpCSX9x80aJCaNGkim82ms2fP5mj79ttvFRkZqVatWikmJkbHjh1z2sfWrVvVpUsXVa9eXffcc4/LbZK0du1adejQQW3btlVoaKg2bdp0w7a5c+eqTp06stvtstvt6tmzp/WcCxcuaNSoUQoPD1dISIieffZZ6xbCDodDf/rTnxQWFqaQkBCNHj1aV65csZ575MgR9e/fX61bt1ZoaKjeeecdSVJycrK8vb2t49ntdh04cMDpuUhPT1fv3r3VoEED1a1b1+W2go5fnLZXX31VoaGhstvt6tSpk7Zs2WK1FfTe22w2hYeHW693/fr1kqQdO3bkOA+BgYGqX7++03MhSdu2bVO/fv2sx/PmzbP6bdeunZYtW2a1TZgwQYGBgbLZbHmu4S+//FK33XabIiIi1
KlTJ23bts1qGzVqlFq1aqXIyEjdcccd2rp1q9UWHR2tzz77LN/6Ssrx48fVrVu3Ij33tttuU0JCQonUcf/992vjxo0l0hcAAB7FAACAMhEQEGASExNvuF9mZqYxxpgVK1aYkydPGknmzJkzVnt2drZp0aKFWb16tTHGmBkzZphBgwY57evo0aPm22+/NbNmzTIDBw50ue3YsWMmICDA7Nq1yxhjzKVLl6waCmqbM2dOnr6umTx5shkxYoRxOBzmypUrpk+fPmbhwoXGGGPee+8907NnT3P58mXjcDjMmDFjzGuvvWaMMcbhcJj27dtb+xpjzIkTJ4wxxhw6dMjUqVPH+YnM5dKlS2bVqlUmMTExz3MKaivo+EVtS0xMNM2bNzdpaWnGGGM+/PBD06FDB2u//N57Y4zTbc78/ve/N48//ni+7X379jXr1q0zxhhz+vRpU6tWLfPzzz8bY4xZv369adiwobXv2rVrzdGjR/Ncw6mpqaZ+/fpm586dxhhj1q1bZ9q2bWu1/+c//7Gu588//9wEBARYbT169DD//ve/b/g63CkqKsqsWbOmRPpKTEw03bp1K5G+AADwJIyYAgCghG3atEldu3ZVZGSkIiIi9J///MdqW7JkiTp37qxbb71V06ZNs7ZHR0drwoQJ6ty5s3r16iVJio2N1c0335yn/++//14+Pj7WaKRx48bp888/16VLl/Ls6+/vr44dO8rPz69QbX//+981dOhQtWnTRpLk5+dnjSQqqK0g27ZtU58+fWSz2eTr66u77rpLH374odUWGxurKlWqyGazqW/fvlbbqlWr5Ofnp/vvv9/qq1GjRjc8Xm5+fn6KiYlxWmtBbQUdv6htNptNmZmZunDhgiTp7Nmz8vf3t/bL77131aVLl/Txxx9r9OjRTtuPHDmiH3/80RpJ5HA4rNs6O6une/fuOR5fc+DAAd10001q27atJKlbt246cuSIfvjhB0nSgAED5OPjI0nq1KmTjh07pqysLOv5q1atUocOHdSyZUv98Y9/tEbQRUdH64knnnDadr2hQ4dq/vz5kq5el1WqVLHOaUxMjNatW6fk5OQc76vNZtMrr7yijh076tZbb9WcOXOsto0bN8putyssLEyjRo3KUev+/fsVGxuriIgI2e12a7TXe++9p7Fjx0qSdu3aJZvNpq+//lqSNHXqVE2dOlWSZLfbderUKe3evdvpewIAQGVFMAUAQAlKTU3VPffco7/85S/atm2bkpKSckwjOnv2rDZt2qStW7dqxowZOabg/fTTT1q3bp1Wr15d4DGOHDmigIAA63GtWrVUu3ZtHT9+XJIUFxen7777rlivY9euXcrIyFBsbKzsdrueeOIJ6xf+gtok6ZtvvpHdbleXLl20aNEia3tUVJQWLVqky5cvKz09XZ999pmSk5Ottvj4eJ0/f16ZmZlauHCh1bZr1y41bNhQQ4YMUbt27XTvvffq4MGDVr8XLlxQhw4d1L59e02dOlXZ2dnFeu3OzkV+xy9qW2RkpJ588kndeuut8vf315tvvpljmt+N3HnnnYqMjNRTTz2V49xfs2TJEgUFBclutzt9/rWpmNc0aNBAs2bNUvv27RUQEKCHH35Yc+fOvWEdwcHBOn36tDVFLT4+XmlpadZ7d723335bcXFxVlAlXT1HGzdu1Pbt27V27Vp98sknLrVdExsbq5UrV0qSVqxYodtuu01r167VxYsXtW3bNnXu3Nlp3X5+ftqyZYuWL1+uCRMmKCsrS1euXNHgwYP1+uuva+fOnfrd736XY1risGHDdP/992v79u1atGiRRo8ercOHD+epoXPnzjkex8bGWn107txZq1atuuF5BQCgMiGYAgCgBG3atEmtW7e2wigvL68c6/wMHTpU0tUgICgoSIcOHbLahg8fLl9f32LXsGzZMt12223F6iMrK0vr1q3TokWLtHXrVp05c0YvvvjiDdvuvvtuHTlyRElJSZo9e7aeeuopbd68W
ZL07LPPqnnz5rr99tvVr18/dezY0QopHnroIfXp00c9evRQjx491KpVK6stKytLq1ev1vPPP6/ExET17t1bDzzwgCTplltu0bFjx7R161atXLlS69ev1xtvvFGs1+7sXOR3/KK2HTp0SEuWLNH+/fuVkpKiJ598UoMHD3apnsOHD+v777/Xxo0bderUKf35z3/Os8/s2bPzHS0lSSkpKTlGnZ07d05vv/22tmzZosOHD2v27Nm69957c6zz5UydOnX06aefatKkSYqKitLXX3+t0NDQHOGTJH300UdauHCh3nvvvRzbH3zwQfn6+qp69eoaPny4FejcqO2a2NhYrVq1StnZ2dq1a5eeeuop6zro2LFjvt9Pw4YNkySFhITIx8dHJ06c0J49e+Tj42MFSb169VJQUJAkKS0tTT/88IN1ToODg9W1a1etX7/e2ufgwYNauXKl/vKXv2j16tVKT0/Xrl271LFjR+u4jRs3VkpKSoHnFACAyoZgCgCAMlS1alXra29v7xxThWrWrOlSH82bN9fhw4etx2lpaTp37pyaNGlSYnU2b95c/fr1U7169eTr66vf/e53VsBUUFuDBg1UvXp1SVKbNm0UFxenDRs2SJKqVaumt99+W0lJSVq7dq0aNGhgTQGz2WyaMmWKEhMTtXHjRoWGhlptzZs3V7t27azHI0aM0A8//KDMzEz5+flZU97q16+vhx9+2FoMfMKECdZC4Dt27CjWucjv+EVtW7x4scLDw633bNSoUdqwYcMNg6Br9UhSjRo19Nhjj1mv95pDhw5p8+bNVgjqTPXq1XNM/VyxYoXq1q1rTc/s37+/zp8/n+M6y0/Pnj21du1aff/993rjjTd0/PhxhYaGWu0LFizQSy+9pBUrVtxwCqbNZitUW/PmzeXn56ePP/5YUVFRuvPOO7VmzRqtXLlSd955Z759FfR9WJR6YmNjtXz5cu3bt089evSQMUaLFy9W586dc4R0ly5dUrVq1fLtEwCAyohgCgCAEtSlSxft27fPCgscDodSU1NL9BhRUVHKzMzUmjVrJEnvvvuu+vfvn+OX7eIaOnSo1qxZo8uXL0uSli9frsjIyBu2XT818eTJk1q9erXatWsnSTp//rwuXrwo6Wp48o9//EN//OMfJV39hf3MmTOSpF9//VXTp0/X008/LUnq27evUlJSrL6XLVumNm3ayNfXV7/88osyMzMlSZcvX9aSJUus4/3tb39TUlKSkpKSFB4eXuRzUdDxi9oWFBSkDRs2KD09XZK0dOlStWrVSlWqVCmwljNnzljn0OFwaMGCBdbrveb999/XvffeW+C6XxEREdq7d6/1OCgoSElJSTpx4oSkqyP/srKy1KxZsxuen59//tn6+uWXX1ZMTIxatmwpSVq4cKGee+45rVy50grUrvfRRx8pMzNTGRkZmj9/fo5pbwW1XS82NlYvvPCCYmNjrbB00aJF+e6fn5CQEGVlZVnfVytXrrTu8FirVi21b9/eWo9q//79+uabb9S9e3erhhkzZlijo2JiYvTiiy/mqWH37t3W9woAAPiNe9deBwDA82zatMl06dLFhIeHm8jISBMfH2+MyXtXvuvv+OXsDmVxcXGmadOmRpJp0qSJ6dGjh9W2ceNGEx4eboKDg02PHj3MkSNHrLa+ffuarVu3GmOM2bNnj2natKmpV6+eqVq1qmnatKn5n//5nxu2GWPMa6+9Ztq0aWPCwsLM4MGDc9wJLr+2SZMmmdDQUBMZGWnCw8Nz9JeYmGiCg4NNmzZtTHh4uFm8eLHVduLECRMSEmJCQ0NNSEiI+cc//pHjXHz11VcmMjLSREREmG7dupnt27cbY4xZvHixadu2rYmIiDChoaHm8ccfN5cuXcr3vQkPDzeNGzc2NpvNNG3a1AwfPtyltvyOX9Q2h8Nhnn32WdO6dWsTERFhOnfubL777jvrefm999fe92uvd/jw4eb06dPW87Kzs42/v791x8b8XLlyx
QQEBOR47ltvvWXatGljIiIiTPv27c3XX39ttY0dO9Y0bdrUeHt7m5tvvtm0aNHCahszZoxp3bq1adGihRk+fHiO68THx8f4+/ubyMhI69+vv/5qjLl6zT/xxBOmQ4cOpkWLFuapp54yDofjhm25LV682Egyhw8fNsZcvQYbNGhg7Z/7zo3KdVfDm266yRw6dMgYY8yGDRtMZGSkCQsLM6NGjTKRkZHW9+i+ffvMnXfeaSIiIkxkZGSO79dff/3V2Gw2M2/ePGPM1fddUo5rIT093TRt2tS6EyMAALjKZoyTW5wAAADAo82YMUOSnK5R5W7R0dGaOHGi7rnnHneXUmJmzZqllJSUHHfjBAAATOUDAAColP7whz+4vK4Zis/Ly0uTJk1ydxkAAJQ7jJgCAAAAAACAWzBiCgAAAAAAAG5BMAUAAAAAAAC3IJgCAAAAAACAWxBMAQAAAAAAwC0IpgAAAAAAAOAWBFMAAAAAAABwC4IpAAAAAAAAuAXBFAAAAAAAANyCYAoAAAAAAABu8X/7pUmo1yPV9wAAAABJRU5ErkJggg==", "text/plain": [ "
" ] @@ -290,7 +322,7 @@ }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKYAAAC+CAYAAAAP1AcDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzs0lEQVR4nO3deXiU1d3/8c9kIYBhFSxCIGEPWYfdIEuCqSxWlBZFWSwRBW1d0FpZLsUUfSoFrVVpRZ4iuMCvgKEWEVplCYuAICQsApEtQIhQZA17lvP7I3I/JJmESTKZSSbv13Vxycw5c+7v3DOYmU/OObfNGGMEAAAAAAAAuJmPpwsAAAAAAABA9UQwBQAAAAAAAI8gmAIAAAAAAIBHEEwBAAAAAADAIwimAAAAAAAA4BEEUwAAAAAAAPAIgikAAAAAAAB4BMEUAAAAAAAAPIJgCgAAAAAAAB5BMAUAgJez2+2y2+0KCwuTr6+vdXvo0KHlHjs5OVl2u73Y9hkzZmjq1KkljjFjxgz98Y9/dPqYFy5c0Lhx49SmTRtFRkYqOjpaI0aM0KFDhzRw4EDr+dlsNkVGRsput6tXr16SVOC+639OnTrl9LElKScnR3/4wx8UGhqqiIgI2e12jRkzRmfPni3VOJXdAw88oI0bN2ry5MnWuQoMDFTLli2t22lpaQoJCVFqamqF1mK325WVleWwrUuXLkpOTpYkvfDCC5o/f36F1gIAAFzLz9MFAACAinU9NEhPT5fdbncYIuTk5MjPz7UfCy5fvqw///nP2rlzZ4n9xowZow4dOui3v/2t6tWrV2JfY4wGDhyoDh06aOfOnapVq5by8vL06aef6sCBA1q2bJnV12azad26dapfv36BMRzdVxqjR4/W6dOntXHjRjVo0EDGGH366ac6ffp0ucZ1RkW8To5s3rxZp0+fVkxMjGJiYjRlyhRJUmxsrMaNG6f777/fpce72fNyNvh68cUX1bNnTw0dOlS+vr4uqg4AAFQkZkwBAFBNhYSEaPz48erWrZt+/etf6/jx44qLi1Pnzp0VHh6up556Snl5eVb/P/3pT9YMpTvuuEOXLl0qMN758+d19913WyHGp59+qjvvvFO33HKLJGnTpk3q3Lmz7Ha7IiIi9N5770mSatSoobvvvtupmS4rV65Uenq6ZsyYoVq1akmSfHx89OCDDyo+Pr7M52Lt2rVq3bq1Tp8+LUl66qmn9Pjjjxfpt3//fi1atEhz5sxRgwYNJOUHYA888IBatWolSZo+fbrCw8MVGRmp4cOH69y5c7p06ZJuvfVWHT9+3BorMTFRzz33nCRp3759uueee9S1a1dFRUVpxowZVj+bzaZXXnlFXbt21cSJE7Vz50717NlTnTp1UlhYmF577TWrb1ZWloYOHarQ0FD16tVLY8eO1ahRo6z2N954Q926dVOnTp3Uv39/HT582OH5eP/99zVs2DCnz9/ixYsVExOjli1bWvV8++23Cg0NlTHG6tejRw8tX75c6enpql+/vsaPH69OnTppxowZ2r9/v+Lj4xUVFSW73a7PPvuswDm4PiNtw4YN1nsoISFBOTk5Vr/bbrtNrVu31pdfful07QAAwLMIpgAAqMZOnTqlb775RvPmzVP9+vX1+eefa+vWrdqxY4fS09O1cOFCSdKHH36opKQkrV+/Xtu3b9fy5csVEBBgjXP06FHFxcVpxIgRmjx5sqT8ZX7du3e3+rz++ut64YUXlJqaql27dumhhx6y2mJiYrRy5Urrtt1uV2ZmZpF6t23bpo4dO8rf37/Mz7lXr17WUrS4uDhJUu/evfXYY49p1KhRWrhwodavX693333X4fHbtm2rRo0aORx7+fLl+uCDD/T1119r586duuWWWzRhwgTVrl1bv/rVr/TJJ59Iyp/59eGHH+rRRx9Vbm
6uHn74Yb355pvasmWLNm3apFmzZmnLli3WuL6+vtqyZYumT5+ukJAQrVy5Utu2bdPWrVuVlJSkTZs2SZKmTJmiWrVqac+ePVq2bJk2bNhgjTF//nylpaVp48aN2rZtm4YPH67f/OY3Dp9H4dfuZs6ePauNGzdaNR47dkxdunTRrbfeqq+++kqSlJKSopMnT6p///6SpHPnzik8PFzbtm3TuHHjNHz4cD3wwAPasWOHFi1apNGjRxcJzq5du6ahQ4fqjTfe0K5du/Twww9r+/btBfoUfi8BAIDKjWAKAIBqbNSoUbLZbJKkvLw8jR8/XtHR0erYsaO+/fZbawnV0qVL9cQTT1hL7Ro0aGAtlTpx4oR69+6tadOm6ZFHHrHGzsjI0M9+9jPrdlxcnF599VVNmTJF69evt2YcSVKTJk2UkZFh3U5NTVXTpk1vWv+6detkt9vVpk0bKxBz5jGpqalKTU3V6tWrrfsnTJiga9euacyYMVq4cKFq1qzp1Hg3WrFihYYOHWot6XvyySetYCYhIUFz5syRlB/83HrrrYqMjFRaWpq+++47PfTQQ7Lb7erRo4eysrK0e/dua9xHH33U+vvly5f12GOPKTIyUnfccYcOHz5svU4rV65UQkKCbDab6tSpU2Afsc8++0wrVqywZq1NmzZNR44ccfg8Cr92N3N9dlWjRo3UqlUrHTp0SJL07LPPWrO//vrXv+o3v/mN9X7z9/fXiBEjJOXP9Nq2bZtGjx4tSWrbtq169uypdevWFTjO3r175efnZ82Ou/vuu62ZatcVfi8BAIDKjWAKAIBqLDAw0Pr7n//8Z/33v//VN998ox07dmjYsGG6cuXKTceoX7++IiIitHTp0gLLtmrXrl3g8ePGjdMXX3yh22+/XZMmTSowW+fKlSvW0rySdOzYUSkpKcrOzpaUP/spNTVVI0aM0Pnz5516zsXJysrSwYMHdcstt+jkyZMO+3Tq1En79u1zesP06yGMlD+TJy8vT5s3b9bcuXOVkJAgKX/2VMOGDa2wLDU1VYcOHdKvf/1r67E3vk6TJk1So0aNlJKSou3btys2NrbY1+nG4xtjNHHiROsYO3fuLHb/r8Kv3c3cGOL5+vpay+t++ctfaseOHUpJSdGSJUus53z9GD4+xX8UvbH2khTu5+x7CQAAVA4EUwAAQJJ05swZNWnSRDVr1tTx48e1aNEiq23QoEGaOXOmzp07Jyl/6VZubq4kKSAgQIsXL1ZmZqYef/xxa1+qqKgopaWlWWOkpaWpZcuWevzxxzVp0iRr+Zkk7dmzR9HR0TetMT4+Xs2bN9ezzz6ry5cvW/dfvHixfE9e+ZuaDx8+XAsXLtTIkSMdhk9t2rTRr371K40ePdra88gYo6SkJB08eFDx8fFauHChFZK9//77uvvuu63HJyQk6N1339UXX3xhzTJq37696tata82mkvL3srq+31VhZ86cUVBQkPz8/JSWlmbNyJKkvn376sMPP5QxRhcuXLCWYkrS/fffr5kzZ1rjZmdnKyUlxeExCr92ZeXn56cnnnhCgwYN0uDBg4vdHL5OnTrq1KmTdQ7279+v9evXq3fv3gX6hYaGKicnx5rptmLFCh04cKBAH2ffSwAAoHIgmAIAAJLyl1198803Cg8P18iRIwtsJj5y5Ej96le/Uo8ePRQdHa2BAwfq6tWrVru/v7/mz5+v3NxcDR8+XDk5ORoyZIj+85//WH1mzJih8PBwdezYUS+99JLefPNNq+3f//63hgwZYt0ubo8pm82m5cuXy8/PTxEREYqKitKdd96p48ePa8yYMU49zxv3mLLb7UpLS9OMGTN0+vRpvfzyy7rzzjv15JNP6pFHHikwA+y6Dz74QNHR0erevbvCw8MVFhamL7/8Ug0bNtSAAQOUkJCgmJgYRUZG6vz583r99dcLnMd//OMfio+Pt5Yy+vn5aenSpVq8eLGioqIUHh6u0aNHFwjebvTSSy9pzpw5ioqK0o
QJE9S3b1+rbfLkycrKylKHDh3Uv39/RUdHW2HQ8OHDNWrUKMXFxSk6Olp2u12rVq1yeIzCr115jB49WseOHdNTTz1VYr958+ZpwYIFio6O1pAhQ/T3v/9dLVq0KNCnRo0aWrBggZ577jlFRkZq/vz5BUIoY4xWrlypwYMHu6R2AABQ8WzG0ScuAAAAF7jnnnuUmJiorl27Fttn9+7dGjt2bJH9hFB62dnZys3NVc2aNXXx4kX169dPTz/9dIG9ppxx4cIF9ejRQxs3brSuqlhWn376qd577z23bEj+73//W5988om1yTwAAKj8/DxdAAAA8F7vvPOO9uzZU2Kfo0eP6v3333dTRd7tzJkzGjBggHJzc3XlyhXdd999evDBB0s9TmBgoN566y0dOnRIERERZa6nf//++v777/XPf/6zzGOUxrlz5zRt2jS3HAsAALgGM6YAAAAAAADgEewxBQAAAAAAAI8gmAIAAAAAAIBHEEwBAAAAAADAIwimAAAAAAAA4BFuuSpfXl6eMjMzVadOHdlsNnccEgAAAAAAAB5gjFFWVpaaNm0qH5+S50S5JZjKzMxU8+bN3XEoAAAAAAAAVAJHjx5VUFBQiX3cEkzVqVPHKqhu3bruOCQAAIDHzJCUJamOpKfKMc5Dks5LqivpH24+trdw5/ng3HuOp8+9p48PAJXN+fPn1bx5cysPKolbgqnry/fq1q1LMAUAALxeTUnXfvpveT75+Cv/w5p/KcZx1bG9hTvPB+feczx97j19fACorJzZzonNzwEAAAAAAOARBFMAAAAAAADwCLcs5QMAAAAAAPB2ubm5ys7O9nQZbuPv7y9fX99yjUEwBQAAAAAAUE4XLlxQRkaGjDGeLsVtbDabgoKCFBgYWOYxCKYAAAASE527DwAAwIHc3FxlZGSodu3aaty4sVObft8oPT1d9957rz7//HOFhIRUTJEuZozRyZMnlZGRobZt25Z55hTBFAAAAAAAQDlkZ2fLGKPGjRurVq1apX58UlKS9u/fr8WLF2vSpEkVUGHFaNy4sdLT05WdnU0wBQAAAFSIwrPnmE0HAChGaWdKXfePf/zD+m9VCqbK+nxvxFX5AAAAAAAAPOTgwYPatWuXJGnnzp06ePCgy8ZevHixOnfuLLvdrtDQUPXt21d5eXmKjY3VrbfeqnPnzll9hwwZorlz50qS5s6dq3r16slutys8PFwDBgzQkSNHXFbXjZgxBQAAAMC7sY8cgEosKSlJPj4+ysvLk4+PjxYvXqwXXnih3OP+8MMPGjNmjLZu3arg4GBJ0rZt26xZTnXr1tXUqVP1+uuvO3x8XFycPvvsM0nSs88+q+eee05JSUnlrqswgikAAABHWL4FAABc6Ntvv9Xf//73IvcvX77cupKfMUbvvvuu9u/fX6TfY489pi5dujh9vBMnTsjX11cNGza07uvUqZP19/Hjx2vy5Ml6+umn1bRp0xLH6tevn1588UWnj10aBFMAAACovgggAQBucuDAAc2aNUvGGPn6+srHJ393JWNMgWAqMzNTH3zwgSQpLy9Pubm5stlsiouLK1UwFRUVpZ49eyo4OFh9+vRRjx49NGzYMDVr1kyS1KRJE40dO1avvPKK/vd//7fYcXJzc7Vo0SJ17ty5rE+9RARTAAAAZUWo4X14DQEAFWTo0KG67bbb9NBDD+nUqVPKzs522C8nJ8f6u6+vr2677TYtWLBAsbGxpTqej4+PkpKStHfvXq1Zs0bLly/X//zP/+jbb7+1+vz+979X+/bttXfv3iKPX716tex2u6T8mVZvvvlmqY7vLIIpAAAAAAAAN4iLi9N3332nhIQELV269Kb9BwwYoDlz5qhRo0ZlPmZoaKhCQ0M1duxY9e/fX0uWLLHa6tatq/Hjx2vixIny9fUtUuv1PaYqEsEUAACAM5hJAwAAXKBRo0ZasmSJpk6dqkmTJhXb749//KMmTJhgbVZeWseOHVN6erruvPNOSdKZM2
d06NAhtW7dukC/J598Um+//bYk6Re/+EWZjlUePm4/IgAAAAAAQDVms9nUoEGDYkMnm82mhg0bljmUkvKXBE6ZMkXt2rWT3W5Xr1699Otf/1r33XdfgX4BAQGaMmWK0tPTy3ys8mDGFAAAAKoHZr0BACqRhQsXymazyRjj8L+LFi3S2LFjyzx+cHCw/vOf/zhsS05OLnB75MiRGjlypHV71KhRGjVqVJmPXRrMmAIAAAAAAHCjU6dOac2aNcrLy5Ofn58CAwP18ssvKzAwUH5+fsrLy1NycrJOnz7t6VIrHMEUAAAAgOonMbHgHwBwoyVLligvL0+S1LVrV+3atUtTpkzRzp071aVLF0lSbm5ugY3KvRVL+QAAAAB4F1cFTYXHIcAC4CLLly+Xj4+PXn75Zb300kvy88uPZ4KDg7Vu3Tq9+uqreu2117R8+XK3LanzFIIpAAAAACB0AuBG48aN0+9+9zt17969SJufn5/+8Ic/aODAgcrNzfVAde5FMAUAAAAAAOBGPXr0uGkfR6GVN2KPKQAAAAAAAHgEM6YAAED1w5IdAACASoFgCgAAAChBcnpygduxjoJNws5KrfBrKEmxIbFurwNA9ZOYnFgx48Y6N25ISIgCAgJUq1YtXb58WQkJCZowYUKJj9m0aZMef/xx+fn5aerUqerXr58LKi4ewRQAAAAAOINQEkAVtGDBAtntdh07dkxhYWHq27evunXrVmz/Dz/8UMOGDdPEiRPdUh97TAEAAAAAAHi5Zs2aKTQ0VIcPH9bx48f14IMPqlu3boqMjNRLL70kSZo6daoWLFigGTNmyG636+zZsxVeFzOmAAAAAAAAvNzevXt16tQpxcbGasSIEZo0aZL69OmjnJwc/eIXv9CiRYs0YcIE7d27V3a7XePGjXNLXQRTAAAAqLaK7B/FvkMAAC8zdOhQ+fj4KC0tTW+99ZZq166tlStX6sSJE1afCxcuKC0tzSP1EUwBAAC4yk97zcRIuiop4Ib7AAAAPOH6HlMrVqzQvffeq759+0rK3+S8Zs2aHq6OYAoAAACwOLp6GwAA3iA+Pl5PPvmkXnrpJcXFxWnq1KlK/OkXaJmZmcrLy1NQUJDb6yKYAgAAAOBVCBgBwLGXX35Zbdq00bJly/Tuu+8qIiJCNptNt9xyi95//32CKQAAAAAAAG+RGJvo0eOnp6cXuN2gQQOdOnVKktS9e3eHj5k7d24FV1UQwRQAAACqBWbR4EZsfA8AlQPBFAAA8G5sPg6gjAivAKDi+Xi6AAAAAAAAAFRPzJgCAAAAUO2x1BMAPIMZUwAAAAAAAPAIZkwBAAAAgBMczaqKdXsVAOBdmDEFAAAAlEJyenKRPwAAVFZZWVkKDAzU6NGjrfvmzp2r+++/X5KUnp6umTNneqg6ZkwBAAAAAABUjIq6OnApxl2wYIE6d+6sxYsX6+2331ZgYGCB9uvB1BNPPOHiIp3DjCkAAFDtMQMGAAB4q9mzZ2v8+PHq3bu3FixYUKT9iSeeUFpamux2uwYNGuT2+gimAAAAAAAAvNDu3bt19OhR9evXT6NHj9bs2bOL9Jk5c6bat2+v1NRULVmyxO01spQPAACgKig8Zb+ilgYAAACvMXv2bD3yyCPy9fXVwIEDNXbsWO3Zs8fTZRVAMAUAAAAAAOBlsrOz9fHHH8vf31/z58+XJF26dEmzZ89WRESEh6v7PyzlAwAAAAAA8DJLlixRq1atdOzYMaWnpys9PV2bNm3Sxx9/rOzsbKtf3bp1de7cOY/VyYwpAAAAoLwSExUj6aqkgBvuAwDAU2bPnq3hw4cXuK9Dhw5q1qyZsrKyrPuioqIUHh6uiIgItWrVyu37TBFMAQAAOFCWK/PFhsS6vA4AAFCFefCXFMuWLXN4/7Zt2yRJzz//vCTJz89PS5cudVtdhbGUDwAAAAAAAB5BMAUAAAAAAACPIJgCAAAAAACARxBMAQAAAAAAwCPY/BwAAABA1eXoiogA4C
HGGE+X4FaueL5OB1PvvPNOie3PPPNMuYsBAACoyq5fye+opFxJvpJiPFgPAABwD39/f9lsNp08eVKNGzeWzWbzdEkVzhijkydPymazyd/fv8zjOB1MpaSkFNtWHU44AABApeLo8tMevCQ1AADVma+vr4KCgpSRkaH09HRPl+M2NptNQUFB8vX1LfMYTgdTc+bMKfNBAAAAAAAAvFlgYKDatm2r7OxsT5fiNv7+/uUKpaQy7jGVmZmpXbt26cqVK9Z9gwYNKlchAAAAAFBayenJBZbPXvVwPQCqN19f33IHNdVNqYOpDz74QFOmTNHp06fVtm1bbd++XXfccQfBFAAAqDKu7wUFuIqjcCTWoxUBAFA1+JT2AW+99ZZSUlLUunVrbd26VatWrVK7du0qojYAAAAAAAB4sVIHUzVq1FCDBg2Uk5MjSerdu7dSU1NdXRcAAAAAAAC8XKmX8gUEBMgYo3bt2ukvf/mLgoODdeHChYqoDQAAAD8pvPwwNiTWI3UAAAC4UqmDqddee03nz5/XtGnT9MQTT+js2bP629/+VhG1AQAAAAAAwIuVOpjq27evJKlevXr66quvXF4QAAAAAAAAqodSB1M5OTlKSkrSgQMHrH2mJGny5MkuLQwAAAAAAADerdTB1EMPPaTjx4+rW7du8vX1rYiaAAAAAAAAUA2UOpjauXOn9u7dK5vNVhH1AAAAuFThTcMBAABQefiU9gHNmzfXtWvXKqIWAAAAAAAAVCOlnjHVpk0bxcbGavDgwapZs6Z1/zPPPOPSwgAAAAAAAODdSh1MXb16VaGhodqzZ491H8v6AAAAAAAAUFqlDqbmzJlTEXUAAAAAAACgmil1MPXRRx8Vua9+/frq3LmzmjVr5pKiAAAAqrXEREnSQ5KuSgrwZC0AAAAVqNTB1Lx587R27Vr17NlTNptN69evV7du3fT999/rL3/5ix588MGKqBMAAMA5P4U63s7R1QZj3V4FAABA+ZQ6mAoMDFRKSopCQ0MlSWlpaZo4caI2bNigQYMGEUwBQHVVOAyoJuEAAAAAgLLzKe0Dvv/+eyuUkqT27dtr//79CgkJkY9PqYcDAAAAAABANVXqJKlOnTr66KOPZIyRMUYfffSRAgMDK6I2AAAAAAAAeLFSB1Nz5szRjBkzFBAQoJo1a2rGjBmaPXu2Ll68qOnTp1dEjQAAAAAAAPBCpd5jqn379tq8ebOysrIk5c+guu7nP/+56yoDAAAAgMouMVExuuEKmuyxCACl4nQwtW/fPrVt21Y7duxw2B4VFeWyogAAlRwfugEAAAC4gNPB1Lhx4/TFF1/ovvvus+6z2Wwyxshms+ngwYMVUiAAAAAAAAC8k9PB1NKlSyVJhw4dkiQdOHBAS5YsUZs2bXTvvfdWTHUAAAAAAADwWk4HUz//+c/1xhtvyG63KzMzU127dlX37t01a9Ys7d69W+PHj6/IOgEAnsTSPcCtktOTJUk/SsqV5Cvpdg/WAwAAUFGcvirfsWPHZLfbJUnz589Xnz59tHz5cm3YsEHz5s2rqPoAAAAAAADgpZyeMVWrVi3r7xs2bNDAgQMlSQ0aNJCfX6kv7gcAAFB+zOYD4GHJ6ck6qv+b3Rjj4XoAoKpxOlHy8fFRRkaG6tevrzVr1mjq1KlW26VLlyqkOABA1XF96dF1sR6pAgAAAEBV4nQwNWnSJHXs2FF+fn6Ki4tTu3btJOXPngoJCamo+gAAAADAkpicWOB2rEeqAAC4itPB1C9/+Uv16NFDJ06cUFRUlHV/SEiIZs2aVSHFAQA8gKVRQNWVmKgYSVclBfx0GwAAoDIr1eZQTZo0UZMmTQrc17RpU5cWBABwI760AkCVU3jGkCQlxha9DwCAqoBdywEApVZ4PykAACqCoxAOAOBdfDxdAAAAAAAAAKonZkwBAADAKyWPilVjSQ0l+Xq6GDiFGVIAUP0QTAEAboqlewCAyiJ2brKnSwAAuBDBFABUJ+7c7NzRsd
hsHW5AkApv5zCYiXV3FQAAuAbBFAAAqDoINwGHCi+B4yp9AICqgmAKAADASySnJ+uopFzl76kU4+F64BqFQ6dYj1Thes7sJ8WyPQDwfgRTAIACWAYFAFWfo9CnKsyiIogCgOrHx9MFAAAAAAAAoHpixhQAVGPMjgKAysWZ5W0AAHgTgikAqGzKurkzm0IDgFdyZnlb4T7Jo2IrpBYAAFyNYAoAvEXhYIqgCgBQSlzdDwDgbgRTAAAAQCXhqs2/HY4T65KhXYaNzgEAEsEUAHheRc1scjAue0oBQOVSeIZSrEeqAADAcwimAAAVwlEIFuv2KuDtCFsBAACqNoIpAABQObFPGgAAgNcjmAIAd3Lii7bDmUYhsS4vBQDgeeyzBACo7gimAKAKKBxWEVQB8BaF91iSvPdKcI6ea6zbqyhZkaAs1hNVAACqE4IpAKiCmFUFAEAl5cwyZJYqA4CFYAoAPMxVmzdXiVlVhT+Il/WDuavGAYAKVOSKeyzbAwCgCIIpAHAjriBWQRwFU4RVQKXkaDlbRY0T65Ijea+qsLSwKnLmZ30sP7cAwEIwBQBeihAMqBwKf/n31v2TXMlbzlllmyHlqlAQFYQlgACqKYIpAABQZRC4eh9H4U3yqNgCtwlUqj5mZxXk1KwqR0vyCa9QWbCtAlyIYAoAULnxQQeosgiUgLLjQieozIrsbcryVJQDwRQAwHP4wIJqyFFYU1WXqhVGEFW5VbalhSg9p2ZaVXgVQFEOg1S3V4GqimAKAAAApVbWEIpwpHridXcjlvvB1Vx1FeXyjAWvRjAFAACASqVwiFF4zylHfQDkYwkgSsWJ8Mil+zuWJZgizPJ6BFMAALcpsh8BH5RRAjY6x3WEUED5FPn565EqUBk5DDJdFAS56ud4rEtGQWVGMAUAqHK8do8NprzDyxAoAUDVwy+G4G4EUwBQkQgVAFQjBFFAFcEvQrwTryFczU3/ryCYAgBUatX+t3aFf/jzodMrFd5IPNYjVZSsSI2EUED1w8+kSs1rPzMRpLqPE/uLxVbAYQmmAKACee0HBBfh/JSBpz+c8aUEAKo8p75sOvj/O3tVVSLV6OevU/tgVaPz4TJO/Bt3pPAFSVzxiyqCKQAAgAp25ocz+uTFTzRi2gg1uL2Bp8sBgKIq8kpsKD9enwKKhKSe/sWdq5S1ZmceV0HvIVdcOZdgCgCASqLMl/guy28NnfkA56pxqrnYucn6ZMdhnc44rYvTl2hwVHCRPo4+1FU2LN0DvFuZvqTyM8BtqnsQdTPuWnLmUmWcseSIM1dSrMzvIYIpAAA8pQwfIsoUVDl5LI9+mfDyLzKrDv1XkrT60EmNcBBMAUBV5NQSK0eq8xJ0F4YRKFmRJWdl/QzlTB8n3mcV+TpX9fcQwRQAAFWIy2ZVuej4Do9d2b4EeFhm1mUdOntJknTw7EVlZl1W0zq1PFwVAFQMZ74guzO8cmrJV1k58wumKjAjtrpw1XvTmUC2qgdF5ZWYnKirF6863Z9gCgCAasiZgMupDTCdCcqq+VKPtYd/lE2SkWSTtO7wjxoa0dzDVQGA5zgVEHjw2E4ry4wcVGplfX9U9yCqsNi5ybp4Lcfp/gRTAABUcU59wHfiw7LLNsEsy/JDL5D2Y5a+2PeDJOmC/i+I+u7Y6QL9Fu89pmNZlws+eOP3uqft7WrfqI5bar2ZxOTEIvfFur0KANVaGWbfenp2EuEEUDYEUwAAeIg7P8B68sNydZnyfizrspZ+/0N+IGWTfGSz2swN/z116ZqW7Tv+022jPJMfYNmb1K80wRQAeFqRX3J4pAoA7kAwBQAA4AJ9W96mBjX99eraPTp3NVu5xjjsd+P9PjapQU1/Te4TJnuT+m6qFAC8RDVaFg54M4IpAHAVPhxVX2z2XWreMEPKkY63N9Cc+7oq8eu9Ss04fdP+3Zs11Pg7Q1Wvpr8bqnNe7NxkT5cAAAV4epkegIpDMAUALuKtX7QBlE69mv56rm+Elu
w6qqRth4rt91inlhoW0Vw2m63YPuXlaK+oxFgH9xXqF1sh1QAAABTl4+kCAAAAvI3NZtMtNUr+/V+dGn4VGkoBAABUBcyYAgCgnNigFY5sTj8pm00yP21ufv0qfUb5e0utST+pQe2bVmgNjpbkJSrRqX4AAADuQDAFoPpxZv8f9ggCUA5ZV7KVdvysjCRfm1TD11dDwprp093HdC03V7lGSj1xVuevZqtugHv3lyKEAgAAlQnBFADv4kSg5NReUIU22OSLHMqNsLNaSTl6StevvRfaqK5e7t1BPwusqYFtb9era3Zr949ZyjPShqOn1L9NE4/WCgAA4EkEUwA8z9EXdmcCJndencVBPWx2DqA424+dkk3SfdHBeioqWL4++XtJNQmsqXcGdNRHOw7r4+2H9U3GaYIpAABQrRFMAeVU+EpGjq525EyA4rUzcsoa6FSySwITQnmpwu9PV81qIsis9vqHBalfeHO1a1xXvoXafH1sSrCHqHuzhsozxuHjAQAAqgu3BFPmpw9d58+fd8fhKo3X171e4HbPeeuK9Ok1c1mpx133xMAi960f3uumxyrcZ2KviaU+tlT0eRV3vOqia6HbX8xaUaZxvhjWs8h9ZXl/OFL4PePMuI7eZ0BV4szPnHXfF/z32uv882U61sVrOQVuf/F92f4/AO8RVP8W5Uq6fC1HF4vpE1yvtqSi758bXZaUp/zLKBc3jqPH5EryLcVjvJk7z4c3nvuyfn539+cIT597Tx8fACqbS9n5n2+ME7+EsxlnepVTRkaGmjdvXtGHAQAAAAAAQCVx9OhRBQUFldjHLcFUXl6eMjMzVadOHdlstoo+HAAAAAAAADzEGKOsrCw1bdpUPj4+JfZ1SzAFAAAAAAAAFFZybAUAAAAAAABUEIIpAAAAAAAAeATBFAAAAAAAADyCYAoAADcJCQlRamqq0/2HDBmipk2bymaz6ezZswXavvnmG0VHR6tdu3bq27evjh075nCMLVu2qEePHqpdu7buv/9+p9skac2aNeratavCw8MVFhamjRs33rRt7ty5qlevnux2u+x2u+Li4qzHXLx4UQkJCYqMjFRoaKgmTJhgXUI4Ly9PL7zwgiIiIhQaGqrRo0fr2rVr1mOPHDmie++9V+3bt1dYWJjeffddSVJ6erp8fX2t49ntdh04cMDhubhw4YL69eunRo0aqX79+k63lXT88rT96U9/UlhYmOx2u+644w5t3rzZaivptbfZbIqMjLSe77p16yRJO3fuLHAeQkJC1LBhQ4fnQpK2b9+ue+65x7r94YcfWuN27NhRy5Yts9qeeeYZhYSEyGazFXkP//vf/1aXLl0UFRWlO+64Q9u3b7faEhIS1K5dO0VHR+vOO+/Uli1brLbY2Fh99tlnxdbnKpmZmerVq1eZHtulSxclJye7pI4HHnhAGzZscMlYAAB4FQMAANwiODjYpKSk3LRfdna2McaYr776ypw4ccJIMmfOnLHac3NzTevWrc2qVauMMcZMnz7dDBkyxOFYR48eNd98842ZOXOmue+++5xuO3bsmAkODja7d+82xhhz5coVq4aS2ubMmVNkrOsmTZpkRo4cafLy8sy1a9dM//79zcKFC40xxsyaNcvExcWZq1evmry8PPPYY4+ZadOmGWOMycvLM506dbL6GmPM8ePHjTHGHDp0yNSrV8/xiSzkypUrZuXKlSYlJaXIY0pqK+n4ZW1LSUkxLVq0MFlZWcYYYz7++GPTtWtXq19xr70xxuF9jvz2t781Tz31VLHtAwYMMGvXrjXGGHPq1ClTp04d88MPPxhjjFm3bp1p3Lix1XfNmjXm6NGjRd7Dp0+fNg0bNjS7du0yxhizdu1aEx4ebrX/61//st7Pn3/+uQkODrba+vTpY/75z3/e9Hl4UufOnc3q1atdMlZKSorp1auXS8YCAMCbMGMKAAAX27hxo3r27Kno6GhFRUXpX//6l9W2ePFixcTEqGXLlnrttdes+2
NjY/XMM88oJiZGd999tyQpPj5et912W5Hxt27dKj8/P2s20tixY/X555/rypUrRfoGBQWpW7duCggIKFXb3/72Nw0bNkwdOnSQJAUEBFgziUpqK8n27dvVv39/2Ww2+fv76+c//7k+/vhjqy0+Pl41atSQzWbTgAEDrLaVK1cqICBADzzwgDXWz372s5ser7CAgAD17dvXYa0ltZV0/LK22Ww2ZWdn6+LFi5Kks2fPKigoyOpX3GvvrCtXrmjevHkaPXq0w/YjR47ou+++s2YS5eXlWZd1dlRP7969C9y+7sCBA7r11lsVHh4uSerVq5eOHDmibdu2SZIGDRokPz8/SdIdd9yhY8eOKScnx3r8ypUr1bVrV7Vp00a/+93vrBl0sbGxevrppx223WjYsGGaP3++pPz3ZY0aNaxz2rdvX61du1bp6ekFXlebzaY//vGP6tatm1q2bKk5c+ZYbRs2bJDdbldERIQSEhIK1Lp//37Fx8crKipKdrvdmu01a9YsjRkzRpK0e/du2Ww2ffnll5KkKVOmaMqUKZIku92ukydPas+ePQ5fEwAAqiuCKQAAXOj06dO6//779frrr2v79u1KTU0tsIzo7Nmz2rhxo7Zs2aLp06cXWIL3/fffa+3atVq1alWJxzhy5IiCg4Ot23Xq1FHdunWVmZkpSRo4cKC+/fbbcj2P3bt36/Lly4qPj5fdbtfTTz9tfeEvqU2S1q9fL7vdrh49emjRokXW/Z07d9aiRYt09epVXbhwQZ999pnS09OttiVLluj8+fPKzs7WwoULrbbdu3ercePGeuihh9SxY0cNHjxYBw8etMa9ePGiunbtqk6dOmnKlCnKzc0t13N3dC6KO35Z26Kjo/Xcc8+pZcuWCgoK0ltvvVVgmd/N3HXXXYqOjtbzzz9f4Nxft3jxYrVq1Up2u93h468vxbyuUaNGmjlzpjp16qTg4GA9+uijmjt37k3raNu2rU6dOmUtUVuyZImysrKs1+5Gb7/9tgYOHGgFVVL+OdqwYYN27NihNWvW6P/9v//nVNt18fHxWrFihSTpq6++UpcuXbRmzRpdunRJ27dvV0xMjMO6AwICtHnzZi1fvlzPPPOMcnJydO3aNQ0dOlRvvPGGdu3apYcffrjAssThw4frgQce0I4dO7Ro0SKNHj1ahw8fLlJDTExMgdvx8fHWGDExMVq5cuVNzysAANUJwRQAAC60ceNGtW/f3gqjfHx8CuzzM2zYMEn5QUCrVq106NAhq23EiBHy9/cvdw3Lli1Tly5dyjVGTk6O1q5dq0WLFmnLli06c+aMXnnllZu2/eIXv9CRI0eUmpqq2bNn6/nnn9emTZskSRMmTFCLFi3UvXt33XPPPerWrZsVUowaNUr9+/dXnz591KdPH7Vr185qy8nJ0apVq/Tyyy8rJSVF/fr104MPPihJuv3223Xs2DFt2bJFK1as0Lp16/Tmm2+W67k7OhfFHb+sbYcOHdLixYu1f/9+ZWRk6LnnntPQoUOdqufw4cPaunWrNmzYoJMnT+r3v/99kT6zZ88udraUJGVkZBSYdXbu3Dm9/fbb2rx5sw4fPqzZs2dr8ODBBfb5cqRevXr69NNPNXHiRHXu3FlffvmlwsLCCoRPkvTJJ59o4cKFmjVrVoH7H3nkEfn7+6t27doaMWKEFejcrO26+Ph4rVy5Urm5udq9e7eef/55633QrVu3Yv89DR8+XJIUGhoqPz8/HT9+XHv37pWfn58VJN19991q1aqVJCkrK0vbtm2zzmnbtm3Vs2dPrVu3zupz8OBBrVixQq+//rpWrVqlCxcuaPfu3erWrZt13CZNmigjI6PEcwoAQHVDMAUAgBvVrFnT+ruvr2+BpUKBgYFOjdGiRQsdPnzYup2VlaVz586padOmLquzRYsWuueee9SgQQP5+/vr4YcftgKmktoaNWqk2rVrS5I6dOiggQMH6uuvv5Yk1apVS2+//bZSU1O1Zs0aNWrUyF
oCZrPZlJiYqJSUFG3YsEFhYWFWW4sWLdSxY0fr9siRI7Vt2zZlZ2crICDAWvLWsGFDPfroo9Zm4M8884y1EfjOnTvLdS6KO35Z25KSkhQZGWm9ZgkJCfr6669vGgRdr0eSbrnlFv3mN7+xnu91hw4d0qZNm6wQ1JHatWsXWPr51VdfqX79+tbyzHvvvVfnz58v8D4rTlxcnNasWaOtW7fqzTffVGZmpsLCwqz2BQsW6A9/+IO++uqrmy7BtNlspWpr0aKFAgICNG/ePHXu3Fl33XWXVq9erRUrVuiuu+4qdqyS/h2WpZ74+HgtX75c+/btU58+fWSMUVJSkmJiYgqEdFeuXFGtWrWKHRMAgOqIYAoAABfq0aOH9u3bZ4UFeXl5On36tEuP0blzZ2VnZ2v16tWSpPfff1/33ntvgS/b5TVs2DCtXr1aV69elSQtX75c0dHRN227cWniiRMntGrVKnXs2FGSdP78eV26dElSfnjy3nvv6Xe/+52k/C/sZ86ckST9+OOPmjp1ql588UVJ0oABA5SRkWGNvWzZMnXo0EH+/v7673//q+zsbEnS1atXtXjxYut477zzjlJTU5WamqrIyMgyn4uSjl/WtlatWunrr7/WhQsXJElLly5Vu3btVKNGjRJrOXPmjHUO8/LytGDBAuv5XvfBBx9o8ODBJe77FRUVpbS0NOt2q1atlJqaquPHj0vKn/mXk5Oj5s2b3/T8/PDDD9bfX331VfXt21dt2rSRJC1cuFAvvfSSVqxYYQVqN/rkk0+UnZ2ty5cva/78+QWWvZXUdqP4+HhNnjxZ8fHxVli6aNGiYvsXJzQ0VDk5Oda/qxUrVlhXeKxTp446depk7Ue1f/9+rV+/Xr1797ZqmD59ujU7qm/fvnrllVeK1LBnzx7r3woAAPiJZ/deBwDA+2zcuNH06NHDREZGmujoaLNkyRJjTNGr8t14xS9HVygbOHCgadasmZFkmjZtavr06WO1bdiwwURGRpq2bduaPn36mCNHjlhtAwYMMFu2bDHGGLN3717TrFkz06BBA1OzZk3TrFkz89e//vWmbcYYM23aNNOhQwcTERFhhg4dWuBKcMW1TZw40YSFhZno6GgTGRlZYLyUlBTTtm1b06FDBxMZGWmSkpKstuPHj5vQ0FATFhZmQkNDzXvvvVfgXPznP/8x0dHRJioqyvTq1cvs2LHDGGNMUlKSCQ8PN1FRUSYsLMw89dRT5sqVK8W+NpGRkaZJkybGZrOZZs2amREjRjjVVtzxy9qWl5dnJkyYYNq3b2+ioqJMTEyM+fbbb63HFffaX3/drz/fESNGmFOnTlmPy83NNUFBQdYVG4tz7do1ExwcXOCxf/nLX0yHDh1MVFSU6dSpk/nyyy+ttjFjxphmzZoZX19fc9ttt5nWrVtbbY899php3769ad26tRkxYkSB94mfn58JCgoy0dHR1p8ff/zRGJP/nn/66adN165dTevWrc3zzz9v8vLybtpWWFJSkpFkDh8+bIzJfw82atTI6l/4yo0qdFXDW2+91Rw6dMgYY8zXX39toqOjTUREhElISDDR0dHWv9F9+/aZu+66y0RFRZno6OgC/15//PFHY7PZzIcffmiMyX/dJRV4L1y4cME0a9bMuhIjAADIZzPGwSVOAAAA4NWmT58uSQ73qPK02NhYjRs3Tvfff7+nS3GZmTNnKiMjo8DVOAEAAEv5AAAAqqVnn33W6X3NUH4+Pj6aOHGip8sAAKDSYcYUAAAAAAAAPIIZUwAAAAAAAPAIgikAAAAAAAB4BMEUAAAAAAAAPIJgCgAAAAAAAB5BMAUAAAAAAACPIJgCAAAAAACARxBMAQAAAAAAwCMIpgAAAAAAAOARBFMAAAAAAADwiP8PsQKoHF2SL+kAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -309,7 +341,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAABKYAAAC+CAYAAAAP1AcDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzbUlEQVR4nO3dd3hU1b7G8XdSCCVUQTkQitQQUgYCoUgJIUo7gh4LSlEQBPWIInoUeCwRvYrtYOEqeg8CopwLCEcRQaUFkCIoCUWKIAQIEURCSagp6/4R2ZdJJmESkplk8v08jw/MXnuv/Zs9gzN5s9baNmOMEQAAAAAAAOBmPp4uAAAAAAAAAOUTwRQAAAAAAAA8gmAKAAAAAAAAHkEwBQAAAAAAAI8gmAIAAAAAAIBHEEwBAAAAAADAIwimAAAAAAAA4BEEUwAAAAAAAPAIgikAAAAAAAB4BMEUAADlkN1ul91uV0hIiHx9fa3HAwcOvOa+4+PjZbfb822fOnWqJk+eXGAfU6dO1SuvvOLyOdPT0zV27Fg1a9ZMYWFhioiI0JAhQ3TgwAH17dvXen42m01hYWGy2+3q2rWrJDlsu/zfiRMnXD63JGVmZurFF19UcHCwQkNDZbfbNWrUKJ06dapQ/ZR2d911lzZs2CBJiouLk81m09q1a632qVOnatiwYW6r59ixY4qKilJmZqbbzgkAAIqXn6cLAAAA7peYmChJSkpKkt1utx5fKTMzU35+xftV4fz58/rnP/+p7du3F7jfqFGj1KpVK/39739X9erVC9zXGKO+ffuqVatW2r59uypVqqTs7Gx9/vnn+vXXX7VkyRJr38tBSo0aNRz6cLatMEaMGKHU1FRt2LBBNWvWlDFGn3/+uVJTU6+pX1eUxOvkzKZNm5SamqpOnTpZ2xo3bqxnnnlG69evv6a+s7Ky5OvrW+jjbrjhBnXu3FmffPKJHnjggWuqAQAAeAYjpgAAgOVy0BAVFaX7779fR48eVY8ePRQZGanWrVvr0UcfVXZ2trX/a6+9Zo1Q6tixo86dO+fQ35kzZ3TLLbdo0qRJkqTPP/9cN910k6pUqSJJ2rhxoyIjI2W32xUaGqoPPvhAklShQgXdcsstmjNnzlVrXrFihZKSkjR16lRVqlRJkuTj46O7775bsbGxRb4Wa9asUdOmTZWamipJevTRR/Xggw/m2W/fvn2aP3++ZsyYoZo1a0rKCcDuuusuNWnSRJL0xhtvqHXr1goLC9PgwYN1+vRpnTt3Ttddd52OHj1q9RUXF6cnnnhCkrR3717169dP7du3V3h4uKZOnWrtZ7PZ9MILL6h9+/aaMGGCtm/fri5duqht27YKCQnRyy+/bO2blpamgQMHKjg4WF27dtXo0aMdRjW9+eabioqKUtu2bdW7d28dPHjQ6fX48MMPNWjQIIdt/fv3V0ZGhv7zn/84PWb27Nnq0KGD2rZtq27dumnr1q2SpJkzZ6pHjx664447FBYWpo0bNyosLMzh2OjoaH355ZeSpG+//VZdunRRZGSkoqKitGrVKmu/e++9Vx9++KHT8wMAgNKPEVMAAMDBiRMn9MMPP8hms+nChQv66quvFBgYqKysLA0YMEDz5s3TPffco1mzZmnBggX6/vvvVb16dZ08eVIBAQFWP4cPH9Ztt92mxx9/XPfdd5+knGl+HTp0sPZ59dVX9dRTT+nee++VJJ08edJq69SpkxYtWqSHH35YUs70wyVLlqhevXoO9W7ZskVt2rSRv79/kZ9z165drRE7NWvW1KpVq9StWzeNHDlSw4YN05AhQ/T9999r48aNeY7dsmWLmjdvrtq1azvte+nSpfr444+1YcMG1ahRQ6NGjdL48eP1wQcf6I477tCnn36qp556SsYYzZo1S4sWLVJWVpbuvfdeffrppwoODta5c+fUsWNHdejQQe3bt5ck+
fr6avPmzZJywqcVK1YoICBA58+fV+fOnRUbG6uOHTtq0qRJqlSpknbt2qX09HR17txZkZGRkqQ5c+Zoz5492rBhg3x9fTV79mw98sgj+vrrr/M8j/j4eCs0u8xms2ny5Ml69NFH1b9/f4e2devW6d///rfWrFmjgIAArV27VoMGDdLPP/8sSfrhhx+UkJCgli1bSpIuXryoH3/8Ue3atdP+/fu1Z88e9evXT/v371dcXJy+/fZbVatWTfv27VPXrl2VlJSkgIAARUZGatu2bTpz5oyqVavm8msOAABKB4IpAADgYNiwYbLZbJKk7OxsPfPMM/r+++9ljNHvv/+u0NBQ3XPPPVq8eLEeeugha6rd5dFCUs7aP926ddO//vUv9ezZ09qenJys3r17W4979Oihl156SXv37lVMTIy6dOlitdWtW1fJycnWY2fTDZ1Zu3atxowZo/T0dA0aNMgarXW1Y5xNuRs/frz69OmjUaNGadOmTapYsaJLNVxp+fLlGjhwoNX/ww8/rLvuukuSNHz4cI0cOVJPPfWU4uPjdd111yksLEw7d+7Uzz//rHvuucfqJy0tTTt37rSCqSunrp0/f16PPPKIEhMT5ePjo8OHDysxMVEdO3bUihUrNGXKFNlsNlWtWlUDBw7Uvn37JElffPGFNm/ebAVVWVlZ+T6P5ORk3XDDDXm29+zZUw0aNNDHH3/ssP3LL7/U1q1bHYLI1NRUnT9/XpLUuXNnK5S6fC1mzJihdu3aadasWRo8eLD8/Pz0zTffaN++ferWrZu1r4+Pjw4dOqTmzZvLz89PNWvWVEpKCsEUAABlEMEUAABwEBgYaP39n//8p37//Xf98MMPqlixosaNG6cLFy5ctY8aNWqoWbNmWrx4sWJiYqygq3Llyg7Hjx07VgMGDNDy5cs1ceJEhYaG6v3335ckXbhwwZqaV5A2bdpo6tSpysjIkL+/v7p27arExETFxcVd8+LjaWlp2r9/v6pUqaLjx4+rRYsWefZp27at9u7dqxMnTui66667ap+Xr4WUMyosOztbmzZt0syZMzV8+HBJOetm1apVq8Aw7srXaeLEiapdu7YSEhLk5+env/3tb/m+Tlee3xijCRMmaNSoUVetO/drd6XJkydrwIABGjNmjEPf999/f76L2F9ZvyTdf//9ioiI0JtvvqlPPvlEixcvtvq5+eabC5zW6ep7BQAAlD6sMQUAAPJ18uRJ1a1bVxUrVtTRo0c1f/58q61///6aNm2aTp8+LUk6deqUNeImICBACxcuVEpKih588EFrXarw8HDt2bPH6mPPnj268cYb9eCDD2rixIkOU+V27dqliIiIq9YYGxurBg0a6PHHH7dG40jS2bNnr+3JK2dR88GDB2vevHkaOnSo07v1NWvWTHfccYdGjBhhBWHGGC1YsED79+9XbGys5s2bpzNnzkjKWavplltusY4fPny43nvvPX399dfWGk4tW7ZUtWrVNGPGDGu/ffv2Wetd5Xby5EkFBQXJz89Pe/bs0bJly6y2mJgYzZo1S8YYpaena968eVbbbbfdpmnTpln9ZmRkKCEhwek5cr92V2rbtq26dOlirREm5bw/Pv30Ux06dEhSzui7H3/80enxklSvXj21b99eTzzxhK6//nq1bt1aktSrVy8tX75c27Zts/bdtGmT9fdjx47JZrOpQYMG+fYNAABKL0ZMAQCAfD3++OO688471bp1a9WrV89hMfGhQ4cqJSVFnTt3lp+fn6pUqaLly5db7f7+/pozZ45GjhypwYMHa/bs2brzzjv1wAMPWItzT506VStXrlSFChXk6+urt956yzr+m2++0UsvvWQ9zm+NKZvNpqVLl+rZZ59VaGioqlSpoqpVq6pJkyaaMGGCS8/zyjWmJGnu3LlatmyZUlNT9dxzz8nHx0cPP/yw7rvvPi1evNhh1JEkffzxx3r55ZfVoUMH+fn5KTs7W926dVPPnj3Vp08f7dixQ506d
ZKPj4/Cw8OtUWGXr2PDhg11xx13WNMh/fz8tHjxYo0dO1ZTpkxRVlaWateune+ooWeffVZDhw7VrFmz1LRpU8XExFhtzz//vEaMGKFWrVqpdu3aioiIsKYVDh48WCdOnFCPHj0k5dzh74EHHlCbNm3ynOPOO+/Ut99+m++C8v/1X/+l4OBgh2v6+uuv6/bbb1dmZqYuXbqkfv36qV27dvm+DsOHD9fdd9/tEHA1a9ZMc+bM0ejRo3Xu3DldunRJbdq0sa7FN998o9tvv10+Pvy+FQCAsshmjDGeLgIAAJQf/fr1U1xcnLVWkjM7d+7U6NGjtXbtWjdW5p0yMjKUlZWlihUr6uzZs+rVq5fGjBmjgQMHFqqfywunb9iwwbqrYmnQtWtXffTRR2rVqpWnSwEAAEXAr5YAAIBbvfvuuzp27FiB+xw+fFgffvihmyrybidPntRNN90ku92uyMhI3XTTTbr77rsL3U9gYKCmTJmiAwcOlECVRXPs2DE9/PDDhFIAAJRhjJgCAAAAAACARzBiCgAAAAAAAB5BMAUAAAAAAACPIJgCAAAAAACARxBMAQAAAAAAwCP83HGS7OxspaSkqGrVqrLZbO44JQAAAAAAADzAGKO0tDTVq1dPPj4Fj4lySzCVkpKiBg0auONUAAAAAAAAKAUOHz6soKCgAvdxSzBVtWpVq6Bq1aq545QAAAAeM1VSmqSqkh69hn7ukXRGUjVJ/+vmc3sLd14Prr3nePrae/r8AFDanDlzRg0aNLDyoIK4JZi6PH2vWrVqBFMAAMDrVZR06c8/r+Wbj79yvqz5F6Kf4jq3t3Dn9eDae46nr72nzw8ApZUryzmx+DkAAAAAAAA8gmAKAAAAAAAAHuGWqXwAAAAAAADeLisrSxkZGZ4uw238/f3l6+t7TX0QTAEAAAAAAFyj9PR0JScnyxjj6VLcxmazKSgoSIGBgUXug2AKAAAAgHeLi3NtGwAUUVZWlpKTk1W5cmXVqVPHpUW/r5SUlKRbb71VX331lRo3blwyRRYzY4yOHz+u5ORkNW/evMgjpwimAAAAAAAArkFGRoaMMapTp44qVapU6OMXLFigffv2aeHChZo4cWIJVFgy6tSpo6SkJGVkZBBMAQAAAIDLco+YYgQVgGJQ2JFSl/3v//6v9WdZCqaK+nyvxF35AAAAAAAAPGT//v3asWOHJGn79u3av39/sfW9cOFCRUZGym63Kzg4WDExMcrOzlZ0dLSuu+46nT592tr3zjvv1MyZMyVJM2fOVPXq1WW329W6dWv16dNHhw4dKra6rkQwBQAAAAAA4CELFiyQj09OPOPj46OFCxcWS7+//fabRo0apYULFyoxMVG7d+/Wm2++aY1yqlatmiZPnpzv8T169FBiYqJ+/vlntWjRQk888USx1JUbU/kAAAAAgAXSAZSwH3/8Uf/617/ybF+6dKl1Jz9jjN577z3t27cvz34jR45Uu3btXD7fsWPH5Ovrq1q1alnb2rZta/39mWee0fPPP68xY8aoXr16BfbVq1cvPf300y6fuzAIpgAAAAAAAErYr7/+qo8++kjGGPn6+lqjpIwxDsFUSkqKPv74Y0lSdna2srKyZLPZ1KNHj0IFU+Hh4erSpYsaNWqk7t27q3Pnzho0aJDq168vSapbt65Gjx6tF154Qf/zP/+Tbz9ZWVmaP3++IiMji/rUC8RUPgAAAAAAgBI2cOBArVixQtdff72knDv5ZWRkKDMz02G/zMxMq02Srr/+eq1cuVIDBw4s1Pl8fHy0YMECrV+/Xr1799a6devUunVrh9FY//jHP7R48WLt3r07z/GrVq2S3W5XZGSkbDab3nrrrcI+ZZcwYgoAAAAAAMANevTooZ9//lnDhw/X4sWLr7p/nz59NGPGDNWuXbvI5wwODlZwcLBGjx6t3r17a9GiRVZbtWrV9Mwzz2jChAny9fXNU+sXX3xR5PO6ihFTAAAAAAAAblK7dm0tWrRIr7zyS
oH7vfLKK1q0aFGRQ6kjR45o3bp11uOTJ0/qwIEDatq0qcN+Dz/8sBITE/XTTz8V6TzXihFTAAAAAOBM7sXPWQwdQDGx2WyqWbOmbDabtb5U7vZatWpZd9AriszMTE2aNEkHDhxQ5cqVlZmZqfvvv18DBgzQlClTrP0CAgI0adIk3XfffUU+17UgmAIAAAAAAHCzefPmWcGUsz/nz5+v0aNHF7n/Ro0a6dtvv3XaFh8f7/B46NChGjp0qPV42LBhGjZsWJHPXRhM5QMAAAAAAHCjEydOaPXq1crOzpafn58CAwP13HPPKTAwUH5+fsrOzlZ8fLxSU1M9XWqJI5gCAAAAAABwo0WLFik7O1uS1L59e+3YsUOTJk3S9u3b1a5dO0lSVlaWw0Ll3oqpfAAAAAC8C2tBASjlli5dKh8fHz333HN69tln5eeXE880atRIa9eu1UsvvaSXX35ZS5cudduUOk8hmAIAAAAAAHCjsWPH6sknn1SHDh3ytPn5+enFF19U3759lZWV5YHq3ItgCgAAAAAAwI06d+581X2chVbeiGAKAAAAAFzhbIog0wYB4Jqw+DkAAAAAAAA8ghFTAAAAALxafFJ8nm3RjaPdXgeA8icuPq5k+o12rd/GjRsrICBAlSpV0vnz5zV8+HCNHz++wGM2btyoBx98UH5+fpo8ebJ69epVDBXnj2AKAAAAAADAS82dO1d2u11HjhxRSEiIYmJiFBUVle/+s2bN0qBBgzRhwgS31MdUPgAAAAAAAC9Xv359BQcH6+DBgzp69KjuvvtuRUVFKSwsTM8++6wkafLkyZo7d66mTp0qu92uU6dOlXhdjJgCAAAAUO7knt7H1D4A3m737t06ceKEoqOjNWTIEE2cOFHdu3dXZmam/vrXv2r+/PkaP368du/eLbvdrrFjx7qlLoIpAAAAAAAALzVw4ED5+Phoz549mjJliipXrqwVK1bo2LFj1j7p6enas2ePR+ojmAIAAAAAAPBSl9eYWr58uW699VbFxMRIylnkvGLFih6ujmAKAAAAALhzHwCvFxsbq4cffljPPvusevToocmTJysuLk6SlJKSouzsbAUFBbm9LoIpAAAAAACAcuC5555Ts2bNtGTJEr333nsKDQ2VzWZTlSpV9OGHHxJMAQAAAAAAeIu46DiPnj8pKcnhcc2aNXXixAlJUocOHZweM3PmzBKuyhHBFAAAAAA4wZ37AKDkEUwBAAD8ub7CVbcBKBOcrRcFACidfDxdAAAAAAAAAMonRkwBAAAABck9eo7RdAAAFBuCKQAAAABwgbMpgtFurwIAvAtT+QAAAAAAAOARBFMAAAAAAABeKi0tTYGBgRoxYoS1bebMmbrtttskSUlJSZo2bZqHqmMqHwAAAAAAQMkoqXUJC9Hv3LlzFRkZqYULF+qdd95RYGCgQ/vlYOqhhx4q5iJdw4gpAAAAAAAALzV9+nQ988wz6tatm+bOnZun/aGHHtKePXtkt9vVv39/t9fHiCkAAABnXLkTG3drAwAApdjOnTt1+PBh9erVS5mZmZo8ebLDlD5JmjZtmsaOHavExESP1EgwBQAA4ApCJ+/E6woA8GLTp0/XfffdJ19fX/Xt21ejR4/Wrl27PF2WA4IpAAAAAAAAL5ORkaHZs2fL399fc+bMkSSdO3dO06dPV2hoqIer+3+sMQUAAAAAAOBlFi1apCZNmujIkSNKSkpSUlKSNm7cqNmzZysjI8Par1q1ajp9+rTH6iSYAgAAAAAA8DLTp0/X4MGDHba1atVK9evXV1pamrUtPDxcrVu3VmhoKIufAwAAAAAAeA0PrmW4ZMkSp9u3bNkiSRo3bpwkyc/PT4sXL3ZbXbkRTAEAAJQF3AEQAAB4IYIpAAAAoADxSfEOj6M9UgUAAN6JYAoAAJQ/jDbCtXD2/uE9BQBAkbD4OQAAAAAAADyCYAoAAAAAAKAYGGM8XYJbFcfzdXkq37vvvltg+2OPPXbNxQAAAAAAA
JQ1/v7+stlsOn78uOrUqSObzebpkkqcMUbHjx+XzWaTv79/kftxOZhKSEjIt608XHAAAAAAAABnfH19FRQUpOTkZCUlJXm6HLex2WwKCgqSr69vkftwOZiaMWNGkU8CAAAAeBwLlAMASlBgYKCaN2+ujIwMT5fiNv7+/tcUSklFvCtfSkqKduzYoQsXLljb+vfvf02FAAAAAAAAlGW+vr7XHNSUN4UOpj7++GNNmjRJqampat68ubZu3aqOHTsSTAEAgNLJnaNk/jxXJ0kXJQW4+/wotPikeE+XAABAuVboYGrKlClKSEhQTEyMfvrpJ61Zs0YzZ84sgdIAAAAA4Cri4hzDYABAmeJT2AMqVKigmjVrKjMzU5LUrVs3JSYmFnddAAAAAAAA8HKFHjEVEBAgY4xatGiht99+W40aNVJ6enpJ1AYAAID8OJsiyLRBAABQxhQ6mHr55Zd15swZvf7663rooYd06tQpvf/++yVRGwAAAFDqOFuXKtrtVQAA4B0KHUzFxMRIkqpXr65ly5YVe0EAAAAAAAAoHwodTGVmZmrBggX69ddfrXWmJOn5558v1sIAAAAAAADg3QodTN1zzz06evSooqKi5OvrWxI1AQAAIJfc08eiG0d7pA6gtIlPitdhSVmSfJVzdz4AQNlR6GBq+/bt2r17t2w2W0nUAwAAAAAAgHKi0MFUgwYNdOnSJQUEBJREPQAAAECJcLZoOQAA8KxCB1PNmjVTdHS0br/9dlWsWNHa/thjjxVrYQAAAO7i9C5rTJUDAAAocYUOpi5evKjg4GDt2rXL2sa0PgAAAAAAABRWoYOpGTNmlEQdAAAAQNkVF6dOyll4O+CKbQAAoGCFDqY++eSTPNtq1KihyMhI1a9fv1iKAgAAAAAAgPcrdDD12Wefac2aNerSpYtsNpu+//57RUVF6ZdfftHbb7+tu+++uyTqBAAA8ChXFs5mXSoAAIDC8SnsAYGBgUpISNCyZcv03XffKSEhQbVq1dL69ev18ssvl0SNAAAAAAAA8EKFHjH1yy+/KDg42HrcsmVL7du3T40bN5aPT6FzLgAAAAAou3KvL8baYgBQKIVOkqpWrapPPvlExhgZY/TJJ58oMDCwJGoDAAAAAACAFyvSXfmGDh2qkSNHymazKSIiQrNmzdLZs2f1xhtvlESNAAAAbufKmlL5HXNYUpYkX0mdirEmAAAAb1PoYKply5batGmT0tLSJOWMoLrs5ptvLr7KAAAAkC9nwVm026sAAAC4Ni4HU3v37lXz5s21bds2p+3h4eHFVhQAAAAAAAC8n8vB1NixY/X1119rwIAB1jabzSZjjGw2m/bv318iBQIAAAClXXxSvMMUzotiBBsAAK5wOZhavHixJOnAgQOSpF9//VWLFi1Ss2bNdOutt5ZMdQAAACWgKOtHudWfd/W6R1fc6QsAAMALuXxXvptvvlmJiYmSpJSUFLVv317fffednn76ab322mslVR8AAAAAlFrxSfHalxSvpD//BAAUjsvB1JEjR2S32yVJc+bMUffu3bV06VKtX79en332WUnVBwAAAAAAAC/lcjBVqVIl6+/r169X3759JUk1a9aUn1+hb+4HAAAAAACAcs7lYMrHx0fJyclKT0/X6tWr1b17d6vt3LlzJVIcAAAAAAAAvJfLQ50mTpyoNm3ayM/PTz169FCLFi0k5Yyeaty4cUnVBwAAcE1K/ULnAAAA5ZjLwdTf/vY3de7cWceOHVN4eLi1vXHjxvroo49KpDgAAAAAAAB4r0ItDlW3bl3VrVvXYVu9evWKtSAAAAAA5VNcfJzj4+g4p/sBALyHy2tMAQAAAAAAAMWJ2+kBAACUMpfXxfpDUpYkX0l/8WA9QEnIPToKAFA+EUwBAAB4i7g4dZJ0UVLAn48Bb5M70Ir2SBUAgOJCMAUAALwLYQxQZkXPjHd4HKc4j9QBAHAfgikAKM+c/QBf1B/qcx9HOAAAAADgKgimAACOXAmUC
J0AwGOcrc2U++51ruxTVuUeVQUAKNsIpgAAAAC4HQETAEAimAIAAADKPG+9wx3hFQB4P4IpAChPimsKHlP5AKBUcxroRLu7CgAAro5gCgAAAChDnIVO8cOi3V4HAADFgWAKAADAS8QnxeuwpCxJvpI6ebgeAACAqyGYAgAAZRfTSgFJrMUEACi7CKYAAMUiPine4XG0R6oAAO+Te2HzaDeeKy46zul+KICzwJwQHQDyRTAFAHCQO2CSpOjG0W6vAwAAAID3I5gCgHLMWQjlyn4EVQBQMnKPWAIAwNsRTAEAAAClGOtHlS1ORx7nnsrH1D4AsBBMAQAKzaWRVqyxAQCF5ukQKs/5oz1RBQCgPCGYAoCywJVAx4V9XJ26BwDeIH5YtOpIqiXJ19PFlHPOpihGu70KAEBpRDAFAN6C0UiAJAJYlC3uvOMePIc71wJA/gimAKC0IWACAK/kdNQQ60eVT0x3BwALwRQAACg7+MENZZinQ6jcwVhcdJzT/UqCp587AKD0IpgCAAAAyiHWffIcp3fuy72BUVUAygmCKQDwNL5kAoBXKm3rR+UetRQ/LNojdSAffB8AUE4RTAFAGeDKYs7RjaNLvI7CcOm3wQDgxZi+hsJw6bM+d3hFmAXACxBMAYA7ufAFsqh3FMtzx59SFlQBJYE78AFFR3BW9uT5rHd1uh+BFoBSjGAKALwUP7ADpYMnF5wGAAAo7QimAMDDylWAxG9sURi8P8oFZwtwE94BrnE6bd7J/zvzjLQqkWpQrvCdDsWIYAoA3KhchVAliS9DQJnlLIgqqX6jS+RMQOlWot81+PyFO/A+K3cIpgAAZR+31EYZwgihq3Nl+mNJBVxAucDnJi4r4nuhyOuduYARfuUPwRQAwHP4EgyUe84W4I4fFu3wmBAK8ACmBJYLxXUX5SL3w3dBiGAKAAAARcDIL6DscmVtKpYfKINcCXmKax9XlODdqOFdCKYAoCTxWyCgWPEF1nNcGbXkyj7ORkhdbZ/cI6iK2i+A/PH/17LPldfQ6ZS7IvTjCt5TcBXBFADAbfJMAWgc7dF+AHc5+dtJffr0pxry+hDV/EvNPO25A51o95RVZhA6AWUY61mVKoRFKI0IpgCgBPHh7x7FtT4CUFK2r9iu1ORU7VixQ12HdPV0OcXClbWhAHi5Io6+yTNqh6Dq6srzNXLn9EN4BMEUAMBjCO5y4bfKXil6Zrw++XKzJClpwQ96LjMrzz5lIdApyhQ8AN6tqJ/jJXlHt6v2UxY+V11YeL48KfIUxbLwWpc2Lqw1VxKf9QRTAACgVPKWL+Epaed14NQ5SdL+U2eVknZe9apW8nBVAFB6lOTI5zJxJ0EWnr9mjJ4vgiIGoLl/mVYcQRXBFAAApVlZ/E0vHKw5+Idskowkm6S1B//QwNAGHq4KAEq5onz+lYHPyLIwQtZrlMXvUEUMKfOst+osdCqh956zfgsbVhFMAQBQlnh6qHpZ/JLnJnv+SNPXe3+TJKXr/4Oon4+kOuy3cPcRHUk773jwhl/Ur/lf1LJ2VbfUejXO7q4X7fYqAJRnRRnpVJTRHpJrP9QXFUGU5+R5PxRDgFKcivO94cpzLc0IpgAA8BRX5vG7csfB4vqtMqHTNTmSdl6Lf/ktJ5CyST6yWW3mij9PnLukJXuP/vnYKNvkBFj2ujVKTTAFAGVCMX1OFfWHeu4KXPblmZZW1NfUhe9QTNHMH8EUAAClmEsLfhbXb3qLcpynR3CVIjE3Xq+aFf310ppdOn0xQ1nGON3vyu0+NqlmRX893z1E9ro13FTp1bGIOYBSpxT+oO/p86P4Ffk1LWMjlEpa/LBonb2U6fL+BFMAAJRxeaY7uPDbPldGZxUpFJO8dl0QV7T5S03NGNBecet2KzE59ar7d6hfS8/cFKzqFf1LpB5nU/Liop1sy7VfdIlUAwBFRwgEeC+CKQAoLl7ygzWKoIhT4ErqS3Zx3b67uI4pb7dwrl7RX0/EhGrRjsNas
OVAvvuNbHujBoU2kM1my3efa+V05FO0i/sBAAC4AcEUABQTfpMHOOd0dJaX3xrbZrOpSoWCv2ZVreBXoqFUfljYHAAAlCYEUwAAXKOi3DmovPO2IMqZTUnHZbNJ5s/FzS/fpc8oZ22p1UnH1b9lPbfXxegoAABQmvh4ugAAAABvk3YhQ3uOnpIxkq9Nqujnq6HhDVXRz1e+NinbSInHTunMxQxPlwoAAOBRjJgCgHK2/g2Akpdw+IQu33svuHY1PdetlW4IrKi+zf+il1bv1M4/0pRtpPWHT6h3s7oerRUAAMCTCKYAeBcXAiWXphDluuWrs6kv8dwWFoVB2FmubD1yQjZJAyIa6dHwRvL1yVlLqm5gRb3bp40+2XZQs7ce1A/JqQRTAACgXCOYAuBxZSHgKQs1ogiKeDe9Qver8rGmEv5f75Ag9WrdQC3qVJNvrjZfH5uG2xurQ/1ayjbG6fFAWcfnJgDAVW4JpsyfX7rOnDnjjtMBWvtQX0+XUCy6TlviuZO/+mqeTWsPrvVAIUDRuPKZs/aX5Q6Pu54ZV6Rznb2U6fD461z9ovwJqlFFWZLOX8rU2Xz2aVS9sqS8758rnZeUrZxFQfPrx9kxWZJ8C3GMN3Pn9fDGa1/U7+8Fva9LgqevvafPDwClzbmMnM8B48Iv4WzGlb2uUXJysho0aFDSpwEAAAAAAEApcfjwYQUFBRW4j1uCqezsbKWkpKhq1aqy2WwlfToAAAAAAAB4iDFGaWlpqlevnnx8fArc1y3BFAAAAAAAAJBbwbEVAAAAAAAAUEIIpgAAAAAAAOARBFMAAAAAAADwCIIpAADcpHHjxkpMTHR5/zvvvFP16tWTzWbTqVOnHNp++OEHRUREqEWLFoqJidGRI0ec9rF582Z17txZlStX1m233eZymyStXr1a7du3V+vWrRUSEqINGzZctW3mzJmqXr267Ha77Ha7evToYR1z9uxZDR8+XGFhYQoODtb48eOtWwhnZ2frqaeeUmhoqIKDgzVixAhdunTJOvbQoUO69dZb1bJlS4WEhOi9996TJCUlJcnX19c6n91u16+//ur0WqSnp6tXr16qXbu2atSo4XJbQee/lrbXXntNISEhstvt6tixozZt2mS1FfTa22w2hYWFWc937dq1kqTt27c7XIfGjRurVq1aTq+FJG3dulX9+vWzHs+aNcvqt02bNlqyZInV9thjj6lx48ay2Wx53sPffPON2rVrp/DwcHXs2FFbt2612oYPH64WLVooIiJCN910kzZv3my1RUdH64svvsi3vuKSkpKirl27FunYdu3aKT4+vljquOuuu7R+/fpi6QsAAK9iAACAWzRq1MgkJCRcdb+MjAxjjDHLli0zx44dM5LMyZMnrfasrCzTtGlTs3LlSmOMMW+88Ya58847nfZ1+PBh88MPP5hp06aZAQMGuNx25MgR06hRI7Nz505jjDEXLlywaiiobcaMGXn6umzixIlm6NChJjs721y6dMn07t3bzJs3zxhjzEcffWR69OhhLl68aLKzs83IkSPN66+/bowxJjs727Rt29ba1xhjjh49aowx5sCBA6Z69erOL2QuFy5cMCtWrDAJCQl5jimoraDzF7UtISHBNGzY0KSlpRljjJk9e7Zp3769tV9+r70xxuk2Z/7+97+bRx99NN/2Pn36mDVr1hhjjDlx4oSpWrWq+e2334wxxqxdu9bUqVPH2nf16tXm8OHDed7DqampplatWmbHjh3GGGPWrFljWrdubbV/+eWX1vv5q6++Mo0aNbLaunfvbv7zn/9c9Xl4UmRkpFm1alWx9JWQkGC6du1aLH0BAOBNGDEFAEAx27Bhg7p06aKIiAiFh4fryy+/tNoWLlyoTp066cYbb9TLL79sbY+OjtZjjz2mTp066ZZbbpEkxcbG6vrrr8/T/08//SQ/Pz9rNNLo0aP11Vdf6cKFC3n2DQoKUlRUlAICAgrV9v7772vQoEFq1aqVJCkgI
MAaSVRQW0G2bt2q3r17y2azyd/fXzfffLNmz55ttcXGxqpChQqy2Wzq06eP1bZixQoFBATorrvusvq64YYbrnq+3AICAhQTE+O01oLaCjp/UdtsNpsyMjJ09uxZSdKpU6cUFBRk7Zffa++qCxcu6LPPPtOIESOcth86dEg///yzNZIoOzvbuq2zs3q6devm8PiyX3/9Vdddd51at24tSeratasOHTqkLVu2SJL69+8vPz8/SVLHjh115MgRZWZmWsevWLFC7du3V7NmzfTkk09aI+iio6M1ZswYp21XGjRokObMmSMp531ZoUIF65rGxMRozZo1SkpKcnhdbTabXnnlFUVFRenGG2/UjBkzrLb169fLbrcrNDRUw4cPd6h13759io2NVXh4uOx2uzXa66OPPtKoUaMkSTt37pTNZtN3330nSZo0aZImTZokSbLb7Tp+/Lh27drl9DUBAKC8IpgCAKAYpaam6rbbbtOrr76qrVu3KjEx0WEa0alTp7RhwwZt3rxZb7zxhsMUvF9++UVr1qzRypUrCzzHoUOH1KhRI+tx1apVVa1aNaWkpEiS+vbtqx9//PGansfOnTt1/vx5xcbGym63a8yYMdYP/AW1SdL3338vu92uzp07a/78+db2yMhIzZ8/XxcvXlR6erq++OILJSUlWW2LFi3SmTNnlJGRoXnz5lltO3fuVJ06dXTPPfeoTZs2uv3227V//36r37Nnz6p9+/Zq27atJk2apKysrGt67s6uRX7nL2pbRESEnnjiCd14440KCgrSlClTHKb5XU3Pnj0VERGhcePGOVz7yxYuXKgmTZrIbrc7Pf7yVMzLateurWnTpqlt27Zq1KiRHnjgAc2cOfOqdTRv3lwnTpywpqgtWrRIaWlp1mt3pXfeeUd9+/a1giop5xqtX79e27Zt0+rVq/Xvf//bpbbLYmNjtXz5cknSsmXL1K5dO61evVrnzp3T1q1b1alTJ6d1BwQEaNOmTVq6dKkee+wxZWZm6tKlSxo4cKDefPNN7dixQ/fee6/DtMTBgwfrrrvu0rZt2zR//nyNGDFCBw8ezFNDp06dHB7HxsZafXTq1EkrVqy46nUFAKA8IZgCAKAYbdiwQS1btrTCKB8fH4d1fgYNGiQpJwho0qSJDhw4YLUNGTJE/v7+11zDkiVL1K5du2vqIzMzU2vWrNH8+fO1efNmnTx5Ui+88MJV2/7617/q0KFDSkxM1PTp0zVu3Dht3LhRkjR+/Hg1bNhQHTp0UL9+/RQVFWWFFMOGDVPv3r3VvXt3de/eXS1atLDaMjMztXLlSj333HNKSEhQr169dPfdd0uS/vKXv+jIkSPavHmzli9frrVr1+qtt966pufu7Frkd/6ith04cEALFy7Uvn37lJycrCeeeEIDBw50qZ6DBw/qp59+0vr163X8+HH94x//yLPP9OnT8x0tJUnJyckOo85Onz6td955R5s2bdLBgwc1ffp03X777Q7rfDlTvXp1ff7555owYYIiIyP13XffKSQkxCF8kqRPP/1U8+bN00cffeSw/b777pO/v78qV66sIUOGWIHO1doui42N1YoVK5SVlaWdO3dq3Lhx1vsgKioq339PgwcPliQFBwfLz89PR48e1e7du+Xn52cFSbfccouaNGkiSUpLS9OWLVusa9q8eXN16dJFa9eutfbZv3+/li9frldffVUrV65Uenq6du7cqaioKOu8devWVXJycoHXFACA8oZgCgAAN6pYsaL1d19fX4epQoGBgS710bBhQx08eNB6nJaWptOnT6tevXrFVmfDhg3Vr18/1axZU/7+/rr33nutgKmgttq1a6ty5cqSpFatWqlv375at26dJKlSpUp65513lJiYqNWrV6t27drWFDCbzaa4uDglJCRo/fr1CgkJsdoaNmyoNm3aWI+HDh2qLVu2KCMjQwEBAdaUt1q1aumBBx6wFgN/7LHHrIXAt2/ffk3XIr/zF7VtwYIFCgsLs
16z4cOHa926dVcNgi7XI0lVqlTRI488Yj3fyw4cOKCNGzdaIagzlStXdpj6uWzZMtWoUcOannnrrbfqzJkzDu+z/PTo0UOrV6/WTz/9pLfeekspKSkKCQmx2ufOnasXX3xRy5Ytu+oUTJvNVqi2hg0bKiAgQJ999pkiIyPVs2dPrVq1SsuXL1fPnj3z7augf4dFqSc2NlZLly7V3r171b17dxljtGDBAnXq1MkhpLtw4YIqVaqUb58AAJRHBFMAABSjzp07a+/evVZYkJ2drdTU1GI9R2RkpDIyMrRq1SpJ0ocffqhbb73V4YftazVo0CCtWrVKFy9elCQtXbpUERERV227cmrisWPHtHLlSrVp00aSdObMGZ07d05STnjywQcf6Mknn5SU8wP7yZMnJUl//PGHJk+erKefflqS1KdPHyUnJ1t9L1myRK1atZK/v79+//13ZWRkSJIuXryohQsXWud79913lZiYqMTERIWFhRX5WhR0/qK2NWnSROvWrVN6erokafHixWrRooUqVKhQYC0nT560rmF2drbmzp1rPd/LPv74Y91+++0FrvsVHh6uPXv2WI+bNGmixMREHT16VFLOyL/MzEw1aNDgqtfnt99+s/7+0ksvKSYmRs2aNZMkzZs3T88++6yWL19uBWpX+vTTT5WRkaHz589rzpw5DtPeCmq7UmxsrJ5//nnFxsZaYen8+fPz3T8/wcHByszMtP5dLV++3LrDY9WqVdW2bVtrPap9+/bp+++/V7du3awa3njjDWt0VExMjF544YU8Nezatcv6twIAAP7k2bXXAQDwPhs2bDCdO3c2YWFhJiIiwixatMgYk/eufFfe8cvZHcr69u1r6tevbySZevXqme7du1tt69evN2FhYaZ58+ame/fu5tChQ1Zbnz59zObNm40xxuzevdvUr1/f1KxZ01SsWNHUr1/f/Pd///dV24wx5vXXXzetWrUyoaGhZuDAgQ53gsuvbcKECSYkJMRERESYsLAwh/4SEhJM8+bNTatWrUxYWJhZsGCB1Xb06FETHBxsQkJCTHBwsPnggw8crsW3335rIiIiTHh4uOnatavZtm2bMcaYBQsWmNatW5vw8HATEhJiHn30UXPhwoV8X5uwsDBTt25dY7PZTP369c2QIUNcasvv/EVty87ONuPHjzctW7Y04eHhplOnTubHH3+0jsvvtb/8ul9+vkOGDDEnTpywjsvKyjJBQUHWHRvzc+nSJdOoUSOHY99++23TqlUrEx4ebtq2bWu+++47q23UqFGmfv36xtfX11x//fWmadOmVtvIkSNNy5YtTdOmTc2QIUMc3id+fn4mKCjIREREWP/98ccfxpic9/yYMWNM+/btTdOmTc24ceNMdnb2VdtyW7BggZFkDh48aIzJeQ/Wrl3b2j/3nRuV666G1113nTlw4IAxxph169aZiIgIExoaaoYPH24iIiKsf6N79+41PXv2NOHh4SYiIsLh3+sff/xhbDabmTVrljEm53WX5PBeSE9PN/Xr17fuxAgAAHLYjHFyixMAAAB4tTfeeEOSnK5R5WnR0dEaO3asbrvtNk+XUmymTZum5ORkh7txAgAApvIBAACUS48//rjL65rh2vn4+GjChAmeLgMAgFKHEVMAAAAAAADwCEZMAQAAAAAAwCMIpgAAAAAAAOARBFMAAAAAAADwCIIpAAAAAAAAeATBFAAAAAAAADyCYAoAAAAAAAAeQTAFAAAAAAAAjyCYAgAAAAAAgEcQTAEAAAAAAMAj/g/QG1Ymj+wymgAAAABJRU5ErkJggg==", + "image/png": "", "text/plain": [ "
" ] @@ -321,8 +353,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 15.1 s, sys: 683 ms, total: 15.8 s\n", - "Wall time: 18.1 s\n" + "CPU times: user 21.5 s, sys: 395 ms, total: 21.9 s\n", + "Wall time: 24.7 s\n" ] } ], @@ -1230,7 +1262,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.8.15" } }, "nbformat": 4, diff --git a/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb b/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb index bbafdcf..13b5ea0 100644 --- a/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb +++ b/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb @@ -10,9 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-26 14:38:55.869262: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-09-26 18:01:09.612911: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-09-26 18:01:09.612989: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-09-26 18:01:09.614154: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-09-26 18:01:09.622849: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-09-26 14:38:59.736627: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-09-26 18:01:11.655064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -49,24 +52,21 @@ "scrolled": true }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "bash: /home/jlinder/anaconda3/envs/borzoi_py39_2/lib/libtinfo.so.6: no version information available (required by bash)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "f0 model already exists.\n", - "f1 model already exists.\n", - "f2 model already exists.\n", - "f3 model already exists.\n", - "Annotation already exists.\n", - "Splice sites already exist.\n", - "PolyA sites already exist.\n", + "f3c0 model already exists.\n", + "f3c1 model already exists.\n", + "f3c2 model already exists.\n", + "f3c3 model already exists.\n", + "Gene annotation already exists.\n", + "Gene annotation (no read-through, protein-coding) already exists.\n", + "Gene annotation (protein-coding) already exists.\n", + "TSS annotation already exists.\n", + "Splice site annotation already exist.\n", + "Splice site annotation already exist.\n", + "PolyA site annotation already exist.\n", "Human genome FASTA already exists.\n" ] } @@ -74,40 +74,71 @@ "source": [ "%%bash\n", "\n", - "#Download model weights\n", - "for rep in f0 f1 f2 f3; do\n", - " mkdir -p \"saved_models/$rep/\"\n", - " local_model=\"saved_models/$rep/model0_best.h5\"\n", + "#Download model weights (data fold 3, 4 replicates)\n", + "for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=\",\"; set -- $rep; \n", + " mkdir -p \"saved_models/$1/train\"\n", + " local_model=\"saved_models/$1/train/model0_best.h5\"\n", " if [ -f \"$local_model\" ]; then\n", - " echo \"$rep model already exists.\"\n", + " echo \"$1 model already exists.\"\n", " else\n", - " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$rep/model0_best.h5\" -O 
\"$local_model\"\n", + " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5\" -O \"$local_model\"\n", " fi\n", "done\n", "\n", "#Download and uncompress annotation files\n", - "if [ -f gencode41_basic_nort.gtf ]; then\n", - " echo \"Annotation already exists.\"\n", + "mkdir -p hg38/genes/gencode41\n", + "mkdir -p hg38/genes/polyadb\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then\n", + " echo \"Gene annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then\n", + " echo \"Gene annotation (no read-through, protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then\n", + " echo \"Gene annotation (protein-coding) already exists.\"\n", "else\n", - " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > gencode41_basic_nort.gtf\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein.gtf\n", "fi\n", - "if [ -f gencode41_basic_protein_splice.csv.gz ]; then\n", - " echo \"Splice sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then\n", + " echo \"TSS annotation already exists.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > 
hg38/genes/gencode41/gencode41_basic_tss2.bed\n", "fi\n", - "if [ -f polyadb_human_v3.csv.gz ]; then\n", - " echo \"PolyA sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then\n", + " echo \"Splice site annotation already exist.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz\n", + " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein_splice.gff\n", + "fi\n", + "\n", + "if [ -f hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then\n", + " echo \"PolyA site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O hg38/genes/polyadb/polyadb_human_v3.csv.gz\n", "fi\n", "\n", "#Download and index hg38 genome\n", - "if [ -f hg38.fa ]; then\n", + "mkdir -p hg38/assembly/ucsc\n", + "\n", + "if [ -f hg38/assembly/ucsc/hg38.fa ]; then\n", " echo \"Human genome FASTA already exists.\"\n", "else\n", - " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38.fa\n", - "fi" + " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38/assembly/ucsc/hg38.fa\n", + "fi\n" ] }, { @@ -119,7 +150,7 @@ { "data": { "text/plain": [ - "Faidx(\"hg38.fa\")" + "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" ] }, "execution_count": 3, @@ -128,7 +159,7 @@ } ], "source": [ - "pyfaidx.Faidx('hg38.fa')" + "pyfaidx.Faidx('hg38/assembly/ucsc/hg38.fa')" ] }, { @@ -141,7 +172,7 @@ "name": "stderr", "output_type": "stream", "text": [ 
- "2023-09-26 14:39:51.526063: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10372 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:81:00.0, compute capability: 6.1\n" + "2024-09-25 10:56:52.699671: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10232 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" ] } ], @@ -183,7 +214,7 @@ "models = []\n", "for rep_ix in range(n_reps) :\n", " \n", - " model_file = \"saved_models/f\" + str(rep_ix) + \"/model0_best.h5\"\n", + " model_file = \"saved_models/f3c\" + str(rep_ix) + \"/train/model0_best.h5\"\n", "\n", " seqnn_model = seqnn.SeqNN(params_model)\n", " seqnn_model.restore(model_file, 0)\n", @@ -214,11 +245,11 @@ "source": [ "#Initialize fasta sequence extractor\n", "\n", - "fasta_open = pysam.Fastafile('hg38.fa')\n", + "fasta_open = pysam.Fastafile('hg38/assembly/ucsc/hg38.fa')\n", "\n", "#Load APA atlas\n", "\n", - "apa_df = pd.read_csv('polyadb_human_v3.csv.gz', sep='\\t', compression='gzip')\n", + "apa_df = pd.read_csv('hg38/genes/polyadb/polyadb_human_v3.csv.gz', sep='\\t', compression='gzip')\n", "apa_df = apa_df[['pas_id', 'gene', 'chrom', 'position_hg38', 'strand', 'site_num', 'num_sites', 'site_type', 'pas_type', 'total_count']]\n", "\n", "apa_df.loc[apa_df['pas_type'] == 'NoPAS', 'pas_type'] = 'No_CSE'\n", @@ -265,8 +296,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-26 14:41:44.072667: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600\n", - "2023-09-26 14:41:44.604587: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-09-25 10:57:41.889919: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + 
"2024-09-25 10:57:41.984645: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-09-25 10:57:42.317045: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -281,7 +313,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -300,7 +332,7 @@ }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAABKYAAAC+CAYAAAAP1AcDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzAUlEQVR4nO3deXyNZ/7/8fc5J2kssdMalKgtZDvEvjWJ1BJaqohaWlQp05ZpTS1FVX1b/XZa08pvqgaxtMZSalRpCVKMUCoxFLGmFUp9bUmsSc79+yPjHmkiEnJykng9Hw+PJPd93df9Odc5EXm7rutYDMMwBAAAAAAAABQwq6sLAAAAAAAAwIOJYAoAAAAAAAAuQTAFAAAAAAAAlyCYAgAAAAAAgEsQTAEAAAAAAMAlCKYAAAAAAADgEgRTAAAAAAAAcAmCKQAAAAAAALgEwRQAAAAAAABcgmAKAIAHhN1ul91uV6NGjWSz2cyvw8PD77vv6Oho2e32O56PiIjQ9OnTc+wjIiJC7777bq7vmZKSotGjR6tu3bry8/NTQECABgwYoBMnTigsLMx8fBaLRX5+frLb7WrXrp0kZTp268/58+dzfW9JSktL09tvvy1vb2/5+vrKbrdr2LBhunTpUp76Kex69+6tmJgYSdKUKVNksVj01VdfmecNw1Dt2rVVvnx5p9zfYrHkakzbtWunEydOOKUGAADgPG6uLgAAABSMuLg4SVJCQoLsdrv59e3S0tLk5pa//zy4du2aPvroI+3bty/HdsOGDVPDhg31xz/+UeXKlcuxrWEYCgsLU8OGDbVv3z6VLFlSDodDX375pY4dO6a1a9eabS0Wi7Zu3ZolOMnuWF688MILunDhgmJiYlShQgUZhqEvv/xSFy5ccFpIc4sznqfs/PDDD7pw4YJatWplHgsMDNS8efP09NNPS5I2btyoypUr6+LFi06vJyevv/663nrrLS1cuNCldQAAgLxhxhQAAA84Ly8vjR07Vs2bN9fzzz+vM2fOKDg4WIGBgfLx8dHLL78sh8Nhtn///ffNGUotW7bU1atXM/WXlJSkjh07aurUqZKkL7/8Um3atFHp0qUlSTt27FBgYKDsdrt8fX316aefSpIeeughdezYUYsXL75rzRs3blRCQoIiIiJUsmRJSZLValWfPn0UGhp6z2OxZcsW1alTRxcuXJAkvfzyy3rxxReztDt69KiWL1+uyMhIVahQQVJGANa7d2899thjkqQPPvhAPj4+8vPzU//+/XX58mVdvXpVlSpV0pkzZ8y+pkyZoj/96U+SpCNHjqhr165q1qyZ/P39FRERYbazWCx666231KxZM40fP1779u1T27Zt1aRJEzVq1EjTpk0z2yYnJys8PFze3t5q166dhg8frkGDBpnn//KXv6h58+Zq0qSJOnfurJ9//jnb8fjss8/Ur1+/TMfatm2rY8eOmY9h3rx5GjJkSKY2v5/lVLlyZSUkJMjhcOjll19Ww4YNFRAQoMDAQF2/fl2S9M0336hZs2YKCAiQ3W7Xzp07s9ST0/h07dpV69at0+XLl7N9LAAAoHBixhQAAND58+e1c+dOWSwWXb9+XV9//bU8PT2Vnp6u7t27a9myZerbt68WLFigFStWaNu2bSpXrpwuXrwoDw8Ps5+TJ0+qR48eGjVqlJ577jlJGcv8WrRoYbZ57733NGbMGD377LOSlGmmTatWrbR69WqNGDFCUsbyw7Vr16patWqZ6t2zZ48aN24sd3f3e37M7dq1k81mkyRVqFBBmzdvVvv27TV06FANGjRIAwYM0LZt27Rjx44s1+7Zs0f16tVT5cqVs+173bp1mjdvnmJiYlS+fHkNGzZM48aN06effqpnnnlGn3/+ucaMGSPDMLRgwQKtXr1a6enpevbZZ/X555/L29tbV69eVc
uWLdWiRQs1a9ZMkmSz2bRr1y5JGeHTxo0b5eHhoWvXrql169YKDQ1Vy5YtNXXqVJUsWVIHDx5USkqKWrdurcDAQEnS4sWLFR8fr5iYGNlsNi1atEgjR47UN998k+VxREdHm6HZ7QYMGKAFCxZo+PDh2rVrl6ZNm6bx48ffdcz37t2rjRs36qeffpLVatXly5f10EMP6fDhwxo8eLC2bNkib29vpaamZgk87zY+7u7u8vPz09atW9WtW7e71gIAAAoHgikAAKBBgwbJYrFIkhwOh8aOHatt27bJMAz99ttv8vX1Vd++fbVmzRq99NJL5lK7W7OFJOns2bNq37695syZow4dOpjHExMT1blzZ/Pr4OBgvfPOOzpy5IhCQkLUtm1b81zVqlWVmJhofp3dcsPsbN26Va+88opSUlLUr18/c7bW3a7JbsnduHHj1KVLFw0bNkw//PCDSpQokasabhcVFaXw8HCz/xEjRqh3796SpMGDB2vo0KEaM2aMoqOjValSJfn5+enAgQP66aef1LdvX7Of5ORkHThwwAymbp+ZdO3aNY0cOVJxcXGyWq06efKk4uLi1LJlS23cuFEzZsyQxWJRmTJlFB4erqNHj0qSVq1apV27dplBVXp6+h0fR2Jioh555JEsx59//nk98cQT8vT0VJ8+fWS15m4S/mOPPaa0tDQNGTJEwcHB6tq1q6xWqzZs2KDOnTvL29tbkuTu7p5lOWd8fPxdx+f3rx8AAFD4EUwBAAB5enqan3/00Uf67bfftHPnTpUoUUKvvfaaudwqJ+XLl1fdunW1Zs0ahYSEmEFXqVKlMl0/evRode/eXVFRUZowYYJ8fX31t7/9TZJ0/fp1c2leTho3bqyIiAilpqbK3d1d7dq1U1xcnKZMmXLfm48nJyfr+PHjKl26tM6dO6f69etnadOkSRMdOXJE58+fV6VKle7a562xkDJmhTkcDv3www+aP3++Bg8eLClj36yKFSvmGMbd/jxNmDBBlStXVmxsrNzc3NSzZ887Pk+3398wDI0fP17Dhg27a92/f+5uqV69umrVqqW3335b27dvz3LeZrNlCrxu9VGuXDnt379f33//vTZv3qzx48dry5Ytd63jVt13G5/cvn4AAEDhwR5TAAAgk4sXL6pq1aoqUaKEzpw5o+XLl5vnnnrqKc2aNcvcx+fSpUtmAOHh4aGVK1fq9OnTevHFF819qfz9/RUfH2/2ER8fr9q1a+vFF1/UhAkTMi2VO3jwoAICAu5aY2hoqB599FGNGjVK165dM49fuXLl/h68MjY179+/v5YtW6aBAwdm+259devW1TPPPKMXXnjBDMIMw9CKFSt0/PhxhYaGatmyZUpKSpKUsVdTx44dzesHDx6smTNn6ptvvjH3cGrQoIHKli2ryMhIs93Ro0fN/a5+7+LFi6pRo4bc3NwUHx+vDRs2mOdCQkK0YMECGYahlJQULVu2zDzXo0cPzZo1y+w3NTVVsbGx2d7j98/d7d555x1NmzZNdevWzXZ8bu0RtXLlSvN5OXfunK5cuaKOHTvq3XfflZeXlw4cOKBOnTrpu+++06FDh8yafr9XVG7GJ7evHwAAUHgQTAEAgExGjRqlnTt3ysfHRwMHDsy0mfjAgQP1zDPPqHXr1goICFBYWJhu3Lhhnnd3d9fixYuVnp6u/v37Ky0tTb169dJ3331ntomIiJCPj48aN26siRMn6sMPPzTPffvtt+rVq5f5td1u1+nTp7PUaLFYtG7dOrm5ucnX11f+/v5q06aNzpw5k6uZQFLGHlN2u938Ex8fr4iICF24cEGTJk1SmzZtNGLECD333HMyDCPL9fPmzVNAQIBatGghHx8fNWrUSOvXr1fFihXVpUsXDR48WK1atZKfn5+SkpL03nvvZRrHJUuWKDQ01FwO6ebmpjVr1mjlypXy9/eXj4+PXnjhhUzB2+0mTpyoyMhI+fv7a9y4cQoJCTHPTZ48WcnJyWrYsKE6d+
6sgIAAc1lh//79NWjQIAUHB5sbjW/atCnbe/z+ubtd06ZN7zjWM2bM0KhRo9SkSRPFxsaas8pOnjypJ554Qv7+/vL19ZWvr6+6dOmiunXrKjIyUgMGDDDH9PeB2N3GJyEhQenp6QRTAAAUMRYju39pAQAA5KOuXbtqypQp5l5A2Tlw4ICGDx+urVu3FmBlxVNqaqrS09NVokQJXblyRZ06ddIrr7yi8PDwPPVza+P0mJgY810VC6tx48apbt26Gjp0qKtLAQAAecCMKQAA4HSffPKJzp49m2ObkydP6rPPPiugioq3ixcvqk2bNrLb7QoMDFSbNm3Up0+fPPfj6empGTNm6MSJE06oMn9Vq1Yt0+bwAACgaGDGFAAAAAAAAFyCGVMAAAAAAABwCYIpAAAAAAAAuATBFAAAAAAAAFyCYAoAAAAAAAAu4VYQN3E4HDp9+rTKlCkji8VSELcEAAAAAACACxiGoeTkZFWrVk1Wa85zogokmDp9+rQeffTRgrgVAAAAAAAACoGTJ0+qRo0aObYpkGCqTJkyZkFly5YtiFsCAABJEZKSJZWR9LKLaynuiupYF9W6gaKE7zMUFrwWUVCSkpL06KOPmnlQTgokmLq1fK9s2bIEUwAAFKASkm7+5yM/gZ2rqI51Ua0bKEr4PkNhwWsRBS032zmx+TkAAAAAAABcgmAKAAAAAAAALlEgS/kAAAAAAACKu/T0dKWmprq6jALj7u4um812X30QTAEAAAAAANynlJQUJSYmyjAMV5dSYCwWi2rUqCFPT8977oNgCgCA+zFlSv62AwAAQJGTnp6uxMRElSpVSlWqVMnVpt+3S0hI0JNPPqmvv/5aXl5ezikynxmGoXPnzikxMVH16tW755lTBFMAAAAAAAD3ITU1VYZhqEqVKipZsmSer1+xYoWOHj2qlStXasKECU6o0DmqVKmihIQEpaam3nMwxebnAAAAAAAA+SCvM6VuWbJkSaaPRcW9Pt7bEUwBAAAAAAC4yPHjx7V//35J0r59+3T8+PF863vlypUKDAyU3W6Xt7e3QkJC5HA4FBQUpEqVKuny5ctm2169emn+/PmSpPnz56tcuXKy2+3y8fFRly5d9Msvv+RbXbdjKR8AAAUhN3tMsQ8VAADAA2fFihWyWq1yOByyWq1auXKlxowZc9/9/vrrrxo2bJh+/PFH1apVS5K0Z88ec5ZT2bJlNX36dL333nvZXh8cHKxVq1ZJkkaNGqU//elPWrFixX3X9XsEUwAAAAAAAE62e/duzZkzJ8vxdevWme/kZxiGZs6cqaNHj2ZpN3ToUDVt2jTX9zt79qxsNpsqVqxoHmvSpIn5+dixYzV58mS98sorqlatWo59derUSW+88Uau750XBFMAAAAAAABOduzYMc2ePVuGYchms8lqzdhdyTCMTMHU6dOnNW/ePEmSw+FQenq6LBaLgoOD8xRM+fv7q23btqpVq5Yef/xxtW7dWv369VP16tUlSVWrVtXw4cP11ltv6e9///sd+0lPT9fy5csVGBh4rw89R+wxBQAAAAAA4GTh4eHauHGjHn74YUkZ7+SXmpqqtLS0TO3S0tLMc5L08MMPa9OmTQoPD8/T/axWq1asWKHt27erc+fO+te//iUfH59Ms7H+/Oc/a82aNTp06FCW6zdv3iy73a7AwEBZLBZ9+OGHeX3IucKMKQAAAAAAgAIQHBysn376SYMHD9aaNWvu2r5Lly6KjIxU5cqV7/me3t7e8vb21vDhw9W5c2etXr3aPFe2bFmNHTtW48ePl81my1LrrT2mnIlgCgCAwoIN0gEAAIq9ypUra/Xq1Zo+fbomTJhwx3bvvvuuxo0bZ25WnlenTp1SQkKC2rRpI0m6ePGiTpw4oTp16mRqN2LECH388ceSpG7dut3Tve4HS/kAAAAAAAAKkMViUYUKFe4YOlksFlWsWPGeQykpY0ng1KlTVb9+fdntdrVr107PP/+8unfvnqmdh4eHpk
6dqoSEhHu+1/1gxhQAAAAAAEABW7ZsmSwWiwzDyPbj8uXLNXz48Hvuv1atWvruu++yPRcdHZ3p64EDB2rgwIHm14MGDdKgQYPu+d55wYwpAAAAAACAAnT+/Hl9//33cjgccnNzk6enpyZNmiRPT0+5ubnJ4XAoOjpaFy5ccHWpTkcwBQAAAAAAUIBWr14th8MhSWrWrJn279+vqVOnat++fWratKkkKT09PdNG5cUVwRQAAAAAAEABWrdunaxWq9566y1t2bJFNWvWlJSx/G7r1q2aPHmyrFar1q1b5+JKnY89pgAAAAAAAArQ6NGj9frrr6tFixZZzrm5uentt99WWFiY0tPTXVBdwSKYAgDgTqZMcXUFAAAAKIZat2591zbZhVbFUdEPpnLzSwO/WAAAAAAAABQ67DEFAAAAAAAAlyj6M6YAALgXzKYFAACAk02JnuKcfoNy16+Xl5c8PDxUsmRJXbt2TYMHD9a4ceNyvGbHjh168cUX5ebmpunTp6tTp075UPGdEUwBAHAfohOic9UuyCvIqXUAAAAA2Vm6dKnsdrtOnTqlRo0aKSQkRM2bN79j+wULFqhfv34aP358gdTHUj4AAAAAAIBirnr16vL29tbPP/+sM2fOqE+fPmrevLn8/Pw0ceJESdL06dO1dOlSRUREyG6369KlS06vixlTAAAAAAAAxdyhQ4d0/vx5BQUFacCAAZowYYIef/xxpaWlqVu3blq+fLnGjRunQ4cOyW63a/To0QVSF8EUAAAAAABAMRUeHi6r1ar4+HjNmDFDpUqV0saNG3X27FmzTUpKiuLj411SX+EOptiYFgAAAAAA4J7d2mMqKipKTz75pEJCQiRlbHJeokQJF1dX2IMpAACKidxsks4G6QAAAHCW0NBQjRgxQhMnTlRwcLCmT5+uKf+ZEHT69Gk5HA7VqFGjwOsimAIAAAAAAHgATJo0SXXr1tXatWs1c+ZM+fr6ymKxqHTp0vrss88IpgAAAAAAAIqLKUFTXHr/hISETF9XqFBB58+flyS1aNEi22vmz5/v5Koysxbo3QAAAAAAAID/IJgCAAAAAACAS7huKR/vuAcAAAAAAPBAezD2mMpNCEZQBgAAAAAAUKCKfDDF228DAIqLXP1Mc3oVAAAAQMFhjykAAAAAAAC4RJGfMQUAgLPkZgYTAAAAUJglJyfrD3/4g8LDwzV37lxJ0vz587Vq1SqtWrVKCQkJ+vbbb/XSSy+5pD6CKQAAAAAAAGdw1n7Weeh36dKlCgwM1MqVK/Xxxx/L09Mz0/mEhATNmjXLZcEUS/kAAAAAAACKqblz52rs2LFq3769li5dmuX8Sy+9pPj4eNntdj311FMFXl+hnjHFEgoAgLPwMwYAAADF3YEDB3Ty5El16tRJaWlpmj59ul544YVMbWbNmqXRo0crLi7OJTUyYwoAAAAAAKAYmjt3rp577jnZbDaFhYXpxIkTOnjwoKvLyqRQz5gCAAAAAABA3qWmpmrRokVyd3fX4sWLJUlXr17V3Llz5evr6+Lq/osZUwAAAAAAAMXM6tWr9dhjj+nUqVNKSEhQQkKCduzYoUWLFik1NdVsV7ZsWV2+fNlldRJMAQAAAAAAFDNz585V//79Mx1r2LChqlevruTkZPOYv7+/fHx85Ovr+2Btfs6mswAAAAAAoFibMsVlt167dm22x/fs2SNJeu211yRJbm5uWrNmTYHV9XvMmAIAAAAAAIBLPBCbn+dmdlaQ06sAACAf5OZ/3Vz4P3MAAABAXjBjCgAAAAAAAC5BMAUAAAAAAACXIJgCAAAAAADIB4ZhuLqEApUfjzfXe0x98sknOZ5/9dVX77sYAADyBXssAQAAoAC5u7vLYrHo3LlzqlKliiwWi6tLcjrDMHTu3DlZLBa5u7vfcz+5DqZiY2PveO5BGHAAAAAAAIDs2Gw21ahRQ4mJiUpISHB1OQXGYrGoRo0astls99xHroOpyMjIe74JAAAAAABAce
bp6al69eopNTXV1aUUGHd39/sKpaQ8BFO3O336tPbv36/r16+bx5566qn7KgQAAAAAAKAos9ls9x3UPGjyHEzNmzdPU6dO1YULF1SvXj3t3btXLVu2JJgCAAAAAABAnuT5XflmzJih2NhY1alTRz/++KM2bdqk+vXrO6M2AAAAAAAAFGN5DqYeeughVahQQWlpaZKk9u3bKy4uLr/rAgAAAAAAQDGX56V8Hh4eMgxD9evX11//+lfVqlVLKSkpzqgNAAAAAAAAxVieg6lp06YpKSlJ//u//6uXXnpJly5d0t/+9jdn1AYAAAAAAIBiLM/BVEhIiCSpXLly2rBhQ74XBAAAAAAAgAdDnoOptLQ0rVixQseOHTP3mZKkyZMn52thAAAAAAAAKN7yHEz17dtXZ86cUfPmzWWz2ZxREwAAAAAAAB4AeQ6m9u3bp0OHDslisTijHgAAAAAAADwg8hxMPfroo7p586Y8PDycUQ8AALhfU6aYn7aSdENSlp/at7UBAAAAXCXPwVTdunUVFBSkp59+WiVKlDCPv/rqq/laGAAA9yo6IdrVJQAAAADIhTwHUzdu3JC3t7cOHjxoHmNZHwAAAAAAAPIqz8FUZGSkM+oAAAC5kJvZYEFeQU6vAwAAAMgPeQ6mFi5cmOVY+fLlFRgYqOrVq+dLUQAAwMlys8cU+1ABAADAyfIcTH3xxRfasmWL2rZtK4vFom3btql58+Y6fPiw/vrXv6pPnz7OqBMAAAAAAADFjDWvF3h6eio2NlYbNmzQ+vXrFRsbq4oVK2r79u2aNm2aM2oEAAAAAABAMZTnGVOHDx+Wt7e3+XWDBg109OhReXl5yWrNc84FAAAKK5b7AQAAwMnynCSVKVNGCxculGEYMgxDCxculKenpzNqAwAAAAAAQDGW52AqMjJSERER8vDwUIkSJRQREaG5c+fqypUr+uCDD5xRIwAAAAAAAIqhPC/la9CggX744QclJydLyphBdcsTTzyRf5UBAAAAAACgWMt1MHXkyBHVq1dP//73v7M97+/vn29FAQAAAAAAoPjLdTA1evRoffPNN+revbt5zGKxyDAMWSwWHT9+3CkFAgAAAAAAoHjKdTC1Zs0aSdKJEyckSceOHdPq1atVt25dPfnkk86pDgAAAAAAAMVWrjc/f+KJJxQXFydJOn36tJo1a6b169frjTfe0Pvvv++s+gAAAAAAAFBM5TqYOnXqlOx2uyRp8eLFevzxx7Vu3Tpt375dX3zxhbPqAwAAAAAAQDGV62CqZMmS5ufbt29XWFiYJKlChQpyc8vzm/sBAAAAAADgAZfrYMpqtSoxMVEpKSn6/vvv9fjjj5vnrl696pTiAAAAAAAAUHzleqrThAkT1LhxY7m5uSk4OFj169eXlDF7ysvLy1n1AQCAwmzKlPxpAwAAgAdSroOpnj17qnXr1jp79qz8/f3N415eXpo9e7ZTigMAAHkXnRBtfn5SUrokm6Qbt7UJ8goq0JoAAACA7ORpc6iqVauqatWqmY5Vq1YtXwsCAAAAAADAgyHXe0wBAAAAAAAA+YlgCgAAAAAAAC6Rp6V8AACgeLh9H6o7YR8qAAAAOBvBFAAAKD5y+w6AvFMgAABAoUAwBQAAssWsKgAAADgbe0wBAAAAAADAJQimAAAAAAAA4BIEUwAAAAAAAHAJ9pgCAADOlZuNxvOrDQAAAIoUZkwBAAAAAADAJQimAAAAAAAA4BIs5QMAAABQaE2JnnL3NkF3bwMAKJwIpgAAwD2LToi+a5sgryCn13FLbuqRpCD2tAIAACgUWMoHAAAAAAAAl2DGFAAAcD1mJwEAADyQCKYAAAAAFGnsQwUARRdL+QAAAAAAAOASzJgCAABFQm43Ns+vvoLy7W4AAAC4E4IpAAAAAC6RmyV4BXkvlvsBQMFjKR8AAAAAAABcghlTAADAqXK1bM4ryOl1AAAAoPAhmAIAAACAYo6ljAAKK4KpW6ZMyd92AAAAAFCEEF
4BcAWCKQAAAKAQy68NwgkUAACFUcEGU++9J3l4FOgtAQBA4ZebfagA3J+Cng1TkO+4BwAoupgxBQAAAABiKVtuMEYA8hvBFAAAAAAUYcxOA1CUEUwBAAAAyBOCEABAfiGY+o/c7m0R5NQqAABAYcFyFRQEAp6ih78bACB/FWgwtfXnrSr9EFkYAAAAUBgRlAEAChopEQAAQDaC5kfnopGzqyj+chuEFMUZKIQ8yAkzrwAgA8EUAAAAAOSj34dOMZJuSPKQlHQf/QBAcUQwBQAAUEBOnDihTp066bvvvlPt2rVdXU6RwuwSANnJr/COvz8A1yGYAgAAKCCLFy/WkSNH9I9//EMTJkxwdTkA4BRFMUguijUDxQXBFAAAQAFZsmSJ+ZFgqmhjiRXgfIXt+6w474kHuBLBFAAAQAE4fvy49u/fL0nat2+fjh8/rscee8zFVTlXYfulUiqcNQEA8CAjmAIAACgAK1askNVqlcPhkNVq1cqVKzVmzBhXl3XPCmPAUxhrAgAAOSOYAgAAyEcnd+/WS3PmZDm+bt06GYYhSTIMQzNnztTRo0eztBs6dKiaNm3q9DqBoiJofvRd20QPCnJ6HchfxTlIZr8qIG8IpgAAAPLR/x07pi9mz5ZhGLLZbLJarZIywqjbg6nTp09r3rx5kiSHw6H09HRZLBYFBwcTTAF5RHgFAEUXwRQAAMA9uv1/xWMk3ZDk8Yg08MOBWvHOCl29fFXpqenZXpuWlmZ+brPZ9PDDD2vp0qUKCgpyas3Ag4rwCgAKJ4IpAACAfFa7cW2NjBypf77/Tx2OOXzX9l26dFFkZKQqV658X/e9l6UxZqAmKelWPywxQQHJTVgEACjeCKYAAACcoFS5Uur7P321bfE2bZqz6Y7tQoaGaPXs1bJYLDn2V5D7sRTnvV+AB1V+hYDMKssf7EMF/BfBFAAAgJNYLBaVLFMyxzYly5S8aygFoGCw3A8ACh7BFAAAgBP9FP2TLFaLDIchWSQZMj9arBYd+P4AM5SQo4Jc7pafoUtxXaZXGMOrghzrwvj4iytmVeFBQTAFAADgJFcvX9XPe3+W4TBktVnl9pCbWvZqqR1f7lDazTQ50h1KiEvQtaRrKlk255lVQGFSXEOngsQYAkAGgikAQNEyZYqrKwByLX57fMZMKUnVvKup16ReKvdIOTUOa6wV76xQ4oFEGQ5D8dvjZe9sd22xcInCFk4UtnqKqt+PYx1J6ZJskh51QT0POmZ5AYUbwRQAoEiJToh2dQmA6fZfdrL7xTM6+idZLBa1f6692g9sL6vNKkkqX7W8Bn8yWN8v/F5bF23VkZ1HHthgKrdBSFH8pZGQBw+q/AqCCtsSxdwqin9fAa5EMAUAAOAkvRrVUN03uqtGoxpZzlltVgUPDla9FvXMWVVFSUGHLoQ8QPFSnL+nmaEF5E2BBFOGkfGPraupaQVxO6dKSkpydQkA8EC7crPo/ywpSNf031k8V1xcS3GX3VjXLl9ap2pV0Y0rN+54XZVaVSQpxza51faLrXm+ppr+W3f1PFzH6wnIPf4uxu81mx2VL/2Mz8XPjvHtxpufX5d04z8f+c0WznQrO7mVB+XEYuSm1X1KTEzUo4+ymhoAAAAAAOBBcfLkSdWokXXm+O0KJJhyOBw6ffq0ypQpI4vF4uzbAQAAAAAAwEUMw1BycrKqVasmq9WaY9sCCaYAAAAAAACA38s5tgIAAAAAAACchGAKAAAAAAAALkEwBQAAAAAAAJcgmAIAwAW8vLwUFxeX6/a9evVStWrVZLFYdOnSpTu2W7Jkiex2u3x9feXr66sPP/zQPBcTEyO73S673S4fHx8NHz5cN278922m9+3bp6CgIDVs2FANGzbUypUrJUmRkZHmdXa7XZUrV1bPnj3N63755Rc9+eSTatCggRo1aqSZM2ea59asWSNvb2/Vq1dPPXv2NN86WJI++OAD+fr6qlGjRnr66aczPa5FixYpICBAvr6+6tChg3
755ZcsjzUyMlIWi0WrVq2643gMHjxY9evXV0BAgNq0aaNdu3aZ59588035+fmZj2vJkiWZrv3b3/6mhg0bys/PTwEBAbp+/bok6emnn840HlarVatXrzavW7Fihfz8/MznICEhQZI0ZcoUValSxbyuf//+5jUOh0OvvPKK6tSpo7p16yoiIsI8d/bsWfXs2VP+/v5q2LCh/vrXv2aqc9q0aapTp47q1KmjN998845jIUm9e/dWTExMpmO//fabHnnkEfXo0SNX/V65ckWDBw+Wn5+fvL29NW7cuExvBX2n19H8+fOz3MNZhg4dqs2bN+f5uoiICA0aNChfalizZo2GDRuWL30BAFCsGQAAoMDVqlXLiI2NvWu71NRUwzAMY8OGDcbZs2cNScbFixfv2H7btm3Gr7/+ahiGYVy6dMmoU6eOsXnzZsMwDOPKlSvGzZs3DcMwjPT0dKNHjx7GRx99ZJ6rXbu2sXXrVsMwDCMtLc347bffsr2Hj4+P8eWXXxqGYRgOh8No0qSJsWzZMvP8mTNnDMMwjOTkZOPhhx82Dh48aBiGYfzxj380xowZYxiGYaxfv95o2LChkZSUZBiGYbzzzjvGyJEjDcMwjIMHDxqPPPKIcfr0acMwDGPRokVGWFhYphpOnDhhtGrVymjZsqXx1Vdf3XE8/vnPf5pj+PXXXxu1atUyz90+jomJiUaZMmWMc+fOGYZhGKtWrTJat25tXLp0yTAMw/jtt9+MtLS0LP3v2rXLqFSpknHjxg3DMAxjz549RoMGDYxTp04ZhmEYSUlJxpUrVwzDMIy33nrLGDVqVLZ1LliwwAgJCTHS0tKM8+fPGzVr1jT2799vGIZh9OvXz3jzzTcNwzCMlJQUIyAgwPjhhx8MwzCM77//3mjUqJGRkpJiXL9+3QgMDDTWrFmT7T127txphISEZDneo0cPY8iQIUb37t3NYzn1O2HCBGPgwIGGw+Ewbt68aXTu3Nl8/nN6HUVGRma6R2E0c+ZM4/nnn8+3/po0aWIcPnw43/oDAKA4YsYUAABOFBMTo7Zt2yogIED+/v765z//aZ5buXKlWrVqpdq1a2vatGnm8aCgIL366qtq1aqVOnbsKEkKDQ3Vww8/fNf7tWnTRlWrVpUklStXTt7e3uaMnVKlSsnd3V2SdPPmTV27dk0Wi0WStHjxYrVs2VJt27aVJNlsNlWpUiVL/zt37tRvv/2mp556SpK0ceNGeXh4qHfv3mabRx55RJK0bt06NW7cWN7e3pKkkSNH6h//+Ickae/evWrbtq3KlCkjSQoLC9OiRYskSfv375e/v7/+8Ic/mOfWrVun8+fPS8qYXTR06FDNnDlTHh4eOY7HU089JTc3N0lSy5YtderUKaWlpUmSypcvb7ZLSUmRYRhyOBySMmZzvfXWWypXrpwkqUqVKrLZbFn6nzt3rgYMGKCHHnpIkvThhx/qtddeU7Vq1SRJZcqUUalSpXKsUZKWLl2qF198UTabTRUrVlR4eHimsQoLC5MklS5dWu3btzfHaunSpRo4cKBKly4tDw8PDRkyxLzu9z777DP169cvS/21a9dWu3btstRzp3737t2rzp07y2KxyN3dXU888YRZz91eR0lJSXrqqafUqFEjtW/f3nxtzp8/XyEhIdmeu9369evN74mkpCS5u7tr9uzZkqSFCxdqyJAhkjK+h27NpBs0aJCGDx+uDh06qH79+urZs6du3rwpSUpOTlZ4eLgaNGigtm3bat++fea90tPT9ec//9mc+fbKK6/o5s2bunLliipWrKjU1FRJUvPmzc1x/eWXX1SnTh2zjz59+mjOnDnZPh8AACADwRQAAE5y4cIF9ejRQ++995727t2ruLi4TAHApUuXFBMTo127dumDDz7QqVOnzHOHDx/Wli1btGnTprveJywsTLt3785y/MCBA4qJiVFoaKh5LCEhQQEBAapcubLKlSunkSNHmm09PDzUrVs32e12Pffcczp37l
yWPufOnauBAweaAdeBAwdUpUoV9e3bV40bN9bTTz+t48ePS8r4Jb1WrVrmtV5eXvr111+VlpamwMBARUVF6cyZMzIMQ1988YWSk5N14cIFBQQEaM+ePTp8+LAk6fPPP5dhGPr5558lSR999JHatGmjwMDAu47N7T7++GOFhYWZQZUkffLJJ2rQoIGaNGmi2bNnm+HfgQMHtHv3brVp00ZNmzbVJ598kqW/a9eu6R//+IdeeOGFTGP+yy+/6PHHH1fjxo01adIkpaenm+eXL1+ugIAAhYSEZFpqlt1Y3Vq+GBgYqMWLF8vhcOjcuXP67rvvzNAmp+t+Lzo6Wi1atDC/PnHihGbNmqX/+Z//ydL2bvUsX75cN27cUEpKilatWmXWc7fX0b/+9S+9//77OnDggLp165ZpqVtO525p166ddu/erRs3bmjz5s1q1qyZoqKiJEkbNmzI9Fq/XVxcnL7++msdPHhQZ8+e1YoVKyRJU6dOlYeHhw4dOqRvvvlGW7ZsMa+ZPXu2du3apR9//FFxcXE6duyYZsyYodKlS8vX11cxMTG6ePGibt68qV27dskwDG3YsEEdOnQw+2jVqpU2btyYbU0AACADwRQAAE4SExOjBg0amGGU1WpVxYoVzfO3ZllUrlxZjz32mE6cOGGeGzBggBn+3M3atWvVtGnTTMcSExPVvXt3zZo1SzVq1DCPe3l5ae/evTpz5oxu3Lhh7v+TlpamqKgoffbZZ4qNjVX16tU1YsSITH1euXJFS5YsyRTEpKWladOmTZo0aZJiY2PVqVMn9enT5641BwcHa8yYMerWrZtatmxpzqpxc3NTvXr1NGvWLD333HNq2rSpzp8/r/Lly8vNzU379+/XihUrNHHixFyNzS2ff/65li1bZs6uueXVV19VfHy8tm/frnfffdeclZWWlqYTJ05oy5Yt+vbbb/XZZ59pzZo1ma798ssvVb9+ffn5+WUaj9jYWH377bfatm2btm/frk8//VSS9NJLLykhIUF79+7VO++8o/DwcDNsy8mHH36olJQUNW7cWP369VNQUFCmcC23EhMTzdlshmFoyJAhioiIUMmSJfPUz7hx41SzZk21aNFCXbt2VfPmzc167vY6at26tRo2bChJGjZsmKKjo83gLqdzt5QsWVJ2u13/+te/FBUVpXHjxmnPnj1yOBzatGmTQkJCsq356aefVqlSpWSz2dS8eXMdO3ZMUsaMvxdeeEEWi0XlypXLNKMsKipKgwYNkoeHh9zc3PTiiy9qw4YNkjJmMEZFRWnTpk3q2LGjvL29tW/fPkVFRWUKpqpWrarExMQ8jS8AAA8agikAAFykRIkS5uc2m81cYiZJnp6e99zv6dOnFRoaqokTJ2ZaYnc7T09P9e3bV1988YUkqWbNmgoODlb16tVlsVg0YMAA7dixI9M1y5cvl4+Pjxo1amQeq1mzpho3biwfHx9J0sCBA7Vnzx6lpqaqZs2amYKXhIQE/eEPfzBDjJEjR2r37t3auXOngoKCVKNGDZUtW1ZSxmbvO3bs0O7duzVixAhdu3ZNdevW1datW5WQkKB69erJy8tLO3bs0LBhw/Tpp58qKirK3Fj89llAS5cu1dtvv60NGzaYwczvBQQEqHr16oqOjjYf17PPPiubzabKlSsrLCwsy3jMnTs3U0h367pnnnlGJUuWVOnSpdWzZ0/zuqpVq5phY5s2bdS4cWNzplt2Y1WzZk1JGcHl/PnztXfvXm3YsEEWi8Uc75yu+71SpUqZG7gnJSXp3//+t8LDw+Xl5aUxY8Zo/fr1ZqiSU78lS5bUxx9/rLi4OH3//feqXLlypnru9jq6X7dCoS1btqhDhw7y8/PT559/rgoVKpjLWH8vp++1291a2nq3c7dqiIqKUmhoqEJDQ7VhwwZt3rw5Uzh2/fr1PAd/AAA8aAimAABwktatW+vIkSPaunWrpIy9kS5cuODUe/7666/q0KGDxo4dq+effz7TuaNHj5
r74ty8eVNfffWV/P39JWXshbNr1y7zXfPWrl2rgICATNdnF8R06dJFiYmJ5jLEtWvXqmHDhnJ3d1fnzp21Z88eHTp0SFLGu9z17ds3U62SdPXqVU2ePFlvvPFGlnPp6ekaO3as/vjHP6pUqVIaMWKEfv31VyUkJCghIUEtW7bU7NmzNWLECIWGhiouLk5xcXHmu8gtW7ZMEydOVFRUVJbA5sCBA+bnx44dU2xsrBm69evXT99++62kjCV70dHRmcbj6NGj2r17t5599tlMffbr10/r16+Xw+FQWlqa1q9fb153+8yZI0eOKC4uzpxt1bt3b/39739Xenq6Lly4oKVLlyo8PFySdP78efN5i42N1apVq8wlmL1799aiRYt05coV3bhxQ/Pmzcs0xrfz9/dXfHy8pIz9x86fP2+O41/+8hd17NjRXHaWU79JSUm6evWqpIzlgJ9++qlef/11SXd/HcXExJivhzlz5ig4ONjcuyunc7cLDQ3V4sWLVb58eZUuXVqhoaGaPHnyHZfx5SQ0NFSRkZEyDENJSUmZ9ucKDQ3VwoULdfPmTaWlpWnOnDnm/lbNmzdXfHy8NmzYoHbt2ik0NFQzZ85U1apVM+2pdfDgwSzfRwAAILO8zwMHAAC5UqFCBX311Vd6/fXXlZycLKvVqnfeeUdPPvlknvvq2rWr9u7dK0ny8fFRvXr1zNk9YWFhmjp1qpo2barJkyfrl19+0ccff6yPP/5YkjRq1CgNHjxYmzZt0ieffGLOGOnQoYMmTZokKWOmy4QJE9S6dWtZrVZVr14907K3+Ph4xcXFae3atZnqKl26tGbNmqWuXbvKMAyVK1dOS5YskZSx8fecOXPUo0cPpaWlydfXVwsWLDCv7dixoxwOh27evKmBAwfq5ZdfNs8NGTJEP//8s27cuKGuXbvq3XffzfOYSVL//v1VtWpVde/e3Ty2ceNGVapUSW+88YZOnDghd3d3ubm5KSIiwlxK9tprr2n48OFq1KiRLBaLnnnmmUyzz+bNm6dnnnnGnOF1S9++fbVnzx75+PjIZrOpXbt2GjVqlCTpzTff1I8//ig3NzfZbDb9v//3/1S/fn1JGTPNdu3apXr16slisei1114zQ6sffvhBr776qtzc3FSmTBktW7bM3Bg+KChI4eHhZtvw8HB169Yt27Ho1auXvvvuu1wFODn1e/z4cfXp00dubm5yc3PTjBkzZLfbJd39ddS6dWuNHTtWR48eVaVKlbRw4cJcnbtd06ZNdfnyZXN21xNPPKGXX3450xK63Jo0aZKGDh0qb29vValSRW3bttWNGzckZSwnPHbsmJo0aWKOyejRoyVlLDlt06aNUlJSVLJkSfn4+Cg1NTVLDd9++6169eqV57oAAHiQWAzDMFxdBAAAAJwrJSVFrVu3VkxMjEqXLu3qcjKZP3++Vq1aZb6TXnHwf//3fwoJCdHu3bvNd20EAABZsZQPAADgAeDp6akZM2Zk2mQfznPs2DHNmjWLUAoAgLtgxhQAAAAAAABcghlTAAAAAAAAcAmCKQAAAAAAALgEwRQAAAAAAABcgmAKAAAAAAAALkEwBQAAAAAAAJcgmAIAAAAAAIBLEEwBAAAAAADAJQimAAAAAAAA4BIEUwAAAAAAAHCJ/w+ZDSRJEk4dwgAAAABJRU5ErkJggg==", "text/plain": [ "
" ] @@ -319,7 +351,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -331,8 +363,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 13.7 s, sys: 731 ms, total: 14.4 s\n", - "Wall time: 16.8 s\n" + "CPU times: user 20.2 s, sys: 443 ms, total: 20.7 s\n", + "Wall time: 23.5 s\n" ] } ], @@ -991,7 +1023,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.8.15" } }, "nbformat": 4, diff --git a/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb b/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb index 08badbc..c6ef04b 100644 --- a/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb +++ b/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb @@ -10,9 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-26 19:11:42.111709: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-09-26 18:01:12.492280: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-09-26 18:01:12.493991: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-09-26 18:01:12.495545: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-09-26 18:01:12.508859: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-09-26 19:11:45.658475: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-09-26 18:01:14.676533: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -47,24 +50,21 @@ "id": "a4238bad", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "bash: /home/jlinder/anaconda3/envs/borzoi_py39_2/lib/libtinfo.so.6: no version information available (required by bash)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "f0 model already exists.\n", - "f1 model already exists.\n", - "f2 model already exists.\n", - "f3 model already exists.\n", - "Annotation already exists.\n", - "Splice sites already exist.\n", - "PolyA sites already exist.\n", + "f3c0 model already exists.\n", + "f3c1 model already exists.\n", + "f3c2 model already exists.\n", + "f3c3 model already exists.\n", + "Gene annotation already exists.\n", + "Gene annotation (no read-through, protein-coding) already exists.\n", + "Gene annotation (protein-coding) already exists.\n", + "TSS annotation already exists.\n", + "Splice site annotation already exist.\n", + "Splice site annotation already exist.\n", + "PolyA site annotation already exist.\n", "Human genome FASTA already exists.\n" ] } @@ -72,40 +72,71 @@ "source": [ "%%bash\n", "\n", - "#Download model weights\n", - "for rep in f0 f1 f2 f3; do\n", - " mkdir -p \"saved_models/$rep/\"\n", - " local_model=\"saved_models/$rep/model0_best.h5\"\n", + "#Download model weights (data fold 3, 4 replicates)\n", + "for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=\",\"; set -- $rep; \n", + " mkdir -p \"saved_models/$1/train\"\n", + " local_model=\"saved_models/$1/train/model0_best.h5\"\n", " if [ -f \"$local_model\" ]; then\n", - " echo \"$rep model already exists.\"\n", + " echo \"$1 model already exists.\"\n", " else\n", - " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$rep/model0_best.h5\" 
-O \"$local_model\"\n", + " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5\" -O \"$local_model\"\n", " fi\n", "done\n", "\n", "#Download and uncompress annotation files\n", - "if [ -f gencode41_basic_nort.gtf ]; then\n", - " echo \"Annotation already exists.\"\n", + "mkdir -p hg38/genes/gencode41\n", + "mkdir -p hg38/genes/polyadb\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then\n", + " echo \"Gene annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then\n", + " echo \"Gene annotation (no read-through, protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then\n", + " echo \"Gene annotation (protein-coding) already exists.\"\n", "else\n", - " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > gencode41_basic_nort.gtf\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein.gtf\n", "fi\n", - "if [ -f gencode41_basic_protein_splice.csv.gz ]; then\n", - " echo \"Splice sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then\n", + " echo \"TSS annotation already exists.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > 
hg38/genes/gencode41/gencode41_basic_tss2.bed\n", "fi\n", - "if [ -f polyadb_human_v3.csv.gz ]; then\n", - " echo \"PolyA sites already exist.\"\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then\n", + " echo \"Splice site annotation already exist.\"\n", "else\n", - " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz\n", + " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein_splice.gff\n", + "fi\n", + "\n", + "if [ -f hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then\n", + " echo \"PolyA site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O hg38/genes/polyadb/polyadb_human_v3.csv.gz\n", "fi\n", "\n", "#Download and index hg38 genome\n", - "if [ -f hg38.fa ]; then\n", + "mkdir -p hg38/assembly/ucsc\n", + "\n", + "if [ -f hg38/assembly/ucsc/hg38.fa ]; then\n", " echo \"Human genome FASTA already exists.\"\n", "else\n", - " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38.fa\n", - "fi" + " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38/assembly/ucsc/hg38.fa\n", + "fi\n" ] }, { @@ -117,7 +148,7 @@ { "data": { "text/plain": [ - "Faidx(\"hg38.fa\")" + "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" ] }, "execution_count": 3, @@ -126,7 +157,7 @@ } ], "source": [ - "pyfaidx.Faidx('hg38.fa')" + "pyfaidx.Faidx('hg38/assembly/ucsc/hg38.fa')" ] }, { @@ -139,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ 
- "2023-09-26 19:11:59.651232: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10372 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:81:00.0, compute capability: 6.1\n" + "2024-09-25 10:53:57.292179: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10232 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" ] } ], @@ -181,7 +212,7 @@ "models = []\n", "for rep_ix in range(n_reps) :\n", " \n", - " model_file = \"saved_models/f\" + str(rep_ix) + \"/model0_best.h5\"\n", + " model_file = \"saved_models/f3c\" + str(rep_ix) + \"/train/model0_best.h5\"\n", "\n", " seqnn_model = seqnn.SeqNN(params_model)\n", " seqnn_model.restore(model_file, 0)\n", @@ -210,11 +241,11 @@ "source": [ "#Initialize fasta sequence extractor\n", "\n", - "fasta_open = pysam.Fastafile('hg38.fa')\n", + "fasta_open = pysam.Fastafile('hg38/assembly/ucsc/hg38.fa')\n", "\n", "#Load splice site annotation\n", "\n", - "splice_df = pd.read_csv('gencode41_basic_protein_splice.csv.gz', sep='\\t', compression='gzip')\n", + "splice_df = pd.read_csv('hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz', sep='\\t', compression='gzip')\n", "\n", "print(\"len(splice_df) = \" + str(len(splice_df)))\n" ] @@ -253,8 +284,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-09-26 19:13:40.227238: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600\n", - "2023-09-26 19:13:40.745579: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-09-25 10:54:21.155675: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-09-25 10:54:21.243945: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child 
process: No such file or directory\n", + "2024-09-25 10:54:21.567826: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -269,7 +301,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -288,7 +320,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -300,8 +332,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 13.1 s, sys: 716 ms, total: 13.9 s\n", - "Wall time: 16.3 s\n" + "CPU times: user 19.5 s, sys: 437 ms, total: 20 s\n", + "Wall time: 22.8 s\n" ] } ], @@ -912,7 +944,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.8.15" } }, "nbformat": 4, diff --git a/examples/params.json b/examples/params.json new file mode 100644 index 0000000..4fe232c --- /dev/null +++ b/examples/params.json @@ -0,0 +1,87 @@ +{ + "train": { + "batch_size": 1, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.00006, + "loss": "poisson_mn", + "total_weight": 0.2, + "warmup_steps": 20000, + "global_clipnorm": 0.15, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 524288, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch-sync", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 2.0e-8, + "trunk": [ + { + "name": "conv_dna", + "filters": 512, + "kernel_size": 15, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 608, + "filters_end": 1536, + "divisible_by": 32, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 64, + "heads": 8, + "num_position_features": 32, + "dropout": 0.2, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 8 + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "Cropping1D", + "cropping": 5120 + }, + { + "name": "conv_nac", + "filters": 1920, + "dropout": 0.1 + } + ], + "head_human": { + "name": "final", + "units": 7611, + "activation": 
"softplus" + }, + "head_mouse": { + "name": "final", + "units": 2608, + "activation": "softplus" + } + } +} diff --git a/examples/params_pred.json b/examples/params_pred.json index 4811257..5a432dc 100644 --- a/examples/params_pred.json +++ b/examples/params_pred.json @@ -1,45 +1,45 @@ { "train": { "batch_size": 2, - "shuffle_buffer": 256, + "shuffle_buffer": 256, "optimizer": "adam", - "learning_rate": 0.00006, - "loss": "poisson_mn", - "total_weight": 0.2, + "learning_rate": 0.00006, + "loss": "poisson_mn", + "total_weight": 0.2, "warmup_steps": 20000, "global_clipnorm": 0.15, "adam_beta1": 0.9, "adam_beta2": 0.999, "patience": 30, - "train_epochs_min": 130, - "train_epochs_max": 180 + "train_epochs_min": 130, + "train_epochs_max": 180 }, "model": { - "verbose": false, + "verbose": false, "seq_length": 524288, "augment_rc": true, "augment_shift": 3, "activation": "gelu", "norm_type": "batch-sync", "bn_momentum": 0.9, - "kernel_initializer": "lecun_normal", - "l2_scale": 2.0e-8, + "kernel_initializer": "lecun_normal", + "l2_scale": 2.0e-8, "trunk": [ { "name": "conv_dna", "filters": 512, "kernel_size": 15, - "norm_type": null, - "activation": "linear", + "norm_type": null, + "activation": "linear", "pool_size": 2 }, { "name": "res_tower", "filters_init": 608, - "filters_end": 1536, - "divisible_by": 32, + "filters_end": 1536, + "divisible_by": 32, "kernel_size": 5, - "num_convs": 1, + "num_convs": 1, "pool_size": 2, "repeat": 6 }, @@ -49,21 +49,21 @@ "heads": 8, "num_position_features": 32, "dropout": 0.2, - "mha_l2_scale": 1.0e-8, - "l2_scale": 1.0e-8, - "kernel_initializer": "he_normal", - "repeat": 8 + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 8 + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true }, - { - "name": "unet_conv", - "kernel_size": 3, - "upsample_conv": true - }, - { - "name": "unet_conv", - 
"kernel_size": 3, - "upsample_conv": true - }, { "name": "Cropping1D", "cropping": 16 @@ -74,7 +74,7 @@ "dropout": 0.1 } ], - "head_human": { + "head_human": { "name": "final", "units": 7611, "activation": "softplus" diff --git a/examples/targets_gtex_liver.txt b/examples/targets_gtex_liver.txt new file mode 100644 index 0000000..4740108 --- /dev/null +++ b/examples/targets_gtex_liver.txt @@ -0,0 +1,4 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +7563 GTEX-11EQ9-0526-SM-5A5JZ.1 /home/drk/tillage/datasets/human/rna/recount3/liver/GTEX-11EQ9-0526-SM-5A5JZ.1/coverage.w5 768 384 0.01 sum_sqrt 7563 RNA:liver +7564 GTEX-1QP66-0226-SM-DPRXS.1 /home/drk/tillage/datasets/human/rna/recount3/liver/GTEX-1QP66-0226-SM-DPRXS.1/coverage.w5 768 384 0.01 sum_sqrt 7564 RNA:liver +7565 GTEX-ZYT6-0626-SM-5E45V.1 /home/drk/tillage/datasets/human/rna/recount3/liver/GTEX-ZYT6-0626-SM-5E45V.1/coverage.w5 768 384 0.01 sum_sqrt 7565 RNA:liver diff --git a/examples/targets_mouse.txt b/examples/targets_mouse.txt new file mode 100644 index 0000000..affb228 --- /dev/null +++ b/examples/targets_mouse.txt @@ -0,0 +1,2609 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +0 CNhs10464+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10464/summary/coverage+.w5 768 384 1.0 sum 1 CAGE:placenta, adult pregnant day17 +1 CNhs10464- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10464/summary/coverage-.w5 768 384 1.0 sum 0 CAGE:placenta, adult pregnant day17 +2 CNhs10465+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10465/summary/coverage+.w5 768 384 1.0 sum 3 CAGE:spleen, adult +3 CNhs10465- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10465/summary/coverage-.w5 768 384 1.0 sum 2 CAGE:spleen, adult +4 CNhs10466+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10466/summary/coverage+.w5 768 384 1.0 sum 5 CAGE:liver, adult pregnant day01 +5 CNhs10466- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10466/summary/coverage-.w5 768 384 1.0 
sum 4 CAGE:liver, adult pregnant day01 +6 CNhs10467+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10467/summary/coverage+.w5 768 384 1.0 sum 7 CAGE:cecum, adult +7 CNhs10467- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10467/summary/coverage-.w5 768 384 1.0 sum 6 CAGE:cecum, adult +8 CNhs10468+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10468/summary/coverage+.w5 768 384 1.0 sum 9 CAGE:colon, adult +9 CNhs10468- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10468/summary/coverage-.w5 768 384 1.0 sum 8 CAGE:colon, adult +10 CNhs10469+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10469/summary/coverage+.w5 768 384 1.0 sum 11 CAGE:submandibular gland, adult +11 CNhs10469- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10469/summary/coverage-.w5 768 384 1.0 sum 10 CAGE:submandibular gland, adult +12 CNhs10470+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10470/summary/coverage+.w5 768 384 1.0 sum 13 CAGE:prostate, adult +13 CNhs10470- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10470/summary/coverage-.w5 768 384 1.0 sum 12 CAGE:prostate, adult +14 CNhs10471+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10471/summary/coverage+.w5 768 384 1.0 sum 15 CAGE:thymus, adult +15 CNhs10471- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10471/summary/coverage-.w5 768 384 1.0 sum 14 CAGE:thymus, adult +16 CNhs10472+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10472/summary/coverage+.w5 768 384 1.0 sum 17 CAGE:placenta, adult pregnant day10 +17 CNhs10472- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10472/summary/coverage-.w5 768 384 1.0 sum 16 CAGE:placenta, adult pregnant day10 +18 CNhs10473+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10473/summary/coverage+.w5 768 384 1.0 sum 19 CAGE:cortex, adult +19 CNhs10473- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10473/summary/coverage-.w5 768 384 1.0 sum 18 CAGE:cortex, adult +20 CNhs10474+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10474/summary/coverage+.w5 768 384 1.0 
sum 21 CAGE:lung, adult +21 CNhs10474- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10474/summary/coverage-.w5 768 384 1.0 sum 20 CAGE:lung, adult +22 CNhs10475+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10475/summary/coverage+.w5 768 384 1.0 sum 23 CAGE:accessory axillary lymph node, adult +23 CNhs10475- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10475/summary/coverage-.w5 768 384 1.0 sum 22 CAGE:accessory axillary lymph node, adult +24 CNhs10476+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10476/summary/coverage+.w5 768 384 1.0 sum 25 CAGE:mammary gland, adult pregnant day19 +25 CNhs10476- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10476/summary/coverage-.w5 768 384 1.0 sum 24 CAGE:mammary gland, adult pregnant day19 +26 CNhs10477+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10477/summary/coverage+.w5 768 384 1.0 sum 27 CAGE:medulla oblongata, adult +27 CNhs10477- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10477/summary/coverage-.w5 768 384 1.0 sum 26 CAGE:medulla oblongata, adult +28 CNhs10478+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10478/summary/coverage+.w5 768 384 1.0 sum 29 CAGE:hippocampus, adult +29 CNhs10478- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10478/summary/coverage-.w5 768 384 1.0 sum 28 CAGE:hippocampus, adult +30 CNhs10480+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10480/summary/coverage+.w5 768 384 1.0 sum 31 CAGE:mammary gland, adult lactating day02 +31 CNhs10480- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10480/summary/coverage-.w5 768 384 1.0 sum 30 CAGE:mammary gland, adult lactating day02 +32 CNhs10481+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10481/summary/coverage+.w5 768 384 1.0 sum 33 CAGE:urinary bladder, adult +33 CNhs10481- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10481/summary/coverage-.w5 768 384 1.0 sum 32 CAGE:urinary bladder, adult +34 CNhs10482+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10482/summary/coverage+.w5 768 384 1.0 sum 35 
CAGE:diencephalon, adult +35 CNhs10482- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10482/summary/coverage-.w5 768 384 1.0 sum 34 CAGE:diencephalon, adult +36 CNhs10483+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10483/summary/coverage+.w5 768 384 1.0 sum 37 CAGE:bone (os femoris), adult +37 CNhs10483- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10483/summary/coverage-.w5 768 384 1.0 sum 36 CAGE:bone (os femoris), adult +38 CNhs10484+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10484/summary/coverage+.w5 768 384 1.0 sum 39 CAGE:eyeball, adult +39 CNhs10484- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10484/summary/coverage-.w5 768 384 1.0 sum 38 CAGE:eyeball, adult +40 CNhs10486+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10486/summary/coverage+.w5 768 384 1.0 sum 41 CAGE:pancreas, adult +41 CNhs10486- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10486/summary/coverage-.w5 768 384 1.0 sum 40 CAGE:pancreas, adult +42 CNhs10487+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10487/summary/coverage+.w5 768 384 1.0 sum 43 CAGE:corpus striatum, adult +43 CNhs10487- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10487/summary/coverage-.w5 768 384 1.0 sum 42 CAGE:corpus striatum, adult +44 CNhs10488+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10488/summary/coverage+.w5 768 384 1.0 sum 45 CAGE:amnion, adult pregnant day17.5 +45 CNhs10488- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10488/summary/coverage-.w5 768 384 1.0 sum 44 CAGE:amnion, adult pregnant day17.5 +46 CNhs10489+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10489/summary/coverage+.w5 768 384 1.0 sum 47 CAGE:olfactory brain, adult +47 CNhs10489- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10489/summary/coverage-.w5 768 384 1.0 sum 46 CAGE:olfactory brain, adult +48 CNhs10490+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10490/summary/coverage+.w5 768 384 1.0 sum 49 CAGE:epididymis, adult +49 CNhs10490- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs10490/summary/coverage-.w5 768 384 1.0 sum 48 CAGE:epididymis, adult +50 CNhs10491+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10491/summary/coverage+.w5 768 384 1.0 sum 51 CAGE:vesicular gland, adult +51 CNhs10491- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10491/summary/coverage-.w5 768 384 1.0 sum 50 CAGE:vesicular gland, adult +52 CNhs10492+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10492/summary/coverage+.w5 768 384 1.0 sum 53 CAGE:skin, adult +53 CNhs10492- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10492/summary/coverage-.w5 768 384 1.0 sum 52 CAGE:skin, adult +54 CNhs10493+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10493/summary/coverage+.w5 768 384 1.0 sum 55 CAGE:pituitary gland, adult +55 CNhs10493- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10493/summary/coverage-.w5 768 384 1.0 sum 54 CAGE:pituitary gland, adult +56 CNhs10494+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10494/summary/coverage+.w5 768 384 1.0 sum 57 CAGE:cerebellum, adult +57 CNhs10494- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10494/summary/coverage-.w5 768 384 1.0 sum 56 CAGE:cerebellum, adult +58 CNhs10496+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10496/summary/coverage+.w5 768 384 1.0 sum 59 CAGE:intestine, adult +59 CNhs10496- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10496/summary/coverage-.w5 768 384 1.0 sum 58 CAGE:intestine, adult +60 CNhs10497+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10497/summary/coverage+.w5 768 384 1.0 sum 61 CAGE:uterus, adult pregnant day19 +61 CNhs10497- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10497/summary/coverage-.w5 768 384 1.0 sum 60 CAGE:uterus, adult pregnant day19 +62 CNhs10498+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10498/summary/coverage+.w5 768 384 1.0 sum 63 CAGE:aorta, adult +63 CNhs10498- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10498/summary/coverage-.w5 768 384 1.0 sum 62 CAGE:aorta, adult 
+64 CNhs10499+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10499/summary/coverage+.w5 768 384 1.0 sum 65 CAGE:tongue, adult +65 CNhs10499- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10499/summary/coverage-.w5 768 384 1.0 sum 64 CAGE:tongue, adult +66 CNhs10500+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10500/summary/coverage+.w5 768 384 1.0 sum 67 CAGE:oviduct, adult pregnant day01 +67 CNhs10500- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10500/summary/coverage-.w5 768 384 1.0 sum 66 CAGE:oviduct, adult pregnant day01 +68 CNhs10501+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10501/summary/coverage+.w5 768 384 1.0 sum 69 CAGE:corpora quadrigemina, adult +69 CNhs10501- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10501/summary/coverage-.w5 768 384 1.0 sum 68 CAGE:corpora quadrigemina, adult +70 CNhs10502+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10502/summary/coverage+.w5 768 384 1.0 sum 71 CAGE:vagina, adult +71 CNhs10502- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10502/summary/coverage-.w5 768 384 1.0 sum 70 CAGE:vagina, adult +72 CNhs10503+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10503/summary/coverage+.w5 768 384 1.0 sum 73 CAGE:stomach, adult +73 CNhs10503- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10503/summary/coverage-.w5 768 384 1.0 sum 72 CAGE:stomach, adult +74 CNhs10504+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10504/summary/coverage+.w5 768 384 1.0 sum 75 CAGE:testis, adult +75 CNhs10504- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10504/summary/coverage-.w5 768 384 1.0 sum 74 CAGE:testis, adult +76 CNhs10505+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10505/summary/coverage+.w5 768 384 1.0 sum 77 CAGE:spinal cord, adult +77 CNhs10505- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10505/summary/coverage-.w5 768 384 1.0 sum 76 CAGE:spinal cord, adult +78 CNhs10506+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10506/summary/coverage+.w5 768 384 1.0 sum 79 
CAGE:intestinal mucosa, adult +79 CNhs10506- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10506/summary/coverage-.w5 768 384 1.0 sum 78 CAGE:intestinal mucosa, adult +80 CNhs10507+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10507/summary/coverage+.w5 768 384 1.0 sum 81 CAGE:ovary, adult +81 CNhs10507- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10507/summary/coverage-.w5 768 384 1.0 sum 80 CAGE:ovary, adult +82 CNhs10508+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10508/summary/coverage+.w5 768 384 1.0 sum 83 CAGE:adrenal gland, adult +83 CNhs10508- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10508/summary/coverage-.w5 768 384 1.0 sum 82 CAGE:adrenal gland, adult +84 CNhs10509+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10509/summary/coverage+.w5 768 384 1.0 sum 85 CAGE:uterus, adult +85 CNhs10509- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10509/summary/coverage-.w5 768 384 1.0 sum 84 CAGE:uterus, adult +86 CNhs10510+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10510/summary/coverage+.w5 768 384 1.0 sum 87 CAGE:liver, embryo E17 +87 CNhs10510- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10510/summary/coverage-.w5 768 384 1.0 sum 86 CAGE:liver, embryo E17 +88 CNhs10512+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10512/summary/coverage+.w5 768 384 1.0 sum 89 CAGE:whole body, embryo E13 +89 CNhs10512- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10512/summary/coverage-.w5 768 384 1.0 sum 88 CAGE:whole body, embryo E13 +90 CNhs10513+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10513/summary/coverage+.w5 768 384 1.0 sum 91 CAGE:whole body, embryo E17.5 +91 CNhs10513- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10513/summary/coverage-.w5 768 384 1.0 sum 90 CAGE:whole body, embryo E17.5 +92 CNhs10514+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10514/summary/coverage+.w5 768 384 1.0 sum 93 CAGE:whole body, embryo E16 +93 CNhs10514- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs10514/summary/coverage-.w5 768 384 1.0 sum 92 CAGE:whole body, embryo E16 +94 CNhs10515+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10515/summary/coverage+.w5 768 384 1.0 sum 95 CAGE:whole body, neonate N06 +95 CNhs10515- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10515/summary/coverage-.w5 768 384 1.0 sum 94 CAGE:whole body, neonate N06 +96 CNhs10516+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10516/summary/coverage+.w5 768 384 1.0 sum 97 CAGE:whole body, embryo E18 +97 CNhs10516- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10516/summary/coverage-.w5 768 384 1.0 sum 96 CAGE:whole body, embryo E18 +98 CNhs10517+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10517/summary/coverage+.w5 768 384 1.0 sum 99 CAGE:whole body, embryo E17 +99 CNhs10517- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10517/summary/coverage-.w5 768 384 1.0 sum 98 CAGE:whole body, embryo E17 +100 CNhs10518+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10518/summary/coverage+.w5 768 384 1.0 sum 101 CAGE:whole body, neonate N10 +101 CNhs10518- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10518/summary/coverage-.w5 768 384 1.0 sum 100 CAGE:whole body, neonate N10 +102 CNhs10519+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10519/summary/coverage+.w5 768 384 1.0 sum 103 CAGE:whole body, embryo E14 +103 CNhs10519- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10519/summary/coverage-.w5 768 384 1.0 sum 102 CAGE:whole body, embryo E14 +104 CNhs10520+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10520/summary/coverage+.w5 768 384 1.0 sum 105 CAGE:liver, embryo E15 +105 CNhs10520- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10520/summary/coverage-.w5 768 384 1.0 sum 104 CAGE:liver, embryo E15 +106 CNhs10521+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10521/summary/coverage+.w5 768 384 1.0 sum 107 CAGE:eyeball, embryo E14 +107 CNhs10521- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs10521/summary/coverage-.w5 768 384 1.0 sum 106 CAGE:eyeball, embryo E14 +108 CNhs10522+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10522/summary/coverage+.w5 768 384 1.0 sum 109 CAGE:lung, embryo E12 +109 CNhs10522- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10522/summary/coverage-.w5 768 384 1.0 sum 108 CAGE:lung, embryo E12 +110 CNhs10523+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10523/summary/coverage+.w5 768 384 1.0 sum 111 CAGE:liver, embryo E16 +111 CNhs10523- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10523/summary/coverage-.w5 768 384 1.0 sum 110 CAGE:liver, embryo E16 +112 CNhs10524+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10524/summary/coverage+.w5 768 384 1.0 sum 113 CAGE:liver, embryo E13 +113 CNhs10524- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10524/summary/coverage-.w5 768 384 1.0 sum 112 CAGE:liver, embryo E13 +114 CNhs10525+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10525/summary/coverage+.w5 768 384 1.0 sum 115 CAGE:whole body, neonate N00 +115 CNhs10525- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10525/summary/coverage-.w5 768 384 1.0 sum 114 CAGE:whole body, neonate N00 +116 CNhs10526+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10526/summary/coverage+.w5 768 384 1.0 sum 117 CAGE:intestine, embryo E18 +117 CNhs10526- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10526/summary/coverage-.w5 768 384 1.0 sum 116 CAGE:intestine, embryo E18 +118 CNhs10576+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10576/summary/coverage+.w5 768 384 1.0 sum 119 CAGE:whole body, neonate N01 +119 CNhs10576- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10576/summary/coverage-.w5 768 384 1.0 sum 118 CAGE:whole body, neonate N01 +120 CNhs10577+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10577/summary/coverage+.w5 768 384 1.0 sum 121 CAGE:forelimb, embryo E14 +121 CNhs10577- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs10577/summary/coverage-.w5 768 384 1.0 sum 120 CAGE:forelimb, embryo E14 +122 CNhs10578+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10578/summary/coverage+.w5 768 384 1.0 sum 123 CAGE:whole body, embryo E14.5 +123 CNhs10578- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10578/summary/coverage-.w5 768 384 1.0 sum 122 CAGE:whole body, embryo E14.5 +124 CNhs10579+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10579/summary/coverage+.w5 768 384 1.0 sum 125 CAGE:liver, embryo E18 +125 CNhs10579- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10579/summary/coverage-.w5 768 384 1.0 sum 124 CAGE:liver, embryo E18 +126 CNhs10580+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10580/summary/coverage+.w5 768 384 1.0 sum 127 CAGE:pancreas, embryo E18 +127 CNhs10580- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10580/summary/coverage-.w5 768 384 1.0 sum 126 CAGE:pancreas, embryo E18 +128 CNhs10581+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10581/summary/coverage+.w5 768 384 1.0 sum 129 CAGE:thymus, embryo E17 +129 CNhs10581- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10581/summary/coverage-.w5 768 384 1.0 sum 128 CAGE:thymus, embryo E17 +130 CNhs10582+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10582/summary/coverage+.w5 768 384 1.0 sum 131 CAGE:intestine, embryo E17 +131 CNhs10582- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10582/summary/coverage-.w5 768 384 1.0 sum 130 CAGE:intestine, embryo E17 +132 CNhs10583+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10583/summary/coverage+.w5 768 384 1.0 sum 133 CAGE:lung, embryo E18 +133 CNhs10583- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10583/summary/coverage-.w5 768 384 1.0 sum 132 CAGE:lung, embryo E18 +134 CNhs10584+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10584/summary/coverage+.w5 768 384 1.0 sum 135 CAGE:kidney, embryo E16 +135 CNhs10584- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10584/summary/coverage-.w5 
768 384 1.0 sum 134 CAGE:kidney, embryo E16 +136 CNhs10585+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10585/summary/coverage+.w5 768 384 1.0 sum 137 CAGE:intestine, embryo E16 +137 CNhs10585- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10585/summary/coverage-.w5 768 384 1.0 sum 136 CAGE:intestine, embryo E16 +138 CNhs10586+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10586/summary/coverage+.w5 768 384 1.0 sum 139 CAGE:heart, embryo E11 +139 CNhs10586- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10586/summary/coverage-.w5 768 384 1.0 sum 138 CAGE:heart, embryo E11 +140 CNhs10587+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10587/summary/coverage+.w5 768 384 1.0 sum 141 CAGE:whole body, embryo E12 +141 CNhs10587- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10587/summary/coverage-.w5 768 384 1.0 sum 140 CAGE:whole body, embryo E12 +142 CNhs10588+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10588/summary/coverage+.w5 768 384 1.0 sum 143 CAGE:stomach, embryo E12 +143 CNhs10588- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10588/summary/coverage-.w5 768 384 1.0 sum 142 CAGE:stomach, embryo E12 +144 CNhs10589+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10589/summary/coverage+.w5 768 384 1.0 sum 145 CAGE:forelimb, embryo E13 +145 CNhs10589- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10589/summary/coverage-.w5 768 384 1.0 sum 144 CAGE:forelimb, embryo E13 +146 CNhs10592+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10592/summary/coverage+.w5 768 384 1.0 sum 147 CAGE:pituitary gland, embryo E15 +147 CNhs10592- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10592/summary/coverage-.w5 768 384 1.0 sum 146 CAGE:pituitary gland, embryo E15 +148 CNhs10593+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10593/summary/coverage+.w5 768 384 1.0 sum 149 CAGE:eyeball, embryo E15 +149 CNhs10593- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10593/summary/coverage-.w5 768 384 1.0 sum 148 CAGE:eyeball, embryo E15 +150 
CNhs10594+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10594/summary/coverage+.w5 768 384 1.0 sum 151 CAGE:liver, embryo E14 +151 CNhs10594- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10594/summary/coverage-.w5 768 384 1.0 sum 150 CAGE:liver, embryo E14 +152 CNhs10595+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10595/summary/coverage+.w5 768 384 1.0 sum 153 CAGE:thymus, embryo E18 +153 CNhs10595- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10595/summary/coverage-.w5 768 384 1.0 sum 152 CAGE:thymus, embryo E18 +154 CNhs10596+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10596/summary/coverage+.w5 768 384 1.0 sum 155 CAGE:forelimb, embryo E11 +155 CNhs10596- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10596/summary/coverage-.w5 768 384 1.0 sum 154 CAGE:forelimb, embryo E11 +156 CNhs10597+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10597/summary/coverage+.w5 768 384 1.0 sum 157 CAGE:heart, embryo E14 +157 CNhs10597- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10597/summary/coverage-.w5 768 384 1.0 sum 156 CAGE:heart, embryo E14 +158 CNhs10598+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10598/summary/coverage+.w5 768 384 1.0 sum 159 CAGE:forelimb, embryo E17 +159 CNhs10598- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10598/summary/coverage-.w5 768 384 1.0 sum 158 CAGE:forelimb, embryo E17 +160 CNhs10599+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10599/summary/coverage+.w5 768 384 1.0 sum 161 CAGE:pancreas, embryo E17 +161 CNhs10599- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10599/summary/coverage-.w5 768 384 1.0 sum 160 CAGE:pancreas, embryo E17 +162 CNhs10600+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10600/summary/coverage+.w5 768 384 1.0 sum 163 CAGE:forelimb, embryo E12 +163 CNhs10600- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10600/summary/coverage-.w5 768 384 1.0 sum 162 CAGE:forelimb, embryo E12 +164 CNhs10601+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs10601/summary/coverage+.w5 768 384 1.0 sum 165 CAGE:liver, embryo E12 +165 CNhs10601- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10601/summary/coverage-.w5 768 384 1.0 sum 164 CAGE:liver, embryo E12 +166 CNhs10602+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10602/summary/coverage+.w5 768 384 1.0 sum 167 CAGE:intestine, embryo E15 +167 CNhs10602- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10602/summary/coverage-.w5 768 384 1.0 sum 166 CAGE:intestine, embryo E15 +168 CNhs10603+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10603/summary/coverage+.w5 768 384 1.0 sum 169 CAGE:stomach, embryo E15 +169 CNhs10603- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10603/summary/coverage-.w5 768 384 1.0 sum 168 CAGE:stomach, embryo E15 +170 CNhs10604+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10604/summary/coverage+.w5 768 384 1.0 sum 171 CAGE:lung, embryo E14 +171 CNhs10604- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10604/summary/coverage-.w5 768 384 1.0 sum 170 CAGE:lung, embryo E14 +172 CNhs10605+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10605/summary/coverage+.w5 768 384 1.0 sum 173 CAGE:lung, embryo E17 +173 CNhs10605- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10605/summary/coverage-.w5 768 384 1.0 sum 172 CAGE:lung, embryo E17 +174 CNhs10606+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10606/summary/coverage+.w5 768 384 1.0 sum 175 CAGE:kidney, embryo E14 +175 CNhs10606- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10606/summary/coverage-.w5 768 384 1.0 sum 174 CAGE:kidney, embryo E14 +176 CNhs10609+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10609/summary/coverage+.w5 768 384 1.0 sum 177 CAGE:Clontech Mouse Universal Reference Total RNA, pool1 +177 CNhs10609- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10609/summary/coverage-.w5 768 384 1.0 sum 176 CAGE:Clontech Mouse Universal Reference Total RNA, pool1 +178 CNhs10611+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs10611/summary/coverage+.w5 768 384 1.0 sum 179 CAGE:SABiosciences XpressRef Mouse Universal Total RNA, pool1 +179 CNhs10611- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10611/summary/coverage-.w5 768 384 1.0 sum 178 CAGE:SABiosciences XpressRef Mouse Universal Total RNA, pool1 +180 CNhs10613+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10613/summary/coverage+.w5 768 384 1.0 sum 181 CAGE:Universal RNA - Mouse Normal Tissues Biochain, pool1 +181 CNhs10613- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10613/summary/coverage-.w5 768 384 1.0 sum 180 CAGE:Universal RNA - Mouse Normal Tissues Biochain, pool1 +182 CNhs10997+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10997/summary/coverage+.w5 768 384 1.0 sum 183 CAGE:kidney, embryo E15 +183 CNhs10997- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10997/summary/coverage-.w5 768 384 1.0 sum 182 CAGE:kidney, embryo E15 +184 CNhs10998+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10998/summary/coverage+.w5 768 384 1.0 sum 185 CAGE:lung, embryo E16 +185 CNhs10998- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10998/summary/coverage-.w5 768 384 1.0 sum 184 CAGE:lung, embryo E16 +186 CNhs10999+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10999/summary/coverage+.w5 768 384 1.0 sum 187 CAGE:stomach, embryo E18 +187 CNhs10999- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs10999/summary/coverage-.w5 768 384 1.0 sum 186 CAGE:stomach, embryo E18 +188 CNhs11001+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11001/summary/coverage+.w5 768 384 1.0 sum 189 CAGE:kidney, embryo E18 +189 CNhs11001- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11001/summary/coverage-.w5 768 384 1.0 sum 188 CAGE:kidney, embryo E18 +190 CNhs11002+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11002/summary/coverage+.w5 768 384 1.0 sum 191 CAGE:thymus, embryo E16 +191 CNhs11002- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11002/summary/coverage-.w5 768 384 1.0 
sum 190 CAGE:thymus, embryo E16 +192 CNhs11003+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11003/summary/coverage+.w5 768 384 1.0 sum 193 CAGE:pancreas, embryo E16 +193 CNhs11003- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11003/summary/coverage-.w5 768 384 1.0 sum 192 CAGE:pancreas, embryo E16 +194 CNhs11004+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11004/summary/coverage+.w5 768 384 1.0 sum 195 CAGE:adrenal gland, embryo E16 +195 CNhs11004- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11004/summary/coverage-.w5 768 384 1.0 sum 194 CAGE:adrenal gland, embryo E16 +196 CNhs11005+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11005/summary/coverage+.w5 768 384 1.0 sum 197 CAGE:thymus, embryo E15 +197 CNhs11005- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11005/summary/coverage-.w5 768 384 1.0 sum 196 CAGE:thymus, embryo E15 +198 CNhs11006+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11006/summary/coverage+.w5 768 384 1.0 sum 199 CAGE:stomach, embryo E17 +199 CNhs11006- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11006/summary/coverage-.w5 768 384 1.0 sum 198 CAGE:stomach, embryo E17 +200 CNhs11007+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11007/summary/coverage+.w5 768 384 1.0 sum 201 CAGE:forelimb, embryo E15 +201 CNhs11007- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11007/summary/coverage-.w5 768 384 1.0 sum 200 CAGE:forelimb, embryo E15 +202 CNhs11008+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11008/summary/coverage+.w5 768 384 1.0 sum 203 CAGE:forelimb, embryo E18 +203 CNhs11008- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11008/summary/coverage-.w5 768 384 1.0 sum 202 CAGE:forelimb, embryo E18 +204 CNhs11009+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11009/summary/coverage+.w5 768 384 1.0 sum 205 CAGE:pituitary gland, embryo E13 +205 CNhs11009- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11009/summary/coverage-.w5 768 384 1.0 sum 204 CAGE:pituitary gland, embryo E13 +206 CNhs11010+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11010/summary/coverage+.w5 768 384 1.0 sum 207 CAGE:intestine, embryo E13 +207 CNhs11010- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11010/summary/coverage-.w5 768 384 1.0 sum 206 CAGE:intestine, embryo E13 +208 CNhs11011+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11011/summary/coverage+.w5 768 384 1.0 sum 209 CAGE:spleen, embryo E18 +209 CNhs11011- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11011/summary/coverage-.w5 768 384 1.0 sum 208 CAGE:spleen, embryo E18 +210 CNhs11012+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11012/summary/coverage+.w5 768 384 1.0 sum 211 CAGE:pancreas, embryo E14 +211 CNhs11012- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11012/summary/coverage-.w5 768 384 1.0 sum 210 CAGE:pancreas, embryo E14 +212 CNhs11013+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11013/summary/coverage+.w5 768 384 1.0 sum 213 CAGE:heart, embryo E13 +213 CNhs11013- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11013/summary/coverage-.w5 768 384 1.0 sum 212 CAGE:heart, embryo E13 +214 CNhs11014+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11014/summary/coverage+.w5 768 384 1.0 sum 215 CAGE:whole body, embryo E11 +215 CNhs11014- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11014/summary/coverage-.w5 768 384 1.0 sum 214 CAGE:whole body, embryo E11 +216 CNhs11015+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11015/summary/coverage+.w5 768 384 1.0 sum 217 CAGE:heart, embryo E12 +217 CNhs11015- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11015/summary/coverage-.w5 768 384 1.0 sum 216 CAGE:heart, embryo E12 +218 CNhs11016+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11016/summary/coverage+.w5 768 384 1.0 sum 219 CAGE:eyeball, embryo E12 +219 CNhs11016- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11016/summary/coverage-.w5 768 384 1.0 sum 218 CAGE:eyeball, embryo E12 +220 CNhs11017+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11017/summary/coverage+.w5 768 
384 1.0 sum 221 CAGE:heart, embryo E15 +221 CNhs11017- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11017/summary/coverage-.w5 768 384 1.0 sum 220 CAGE:heart, embryo E15 +222 CNhs11018+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11018/summary/coverage+.w5 768 384 1.0 sum 223 CAGE:pituitary gland, embryo E12 +223 CNhs11018- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11018/summary/coverage-.w5 768 384 1.0 sum 222 CAGE:pituitary gland, embryo E12 +224 CNhs11019+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11019/summary/coverage+.w5 768 384 1.0 sum 225 CAGE:intestine, embryo E12 +225 CNhs11019- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11019/summary/coverage-.w5 768 384 1.0 sum 224 CAGE:intestine, embryo E12 +226 CNhs11020+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11020/summary/coverage+.w5 768 384 1.0 sum 227 CAGE:lung, embryo E15 +227 CNhs11020- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11020/summary/coverage-.w5 768 384 1.0 sum 226 CAGE:lung, embryo E15 +228 CNhs11021+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11021/summary/coverage+.w5 768 384 1.0 sum 229 CAGE:heart, embryo E16 +229 CNhs11021- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11021/summary/coverage-.w5 768 384 1.0 sum 228 CAGE:heart, embryo E16 +230 CNhs11022+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11022/summary/coverage+.w5 768 384 1.0 sum 231 CAGE:stomach, embryo E16 +231 CNhs11022- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11022/summary/coverage-.w5 768 384 1.0 sum 230 CAGE:stomach, embryo E16 +232 CNhs11023+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11023/summary/coverage+.w5 768 384 1.0 sum 233 CAGE:eyeball, embryo E17 +233 CNhs11023- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11023/summary/coverage-.w5 768 384 1.0 sum 232 CAGE:eyeball, embryo E17 +234 CNhs11025+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11025/summary/coverage+.w5 768 384 1.0 sum 235 CAGE:heart, embryo E17 +235 CNhs11025- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11025/summary/coverage-.w5 768 384 1.0 sum 234 CAGE:heart, embryo E17 +236 CNhs11026+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11026/summary/coverage+.w5 768 384 1.0 sum 237 CAGE:adrenal gland, embryo E18 +237 CNhs11026- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11026/summary/coverage-.w5 768 384 1.0 sum 236 CAGE:adrenal gland, embryo E18 +238 CNhs11027+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11027/summary/coverage+.w5 768 384 1.0 sum 239 CAGE:testis, embryo E18 +239 CNhs11027- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11027/summary/coverage-.w5 768 384 1.0 sum 238 CAGE:testis, embryo E18 +240 CNhs11028+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11028/summary/coverage+.w5 768 384 1.0 sum 241 CAGE:kidney, embryo E17 +241 CNhs11028- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11028/summary/coverage-.w5 768 384 1.0 sum 240 CAGE:kidney, embryo E17 +242 CNhs11029+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11029/summary/coverage+.w5 768 384 1.0 sum 243 CAGE:testis, embryo E17 +243 CNhs11029- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11029/summary/coverage-.w5 768 384 1.0 sum 242 CAGE:testis, embryo E17 +244 CNhs11030+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11030/summary/coverage+.w5 768 384 1.0 sum 245 CAGE:heart, embryo E18 +245 CNhs11030- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11030/summary/coverage-.w5 768 384 1.0 sum 244 CAGE:heart, embryo E18 +246 CNhs11031+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11031/summary/coverage+.w5 768 384 1.0 sum 247 CAGE:testis, embryo E13 +247 CNhs11031- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11031/summary/coverage-.w5 768 384 1.0 sum 246 CAGE:testis, embryo E13 +248 CNhs11033+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11033/summary/coverage+.w5 768 384 1.0 sum 249 CAGE:testis, embryo E16 +249 CNhs11033- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11033/summary/coverage-.w5 768 384 
1.0 sum 248 CAGE:testis, embryo E16 +250 CNhs11034+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11034/summary/coverage+.w5 768 384 1.0 sum 251 CAGE:testis, embryo E15 +251 CNhs11034- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11034/summary/coverage-.w5 768 384 1.0 sum 250 CAGE:testis, embryo E15 +252 CNhs11035+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11035/summary/coverage+.w5 768 384 1.0 sum 253 CAGE:spleen, embryo E16 +253 CNhs11035- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11035/summary/coverage-.w5 768 384 1.0 sum 252 CAGE:spleen, embryo E16 +254 CNhs11036+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11036/summary/coverage+.w5 768 384 1.0 sum 255 CAGE:pituitary gland, embryo E16 +255 CNhs11036- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11036/summary/coverage-.w5 768 384 1.0 sum 254 CAGE:pituitary gland, embryo E16 +256 CNhs11037+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11037/summary/coverage+.w5 768 384 1.0 sum 257 CAGE:pituitary gland, embryo E14 +257 CNhs11037- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11037/summary/coverage-.w5 768 384 1.0 sum 256 CAGE:pituitary gland, embryo E14 +258 CNhs11038+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11038/summary/coverage+.w5 768 384 1.0 sum 259 CAGE:adrenal gland, embryo E14 +259 CNhs11038- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11038/summary/coverage-.w5 768 384 1.0 sum 258 CAGE:adrenal gland, embryo E14 +260 CNhs11039+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11039/summary/coverage+.w5 768 384 1.0 sum 261 CAGE:pituitary gland, embryo E17 +261 CNhs11039- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11039/summary/coverage-.w5 768 384 1.0 sum 260 CAGE:pituitary gland, embryo E17 +262 CNhs11040+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11040/summary/coverage+.w5 768 384 1.0 sum 263 CAGE:ovary, embryo E18 +263 CNhs11040- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11040/summary/coverage-.w5 768 384 1.0 sum 262 CAGE:ovary, 
embryo E18 +264 CNhs11041+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11041/summary/coverage+.w5 768 384 1.0 sum 265 CAGE:thymus, embryo E14 +265 CNhs11041- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11041/summary/coverage-.w5 768 384 1.0 sum 264 CAGE:thymus, embryo E14 +266 CNhs11042+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11042/summary/coverage+.w5 768 384 1.0 sum 267 CAGE:pancreas, embryo E15 +267 CNhs11042- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11042/summary/coverage-.w5 768 384 1.0 sum 266 CAGE:pancreas, embryo E15 +268 CNhs11043+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11043/summary/coverage+.w5 768 384 1.0 sum 269 CAGE:adrenal gland, embryo E17 +269 CNhs11043- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11043/summary/coverage-.w5 768 384 1.0 sum 268 CAGE:adrenal gland, embryo E17 +270 CNhs11044+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11044/summary/coverage+.w5 768 384 1.0 sum 271 CAGE:gonad, embryo E13 +271 CNhs11044- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11044/summary/coverage-.w5 768 384 1.0 sum 270 CAGE:gonad, embryo E13 +272 CNhs11056+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11055/summary/coverage+.w5 768 384 1.0 sum 273 CAGE:Mouse Aortic Smooth Muscle cells - differentiated, biol_ +273 CNhs11056- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11055/summary/coverage-.w5 768 384 1.0 sum 272 CAGE:Mouse Aortic Smooth Muscle cells - differentiated, biol_ +274 CNhs11093+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11093/summary/coverage+.w5 768 384 1.0 sum 275 CAGE:mouse fibroblast cell line: CRL-1658 NIH/3T3 +275 CNhs11093- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11093/summary/coverage-.w5 768 384 1.0 sum 274 CAGE:mouse fibroblast cell line: CRL-1658 NIH/3T3 +276 CNhs11094+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11094/summary/coverage+.w5 768 384 1.0 sum 277 CAGE:pancreas, neonate N25 +277 CNhs11094- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11094/summary/coverage-.w5 768 384 1.0 sum 276 CAGE:pancreas, neonate N25 +278 CNhs11095+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11095/summary/coverage+.w5 768 384 1.0 sum 279 CAGE:intestine, neonate N07 +279 CNhs11095- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11095/summary/coverage-.w5 768 384 1.0 sum 278 CAGE:intestine, neonate N07 +280 CNhs11097+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11097/summary/coverage+.w5 768 384 1.0 sum 281 CAGE:skin, neonate N06 +281 CNhs11097- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11097/summary/coverage-.w5 768 384 1.0 sum 280 CAGE:skin, neonate N06 +282 CNhs11098+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11098/summary/coverage+.w5 768 384 1.0 sum 283 CAGE:intestine, neonate N10 +283 CNhs11098- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11098/summary/coverage-.w5 768 384 1.0 sum 282 CAGE:intestine, neonate N10 +284 CNhs11099+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11099/summary/coverage+.w5 768 384 1.0 sum 285 CAGE:spleen, neonate N25 +285 CNhs11099- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11099/summary/coverage-.w5 768 384 1.0 sum 284 CAGE:spleen, neonate N25 +286 CNhs11101+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11101/summary/coverage+.w5 768 384 1.0 sum 287 CAGE:liver, neonate N06 +287 CNhs11101- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11101/summary/coverage-.w5 768 384 1.0 sum 286 CAGE:liver, neonate N06 +288 CNhs11102+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11102/summary/coverage+.w5 768 384 1.0 sum 289 CAGE:intestine, neonate N06 +289 CNhs11102- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11102/summary/coverage-.w5 768 384 1.0 sum 288 CAGE:intestine, neonate N06 +290 CNhs11103+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11103/summary/coverage+.w5 768 384 1.0 sum 291 CAGE:liver, neonate N07 +291 CNhs11103- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11103/summary/coverage-.w5 768 384 1.0 sum 290 CAGE:liver, neonate N07 +292 CNhs11104+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11104/summary/coverage+.w5 768 384 1.0 sum 293 CAGE:stomach, neonate N25 +293 CNhs11104- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11104/summary/coverage-.w5 768 384 1.0 sum 292 CAGE:stomach, neonate N25 +294 CNhs11105+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11105/summary/coverage+.w5 768 384 1.0 sum 295 CAGE:pancreas, neonate N00 +295 CNhs11105- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11105/summary/coverage-.w5 768 384 1.0 sum 294 CAGE:pancreas, neonate N00 +296 CNhs11106+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11106/summary/coverage+.w5 768 384 1.0 sum 297 CAGE:liver, neonate N30 +297 CNhs11106- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11106/summary/coverage-.w5 768 384 1.0 sum 296 CAGE:liver, neonate N30 +298 CNhs11107+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11107/summary/coverage+.w5 768 384 1.0 sum 299 CAGE:cortex, neonate N30 +299 CNhs11107- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11107/summary/coverage-.w5 768 384 1.0 sum 298 CAGE:cortex, neonate N30 +300 CNhs11108+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11108/summary/coverage+.w5 768 384 1.0 sum 301 CAGE:skin, neonate N10 +301 CNhs11108- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11108/summary/coverage-.w5 768 384 1.0 sum 300 CAGE:skin, neonate N10 +302 CNhs11109+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11109/summary/coverage+.w5 768 384 1.0 sum 303 CAGE:lung, neonate N20 +303 CNhs11109- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11109/summary/coverage-.w5 768 384 1.0 sum 302 CAGE:lung, neonate N20 +304 CNhs11110+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11110/summary/coverage+.w5 768 384 1.0 sum 305 CAGE:testis, neonate N20 +305 CNhs11110- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11110/summary/coverage-.w5 768 384 
1.0 sum 304 CAGE:testis, neonate N20 +306 CNhs11111+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11111/summary/coverage+.w5 768 384 1.0 sum 307 CAGE:lung, neonate N07 +307 CNhs11111- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11111/summary/coverage-.w5 768 384 1.0 sum 306 CAGE:lung, neonate N07 +308 CNhs11112+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11112/summary/coverage+.w5 768 384 1.0 sum 309 CAGE:spleen, neonate N20 +309 CNhs11112- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11112/summary/coverage-.w5 768 384 1.0 sum 308 CAGE:spleen, neonate N20 +310 CNhs11113+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11113/summary/coverage+.w5 768 384 1.0 sum 311 CAGE:kidney, neonate N20 +311 CNhs11113- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11113/summary/coverage-.w5 768 384 1.0 sum 310 CAGE:kidney, neonate N20 +312 CNhs11114+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11114/summary/coverage+.w5 768 384 1.0 sum 313 CAGE:small intestine, neonate N16 +313 CNhs11114- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11114/summary/coverage-.w5 768 384 1.0 sum 312 CAGE:small intestine, neonate N16 +314 CNhs11115+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11115/summary/coverage+.w5 768 384 1.0 sum 315 CAGE:liver, neonate N10 +315 CNhs11115- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11115/summary/coverage-.w5 768 384 1.0 sum 314 CAGE:liver, neonate N10 +316 CNhs11116+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11116/summary/coverage+.w5 768 384 1.0 sum 317 CAGE:spleen, neonate N10 +317 CNhs11116- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11116/summary/coverage-.w5 768 384 1.0 sum 316 CAGE:spleen, neonate N10 +318 CNhs11117+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11117/summary/coverage+.w5 768 384 1.0 sum 319 CAGE:liver, neonate N00 +319 CNhs11117- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11117/summary/coverage-.w5 768 384 1.0 sum 318 CAGE:liver, neonate N00 +320 CNhs11118+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11118/summary/coverage+.w5 768 384 1.0 sum 321 CAGE:heart, neonate N10 +321 CNhs11118- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11118/summary/coverage-.w5 768 384 1.0 sum 320 CAGE:heart, neonate N10 +322 CNhs11119+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11119/summary/coverage+.w5 768 384 1.0 sum 323 CAGE:lung, neonate N25 +323 CNhs11119- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11119/summary/coverage-.w5 768 384 1.0 sum 322 CAGE:lung, neonate N25 +324 CNhs11121+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11121/summary/coverage+.w5 768 384 1.0 sum 325 CAGE:intestine, neonate N25 +325 CNhs11121- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11121/summary/coverage-.w5 768 384 1.0 sum 324 CAGE:intestine, neonate N25 +326 CNhs11122+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11122/summary/coverage+.w5 768 384 1.0 sum 327 CAGE:kidney, neonate N25 +327 CNhs11122- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11122/summary/coverage-.w5 768 384 1.0 sum 326 CAGE:kidney, neonate N25 +328 CNhs11123+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11123/summary/coverage+.w5 768 384 1.0 sum 329 CAGE:liver, neonate N03 +329 CNhs11123- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11123/summary/coverage-.w5 768 384 1.0 sum 328 CAGE:liver, neonate N03 +330 CNhs11124+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11124/summary/coverage+.w5 768 384 1.0 sum 331 CAGE:skin, neonate N00 +331 CNhs11124- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11124/summary/coverage-.w5 768 384 1.0 sum 330 CAGE:skin, neonate N00 +332 CNhs11125+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11125/summary/coverage+.w5 768 384 1.0 sum 333 CAGE:thymus, neonate N25 +333 CNhs11125- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11125/summary/coverage-.w5 768 384 1.0 sum 332 CAGE:thymus, neonate N25 +334 CNhs11126+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11126/summary/coverage+.w5 768 384 1.0 
sum 335 CAGE:intestine, neonate N00 +335 CNhs11126- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11126/summary/coverage-.w5 768 384 1.0 sum 334 CAGE:intestine, neonate N00 +336 CNhs11127+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11127/summary/coverage+.w5 768 384 1.0 sum 337 CAGE:heart, neonate N20 +337 CNhs11127- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11127/summary/coverage-.w5 768 384 1.0 sum 336 CAGE:heart, neonate N20 +338 CNhs11129+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11129/summary/coverage+.w5 768 384 1.0 sum 339 CAGE:muscle (biceps femoris), neonate N30 +339 CNhs11129- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11129/summary/coverage-.w5 768 384 1.0 sum 338 CAGE:muscle (biceps femoris), neonate N30 +340 CNhs11130+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11130/summary/coverage+.w5 768 384 1.0 sum 341 CAGE:testis, neonate N30 +341 CNhs11130- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11130/summary/coverage-.w5 768 384 1.0 sum 340 CAGE:testis, neonate N30 +342 CNhs11131+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11131/summary/coverage+.w5 768 384 1.0 sum 343 CAGE:intestine, neonate N30 +343 CNhs11131- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11131/summary/coverage-.w5 768 384 1.0 sum 342 CAGE:intestine, neonate N30 +344 CNhs11132+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11132/summary/coverage+.w5 768 384 1.0 sum 345 CAGE:thymus, neonate N30 +345 CNhs11132- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11132/summary/coverage-.w5 768 384 1.0 sum 344 CAGE:thymus, neonate N30 +346 CNhs11133+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11133/summary/coverage+.w5 768 384 1.0 sum 347 CAGE:lung, neonate N30 +347 CNhs11133- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11133/summary/coverage-.w5 768 384 1.0 sum 346 CAGE:lung, neonate N30 +348 CNhs11134+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11134/summary/coverage+.w5 768 384 1.0 sum 349 CAGE:stomach, neonate N30 +349 
CNhs11134- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11134/summary/coverage-.w5 768 384 1.0 sum 348 CAGE:stomach, neonate N30 +350 CNhs11135+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11135/summary/coverage+.w5 768 384 1.0 sum 351 CAGE:cerebellum, neonate N30 +351 CNhs11135- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11135/summary/coverage-.w5 768 384 1.0 sum 350 CAGE:cerebellum, neonate N30 +352 CNhs11136+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11136/summary/coverage+.w5 768 384 1.0 sum 353 CAGE:pancreas, neonate N16 +353 CNhs11136- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11136/summary/coverage-.w5 768 384 1.0 sum 352 CAGE:pancreas, neonate N16 +354 CNhs11137+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11137/summary/coverage+.w5 768 384 1.0 sum 355 CAGE:thymus, neonate N03 +355 CNhs11137- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11137/summary/coverage-.w5 768 384 1.0 sum 354 CAGE:thymus, neonate N03 +356 CNhs11138+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11138/summary/coverage+.w5 768 384 1.0 sum 357 CAGE:pancreas, neonate N01 +357 CNhs11138- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11138/summary/coverage-.w5 768 384 1.0 sum 356 CAGE:pancreas, neonate N01 +358 CNhs11139+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11139/summary/coverage+.w5 768 384 1.0 sum 359 CAGE:pancreas, neonate N02 +359 CNhs11139- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11139/summary/coverage-.w5 768 384 1.0 sum 358 CAGE:pancreas, neonate N02 +360 CNhs11140+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11140/summary/coverage+.w5 768 384 1.0 sum 361 CAGE:eyeball, neonate N01 +361 CNhs11140- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11140/summary/coverage-.w5 768 384 1.0 sum 360 CAGE:eyeball, neonate N01 +362 CNhs11181+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11181/summary/coverage+.w5 768 384 1.0 sum 363 CAGE:thymus, neonate N02 +363 CNhs11181- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11181/summary/coverage-.w5 768 384 1.0 sum 362 CAGE:thymus, neonate N02 +364 CNhs11182+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11182/summary/coverage+.w5 768 384 1.0 sum 365 CAGE:pancreas, neonate N30 +365 CNhs11182- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11182/summary/coverage-.w5 768 384 1.0 sum 364 CAGE:pancreas, neonate N30 +366 CNhs11186+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11186/summary/coverage+.w5 768 384 1.0 sum 367 CAGE:thymus, neonate N20 +367 CNhs11186- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11186/summary/coverage-.w5 768 384 1.0 sum 366 CAGE:thymus, neonate N20 +368 CNhs11187+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11187/summary/coverage+.w5 768 384 1.0 sum 369 CAGE:intestine, neonate N20 +369 CNhs11187- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11187/summary/coverage-.w5 768 384 1.0 sum 368 CAGE:intestine, neonate N20 +370 CNhs11188+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11188/summary/coverage+.w5 768 384 1.0 sum 371 CAGE:eyeball, neonate N16 +371 CNhs11188- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11188/summary/coverage-.w5 768 384 1.0 sum 370 CAGE:eyeball, neonate N16 +372 CNhs11189+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11189/summary/coverage+.w5 768 384 1.0 sum 373 CAGE:testis, neonate N00 +373 CNhs11189- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11189/summary/coverage-.w5 768 384 1.0 sum 372 CAGE:testis, neonate N00 +374 CNhs11190+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11190/summary/coverage+.w5 768 384 1.0 sum 375 CAGE:pituitary gland, neonate N00 +375 CNhs11190- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11190/summary/coverage-.w5 768 384 1.0 sum 374 CAGE:pituitary gland, neonate N00 +376 CNhs11191+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11191/summary/coverage+.w5 768 384 1.0 sum 377 CAGE:adrenal gland, neonate N00 +377 CNhs11191- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11191/summary/coverage-.w5 768 384 1.0 sum 376 CAGE:adrenal gland, neonate N00 +378 CNhs11192+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11192/summary/coverage+.w5 768 384 1.0 sum 379 CAGE:intestine, neonate N01 +379 CNhs11192- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11192/summary/coverage-.w5 768 384 1.0 sum 378 CAGE:intestine, neonate N01 +380 CNhs11193+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11193/summary/coverage+.w5 768 384 1.0 sum 381 CAGE:stomach, neonate N03 +381 CNhs11193- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11193/summary/coverage-.w5 768 384 1.0 sum 380 CAGE:stomach, neonate N03 +382 CNhs11194+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11194/summary/coverage+.w5 768 384 1.0 sum 383 CAGE:thymus, neonate N10 +383 CNhs11194- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11194/summary/coverage-.w5 768 384 1.0 sum 382 CAGE:thymus, neonate N10 +384 CNhs11195+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11195/summary/coverage+.w5 768 384 1.0 sum 385 CAGE:bone (os femoris), neonate N20 +385 CNhs11195- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11195/summary/coverage-.w5 768 384 1.0 sum 384 CAGE:bone (os femoris), neonate N20 +386 CNhs11196+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11196/summary/coverage+.w5 768 384 1.0 sum 387 CAGE:heart, neonate N25 +387 CNhs11196- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11196/summary/coverage-.w5 768 384 1.0 sum 386 CAGE:heart, neonate N25 +388 CNhs11197+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11197/summary/coverage+.w5 768 384 1.0 sum 389 CAGE:thymus, neonate N06 +389 CNhs11197- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11197/summary/coverage-.w5 768 384 1.0 sum 388 CAGE:thymus, neonate N06 +390 CNhs11198+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11198/summary/coverage+.w5 768 384 1.0 sum 391 CAGE:liver, neonate N25 +391 CNhs11198- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11198/summary/coverage-.w5 768 384 1.0 sum 390 CAGE:liver, neonate N25 +392 CNhs11199+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11199/summary/coverage+.w5 768 384 1.0 sum 393 CAGE:epididymis and seminiferous tubule, neonate N30 +393 CNhs11199- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11199/summary/coverage-.w5 768 384 1.0 sum 392 CAGE:epididymis and seminiferous tubule, neonate N30 +394 CNhs11200+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11200/summary/coverage+.w5 768 384 1.0 sum 395 CAGE:medulla oblongata, neonate N30 +395 CNhs11200- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11200/summary/coverage-.w5 768 384 1.0 sum 394 CAGE:medulla oblongata, neonate N30 +396 CNhs11201+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11201/summary/coverage+.w5 768 384 1.0 sum 397 CAGE:diencephalon, neonate N30 +397 CNhs11201- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11201/summary/coverage-.w5 768 384 1.0 sum 396 CAGE:diencephalon, neonate N30 +398 CNhs11202+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11202/summary/coverage+.w5 768 384 1.0 sum 399 CAGE:heart, neonate N30 +399 CNhs11202- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11202/summary/coverage-.w5 768 384 1.0 sum 398 CAGE:heart, neonate N30 +400 CNhs11203+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11203/summary/coverage+.w5 768 384 1.0 sum 401 CAGE:kidney, neonate N30 +401 CNhs11203- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11203/summary/coverage-.w5 768 384 1.0 sum 400 CAGE:kidney, neonate N30 +402 CNhs11204+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11204/summary/coverage+.w5 768 384 1.0 sum 403 CAGE:testis, neonate N10 +403 CNhs11204- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11204/summary/coverage-.w5 768 384 1.0 sum 402 CAGE:testis, neonate N10 +404 CNhs11205+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11205/summary/coverage+.w5 768 384 1.0 sum 405 CAGE:eyeball, neonate N02 +405 
CNhs11205- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11205/summary/coverage-.w5 768 384 1.0 sum 404 CAGE:eyeball, neonate N02 +406 CNhs11206+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11206/summary/coverage+.w5 768 384 1.0 sum 407 CAGE:kidney, neonate N10 +407 CNhs11206- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11206/summary/coverage-.w5 768 384 1.0 sum 406 CAGE:kidney, neonate N10 +408 CNhs11207+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11207/summary/coverage+.w5 768 384 1.0 sum 409 CAGE:eyeball, neonate N00 +409 CNhs11207- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11207/summary/coverage-.w5 768 384 1.0 sum 408 CAGE:eyeball, neonate N00 +410 CNhs11209+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11209/summary/coverage+.w5 768 384 1.0 sum 411 CAGE:heart, neonate N16 +411 CNhs11209- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11209/summary/coverage-.w5 768 384 1.0 sum 410 CAGE:heart, neonate N16 +412 CNhs11210+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11210/summary/coverage+.w5 768 384 1.0 sum 413 CAGE:stomach, neonate N07 +413 CNhs11210- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11210/summary/coverage-.w5 768 384 1.0 sum 412 CAGE:stomach, neonate N07 +414 CNhs11211+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11211/summary/coverage+.w5 768 384 1.0 sum 415 CAGE:thymus, neonate N07 +415 CNhs11211- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11211/summary/coverage-.w5 768 384 1.0 sum 414 CAGE:thymus, neonate N07 +416 CNhs11212+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11212/summary/coverage+.w5 768 384 1.0 sum 417 CAGE:lung, neonate N06 +417 CNhs11212- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11212/summary/coverage-.w5 768 384 1.0 sum 416 CAGE:lung, neonate N06 +418 CNhs11213+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11213/summary/coverage+.w5 768 384 1.0 sum 419 CAGE:heart, neonate N00 +419 CNhs11213- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11213/summary/coverage-.w5 768 384 1.0 sum 418 CAGE:heart, neonate N00 +420 CNhs11214+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11214/summary/coverage+.w5 768 384 1.0 sum 421 CAGE:kidney, neonate N00 +421 CNhs11214- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11214/summary/coverage-.w5 768 384 1.0 sum 420 CAGE:kidney, neonate N00 +422 CNhs11215+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11215/summary/coverage+.w5 768 384 1.0 sum 423 CAGE:skin, neonate N03 +423 CNhs11215- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11215/summary/coverage-.w5 768 384 1.0 sum 422 CAGE:skin, neonate N03 +424 CNhs11217+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11217/summary/coverage+.w5 768 384 1.0 sum 425 CAGE:ovary, neonate N00 +425 CNhs11217- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11217/summary/coverage-.w5 768 384 1.0 sum 424 CAGE:ovary, neonate N00 +426 CNhs11218+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11218/summary/coverage+.w5 768 384 1.0 sum 427 CAGE:epididymis and seminiferous tubule, neonate N00 +427 CNhs11218- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11218/summary/coverage-.w5 768 384 1.0 sum 426 CAGE:epididymis and seminiferous tubule, neonate N00 +428 CNhs11219+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11219/summary/coverage+.w5 768 384 1.0 sum 429 CAGE:lung, neonate N10 +429 CNhs11219- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11219/summary/coverage-.w5 768 384 1.0 sum 428 CAGE:lung, neonate N10 +430 CNhs11220+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11220/summary/coverage+.w5 768 384 1.0 sum 431 CAGE:liver, neonate N20 +431 CNhs11220- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11220/summary/coverage-.w5 768 384 1.0 sum 430 CAGE:liver, neonate N20 +432 CNhs11221+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11221/summary/coverage+.w5 768 384 1.0 sum 433 CAGE:heart, neonate N03 +433 CNhs11221- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11221/summary/coverage-.w5 768 384 1.0 sum 432 CAGE:heart, neonate N03 +434 CNhs11222+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11222/summary/coverage+.w5 768 384 1.0 sum 435 CAGE:testis, neonate N07 +435 CNhs11222- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11222/summary/coverage-.w5 768 384 1.0 sum 434 CAGE:testis, neonate N07 +436 CNhs11223+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11223/summary/coverage+.w5 768 384 1.0 sum 437 CAGE:adrenal gland, neonate N25 +437 CNhs11223- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11223/summary/coverage-.w5 768 384 1.0 sum 436 CAGE:adrenal gland, neonate N25 +438 CNhs11224+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11224/summary/coverage+.w5 768 384 1.0 sum 439 CAGE:lung, neonate N00 +439 CNhs11224- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11224/summary/coverage-.w5 768 384 1.0 sum 438 CAGE:lung, neonate N00 +440 CNhs11225+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11225/summary/coverage+.w5 768 384 1.0 sum 441 CAGE:bone (os femoris), neonate N16 +441 CNhs11225- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11225/summary/coverage-.w5 768 384 1.0 sum 440 CAGE:bone (os femoris), neonate N16 +442 CNhs11226+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11226/summary/coverage+.w5 768 384 1.0 sum 443 CAGE:corpus striatum, neonate N00 +443 CNhs11226- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11226/summary/coverage-.w5 768 384 1.0 sum 442 CAGE:corpus striatum, neonate N00 +444 CNhs11227+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11227/summary/coverage+.w5 768 384 1.0 sum 445 CAGE:bone (os femoris), neonate N02 +445 CNhs11227- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11227/summary/coverage-.w5 768 384 1.0 sum 444 CAGE:bone (os femoris), neonate N02 +446 CNhs11228+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11228/summary/coverage+.w5 768 384 1.0 sum 447 CAGE:hippocampus, neonate N00 +447 CNhs11228- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs11228/summary/coverage-.w5 768 384 1.0 sum 446 CAGE:hippocampus, neonate N00 +448 CNhs11297+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11297/summary/coverage+.w5 768 384 1.0 sum 449 CAGE:Mouse Aortic Smooth Muscle cells, +449 CNhs11297- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11297/summary/coverage-.w5 768 384 1.0 sum 448 CAGE:Mouse Aortic Smooth Muscle cells, +450 CNhs11915+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11915/summary/coverage+.w5 768 384 1.0 sum 451 CAGE:Astrocytes, +451 CNhs11915- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11915/summary/coverage-.w5 768 384 1.0 sum 450 CAGE:Astrocytes, +452 CNhs11928+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11928/summary/coverage+.w5 768 384 1.0 sum 453 CAGE:granulocyte macrophage progenitor GMP, biol_ +453 CNhs11928- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11928/summary/coverage-.w5 768 384 1.0 sum 452 CAGE:granulocyte macrophage progenitor GMP, biol_ +454 CNhs11929+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11929/summary/coverage+.w5 768 384 1.0 sum 455 CAGE:Inner ear stem cells, 1st generation stem cells, pool1 +455 CNhs11929- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11929/summary/coverage-.w5 768 384 1.0 sum 454 CAGE:Inner ear stem cells, 1st generation stem cells, pool1 +456 CNhs11947+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11947/summary/coverage+.w5 768 384 1.0 sum 457 CAGE:Mouse Neurons - cortical, +457 CNhs11947- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs11947/summary/coverage-.w5 768 384 1.0 sum 456 CAGE:Mouse Neurons - cortical, +458 CNhs12076+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12076/summary/coverage+.w5 768 384 1.0 sum 459 CAGE:Mouse Astrocytes - cerebellar, +459 CNhs12076- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12076/summary/coverage-.w5 768 384 1.0 sum 458 CAGE:Mouse Astrocytes - cerebellar, +460 CNhs12077+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs12077/summary/coverage+.w5 768 384 1.0 sum 461 CAGE:Mouse Astrocytes - hippocampus, +461 CNhs12077- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12077/summary/coverage-.w5 768 384 1.0 sum 460 CAGE:Mouse Astrocytes - hippocampus, +462 CNhs12107+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12078/summary/coverage+.w5 768 384 1.0 sum 463 CAGE:Mouse Astrocytes, +463 CNhs12107- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12078/summary/coverage-.w5 768 384 1.0 sum 462 CAGE:Mouse Astrocytes, +464 CNhs12108+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12108/summary/coverage+.w5 768 384 1.0 sum 465 CAGE:Mouse Granule cells, +465 CNhs12108- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12108/summary/coverage-.w5 768 384 1.0 sum 464 CAGE:Mouse Granule cells, +466 CNhs12109+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12109/summary/coverage+.w5 768 384 1.0 sum 467 CAGE:Mouse Meningeal cells, +467 CNhs12109- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12109/summary/coverage-.w5 768 384 1.0 sum 466 CAGE:Mouse Meningeal cells, +468 CNhs12110+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12110/summary/coverage+.w5 768 384 1.0 sum 469 CAGE:Mouse Neurons - hippocampal, +469 CNhs12110- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12110/summary/coverage-.w5 768 384 1.0 sum 468 CAGE:Mouse Neurons - hippocampal, +470 CNhs12134+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12111/summary/coverage+.w5 768 384 1.0 sum 471 CAGE:Mouse Neurons - striatal, +471 CNhs12134- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12111/summary/coverage-.w5 768 384 1.0 sum 470 CAGE:Mouse Neurons - striatal, +472 CNhs12632+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12113/summary/coverage+.w5 768 384 1.0 sum 473 CAGE:Mouse Neurons - ventral spinal cord, +473 CNhs12632- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12113/summary/coverage-.w5 768 384 1.0 sum 472 CAGE:Mouse Neurons - ventral spinal cord, +474 
CNhs12507+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12115/summary/coverage+.w5 768 384 1.0 sum 475 CAGE:Mouse Schwann, +475 CNhs12507- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12115/summary/coverage-.w5 768 384 1.0 sum 474 CAGE:Mouse Schwann, +476 CNhs12130+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12130/summary/coverage+.w5 768 384 1.0 sum 477 CAGE:Mouse Embryonic fibroblasts, +477 CNhs12130- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12130/summary/coverage-.w5 768 384 1.0 sum 476 CAGE:Mouse Embryonic fibroblasts, +478 CNhs12189+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12189/summary/coverage+.w5 768 384 1.0 sum 479 CAGE:Inner ear stem cells, differentiation, day03, pool1 +479 CNhs12189- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12189/summary/coverage-.w5 768 384 1.0 sum 478 CAGE:Inner ear stem cells, differentiation, day03, pool1 +480 CNhs12190+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12190/summary/coverage+.w5 768 384 1.0 sum 481 CAGE:Inner ear stem cells, differentiation, day10, pool1 +481 CNhs12190- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12190/summary/coverage-.w5 768 384 1.0 sum 480 CAGE:Inner ear stem cells, differentiation, day10, pool1 +482 CNhs12198+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12198/summary/coverage+.w5 768 384 1.0 sum 483 CAGE:MC1+Gr1+ myeloid-derived suppressor cells cancer, (0127 3LL) +483 CNhs12198- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12198/summary/coverage-.w5 768 384 1.0 sum 482 CAGE:MC1+Gr1+ myeloid-derived suppressor cells cancer, (0127 3LL) +484 CNhs12203+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12203/summary/coverage+.w5 768 384 1.0 sum 485 CAGE:common myeloid progenitor CMP, biol_ +485 CNhs12203- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12203/summary/coverage-.w5 768 384 1.0 sum 484 CAGE:common myeloid progenitor CMP, biol_ +486 CNhs12355+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12353/summary/coverage+.w5 768 384 1.0 sum 
487 CAGE:Mouse Cardiac Myocytes, +487 CNhs12355- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12353/summary/coverage-.w5 768 384 1.0 sum 486 CAGE:Mouse Cardiac Myocytes, +488 CNhs12508+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12508/summary/coverage+.w5 768 384 1.0 sum 489 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_rep1 +489 CNhs12508- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12508/summary/coverage-.w5 768 384 1.0 sum 488 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_rep1 +490 CNhs12515+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12515/summary/coverage+.w5 768 384 1.0 sum 491 CAGE:natural helper cells, naive, biol_, tech_rep1 +491 CNhs12515- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12515/summary/coverage-.w5 768 384 1.0 sum 490 CAGE:natural helper cells, naive, biol_, tech_rep1 +492 CNhs12532+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12532/summary/coverage+.w5 768 384 1.0 sum 493 CAGE:Sox2+ Supporting cells - organ of corti, pool1 +493 CNhs12532- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12532/summary/coverage-.w5 768 384 1.0 sum 492 CAGE:Sox2+ Supporting cells - organ of corti, pool1 +494 CNhs12533+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12533/summary/coverage+.w5 768 384 1.0 sum 495 CAGE:Atoh1+ Inner ear hair cells - organ of corti, pool1 +495 CNhs12533- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12533/summary/coverage-.w5 768 384 1.0 sum 494 CAGE:Atoh1+ Inner ear hair cells - organ of corti, pool1 +496 CNhs12534+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12534/summary/coverage+.w5 768 384 1.0 sum 497 CAGE:stem cell (cKit+ Sca1- lineage-) KSL, biol_ +497 CNhs12534- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12534/summary/coverage-.w5 768 384 1.0 sum 496 CAGE:stem cell (cKit+ Sca1- lineage-) KSL, biol_ +498 CNhs12539+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12539/summary/coverage+.w5 768 384 1.0 sum 499 CAGE:MC1+Gr1+ myeloid-derived suppressor cells 
cancer, (10,11,12 3LL) +499 CNhs12539- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12539/summary/coverage-.w5 768 384 1.0 sum 498 CAGE:MC1+Gr1+ myeloid-derived suppressor cells cancer, (10,11,12 3LL) +500 CNhs12540+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12540/summary/coverage+.w5 768 384 1.0 sum 501 CAGE:MC1+Gr1+ myeloid-derived suppressor cells control, (4,5,6PBS) +501 CNhs12540- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12540/summary/coverage-.w5 768 384 1.0 sum 500 CAGE:MC1+Gr1+ myeloid-derived suppressor cells control, (4,5,6PBS) +502 CNhs12541+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12541/summary/coverage+.w5 768 384 1.0 sum 503 CAGE:MC1+Gr1+ myeloid-derived suppressor cells control, (0127 PBS) +503 CNhs12541- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12541/summary/coverage-.w5 768 384 1.0 sum 502 CAGE:MC1+Gr1+ myeloid-derived suppressor cells control, (0127 PBS) +504 CNhs12542+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12542/summary/coverage+.w5 768 384 1.0 sum 505 CAGE:MC1+Gr1+ myeloid-derived suppressor cells cancer, (0128 3LL) +505 CNhs12542- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12542/summary/coverage-.w5 768 384 1.0 sum 504 CAGE:MC1+Gr1+ myeloid-derived suppressor cells cancer, (0128 3LL) +506 CNhs12543+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12543/summary/coverage+.w5 768 384 1.0 sum 507 CAGE:MC1+Gr1+ myeloid-derived suppressor cells control, (0128 PBS) +507 CNhs12543- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12543/summary/coverage-.w5 768 384 1.0 sum 506 CAGE:MC1+Gr1+ myeloid-derived suppressor cells control, (0128 PBS) +508 CNhs12544+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12544/summary/coverage+.w5 768 384 1.0 sum 509 CAGE:Inner ear stem cells, 4th generation stem cells, pool1 +509 CNhs12544- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12544/summary/coverage-.w5 768 384 1.0 sum 508 CAGE:Inner ear stem cells, 4th generation stem cells, pool1 +510 CNhs12555+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs12555/summary/coverage+.w5 768 384 1.0 sum 511 CAGE:Lgr5 positive intestinal stem cells, pool1 +511 CNhs12555- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12555/summary/coverage-.w5 768 384 1.0 sum 510 CAGE:Lgr5 positive intestinal stem cells, pool1 +512 CNhs12556+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12556/summary/coverage+.w5 768 384 1.0 sum 513 CAGE:Lgr5 positive intestinal stem cells, pool2 +513 CNhs12556- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12556/summary/coverage-.w5 768 384 1.0 sum 512 CAGE:Lgr5 positive intestinal stem cells, pool2 +514 CNhs12557+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12557/summary/coverage+.w5 768 384 1.0 sum 515 CAGE:Lgr5 positive intestinal stem cells, pool3 +515 CNhs12557- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12557/summary/coverage-.w5 768 384 1.0 sum 514 CAGE:Lgr5 positive intestinal stem cells, pool3 +516 CNhs14334+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12576/summary/coverage+.w5 768 384 1.0 sum 517 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_ +517 CNhs14334- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12576/summary/coverage-.w5 768 384 1.0 sum 516 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_ +518 CNhs12578+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12578/summary/coverage+.w5 768 384 1.0 sum 519 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_rep1 +519 CNhs12578- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12578/summary/coverage-.w5 768 384 1.0 sum 518 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_rep1 +520 CNhs12579+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12579/summary/coverage+.w5 768 384 1.0 sum 521 CAGE:natural helper cells, IL33 treated, 01hr, biol_, tech_ +521 CNhs12579- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12579/summary/coverage-.w5 768 384 1.0 sum 520 CAGE:natural helper cells, IL33 treated, 01hr, biol_, tech_ +522 
CNhs12821+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12612/summary/coverage+.w5 768 384 1.0 sum 523 CAGE:Mouse Neurons - substantia nigra, , tech_ +523 CNhs12821- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12612/summary/coverage-.w5 768 384 1.0 sum 522 CAGE:Mouse Neurons - substantia nigra, , tech_ +524 CNhs12643+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12614/summary/coverage+.w5 768 384 1.0 sum 525 CAGE:Mouse Neurons - substantia nigra, +525 CNhs12643- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12614/summary/coverage-.w5 768 384 1.0 sum 524 CAGE:Mouse Neurons - substantia nigra, +526 CNhs14554+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12615/summary/coverage+.w5 768 384 1.0 sum 527 CAGE:Mouse hepatocyte, +527 CNhs14554- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12615/summary/coverage-.w5 768 384 1.0 sum 526 CAGE:Mouse hepatocyte, +528 CNhs12628+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12616/summary/coverage+.w5 768 384 1.0 sum 529 CAGE:Mouse Mesenchymal stem cells - bone marrow derived, +529 CNhs12628- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12616/summary/coverage-.w5 768 384 1.0 sum 528 CAGE:Mouse Mesenchymal stem cells - bone marrow derived, +530 CNhs12629+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12617/summary/coverage+.w5 768 384 1.0 sum 531 CAGE:Mouse Microglia, +531 CNhs12629- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12617/summary/coverage-.w5 768 384 1.0 sum 530 CAGE:Mouse Microglia, +532 CNhs12618+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12618/summary/coverage+.w5 768 384 1.0 sum 533 CAGE:Mouse Neurons - dorsal spinal cord, +533 CNhs12618- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12618/summary/coverage-.w5 768 384 1.0 sum 532 CAGE:Mouse Neurons - dorsal spinal cord, +534 CNhs12631+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12619/summary/coverage+.w5 768 384 1.0 sum 535 CAGE:Mouse Neurons - raphe, +535 CNhs12631- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs12619/summary/coverage-.w5 768 384 1.0 sum 534 CAGE:Mouse Neurons - raphe, +536 CNhs12818+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12818/summary/coverage+.w5 768 384 1.0 sum 537 CAGE:cerebellum, embryo E17, biol_ (E17R1) +537 CNhs12818- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12818/summary/coverage-.w5 768 384 1.0 sum 536 CAGE:cerebellum, embryo E17, biol_ (E17R1) +538 CNhs12819+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12819/summary/coverage+.w5 768 384 1.0 sum 539 CAGE:cerebellum, neonate N06, biol_ (P6R1) +539 CNhs12819- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12819/summary/coverage-.w5 768 384 1.0 sum 538 CAGE:cerebellum, neonate N06, biol_ (P6R1) +540 CNhs12820+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12820/summary/coverage+.w5 768 384 1.0 sum 541 CAGE:cerebellum, neonate N09, biol_ (P9R1) +541 CNhs12820- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12820/summary/coverage-.w5 768 384 1.0 sum 540 CAGE:cerebellum, neonate N09, biol_ (P9R1) +542 CNhs12923+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12923/summary/coverage+.w5 768 384 1.0 sum 543 CAGE:Neurons - substantia nigra, , tech_ +543 CNhs12923- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12923/summary/coverage-.w5 768 384 1.0 sum 542 CAGE:Neurons - substantia nigra, , tech_ +544 CNhs12956+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12956/summary/coverage+.w5 768 384 1.0 sum 545 CAGE:cerebellum, embryo E11, biol_ (E11R1) +545 CNhs12956- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12956/summary/coverage-.w5 768 384 1.0 sum 544 CAGE:cerebellum, embryo E11, biol_ (E11R1) +546 CNhs12957+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12957/summary/coverage+.w5 768 384 1.0 sum 547 CAGE:cerebellum, embryo E12, biol_ (E12R1) +547 CNhs12957- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12957/summary/coverage-.w5 768 384 1.0 sum 546 CAGE:cerebellum, embryo E12, biol_ (E12R1) +548 CNhs12958+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs12958/summary/coverage+.w5 768 384 1.0 sum 549 CAGE:cerebellum, embryo E13, biol_ (E13R1) +549 CNhs12958- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12958/summary/coverage-.w5 768 384 1.0 sum 548 CAGE:cerebellum, embryo E13, biol_ (E13R1) +550 CNhs12960+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12960/summary/coverage+.w5 768 384 1.0 sum 551 CAGE:cerebellum, embryo E14, biol_ (E14R1) +551 CNhs12960- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12960/summary/coverage-.w5 768 384 1.0 sum 550 CAGE:cerebellum, embryo E14, biol_ (E14R1) +552 CNhs12961+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12961/summary/coverage+.w5 768 384 1.0 sum 553 CAGE:cerebellum, embryo E15, biol_ (E15R1) +553 CNhs12961- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12961/summary/coverage-.w5 768 384 1.0 sum 552 CAGE:cerebellum, embryo E15, biol_ (E15R1) +554 CNhs12962+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12962/summary/coverage+.w5 768 384 1.0 sum 555 CAGE:cerebellum, embryo E18, biol_ (E18R1) +555 CNhs12962- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12962/summary/coverage-.w5 768 384 1.0 sum 554 CAGE:cerebellum, embryo E18, biol_ (E18R1) +556 CNhs12963+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12963/summary/coverage+.w5 768 384 1.0 sum 557 CAGE:cerebellum, neonate N00, biol_ (P0R1) +557 CNhs12963- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs12963/summary/coverage-.w5 768 384 1.0 sum 556 CAGE:cerebellum, neonate N00, biol_ (P0R1) +558 CNhs13000+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13000/summary/coverage+.w5 768 384 1.0 sum 559 CAGE:cerebellum, embryo E16, biol_ (E16R1) +559 CNhs13000- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13000/summary/coverage-.w5 768 384 1.0 sum 558 CAGE:cerebellum, embryo E16, biol_ (E16R1) +560 CNhs13001+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13001/summary/coverage+.w5 768 384 1.0 sum 561 CAGE:cerebellum, neonate N03, biol_ (P3R1) 
+561 CNhs13001- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13001/summary/coverage-.w5 768 384 1.0 sum 560 CAGE:cerebellum, neonate N03, biol_ (P3R1) +562 CNhs13002+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13002/summary/coverage+.w5 768 384 1.0 sum 563 CAGE:cerebellum, embryo E11, biol_ (E11R2) +563 CNhs13002- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13002/summary/coverage-.w5 768 384 1.0 sum 562 CAGE:cerebellum, embryo E11, biol_ (E11R2) +564 CNhs13003+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13003/summary/coverage+.w5 768 384 1.0 sum 565 CAGE:cerebellum, embryo E12, biol_ (E12R2) +565 CNhs13003- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13003/summary/coverage-.w5 768 384 1.0 sum 564 CAGE:cerebellum, embryo E12, biol_ (E12R2) +566 CNhs13004+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13004/summary/coverage+.w5 768 384 1.0 sum 567 CAGE:cerebellum, embryo E13, biol_ (E13R2) +567 CNhs13004- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13004/summary/coverage-.w5 768 384 1.0 sum 566 CAGE:cerebellum, embryo E13, biol_ (E13R2) +568 CNhs13005+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13005/summary/coverage+.w5 768 384 1.0 sum 569 CAGE:cerebellum, embryo E14, biol_ (E14R2) +569 CNhs13005- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13005/summary/coverage-.w5 768 384 1.0 sum 568 CAGE:cerebellum, embryo E14, biol_ (E14R2) +570 CNhs13006+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13006/summary/coverage+.w5 768 384 1.0 sum 571 CAGE:cerebellum, embryo E15, biol_ (E15R2) +571 CNhs13006- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13006/summary/coverage-.w5 768 384 1.0 sum 570 CAGE:cerebellum, embryo E15, biol_ (E15R2) +572 CNhs13007+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13007/summary/coverage+.w5 768 384 1.0 sum 573 CAGE:cerebellum, embryo E16, biol_ (E16R2) +573 CNhs13007- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13007/summary/coverage-.w5 768 384 1.0 sum 572 CAGE:cerebellum, embryo 
E16, biol_ (E16R2) +574 CNhs13008+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13008/summary/coverage+.w5 768 384 1.0 sum 575 CAGE:cerebellum, embryo E17, biol_ (E17R2) +575 CNhs13008- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13008/summary/coverage-.w5 768 384 1.0 sum 574 CAGE:cerebellum, embryo E17, biol_ (E17R2) +576 CNhs13009+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13009/summary/coverage+.w5 768 384 1.0 sum 577 CAGE:cerebellum, embryo E18, biol_ (E18R2) +577 CNhs13009- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13009/summary/coverage-.w5 768 384 1.0 sum 576 CAGE:cerebellum, embryo E18, biol_ (E18R2) +578 CNhs13010+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13010/summary/coverage+.w5 768 384 1.0 sum 579 CAGE:cerebellum, neonate N00, biol_ (P0R2) +579 CNhs13010- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13010/summary/coverage-.w5 768 384 1.0 sum 578 CAGE:cerebellum, neonate N00, biol_ (P0R2) +580 CNhs13011+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13011/summary/coverage+.w5 768 384 1.0 sum 581 CAGE:cerebellum, neonate N03, biol_ (P3R2) +581 CNhs13011- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13011/summary/coverage-.w5 768 384 1.0 sum 580 CAGE:cerebellum, neonate N03, biol_ (P3R2) +582 CNhs13012+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13012/summary/coverage+.w5 768 384 1.0 sum 583 CAGE:cerebellum, neonate N06, biol_ (P6R2) +583 CNhs13012- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13012/summary/coverage-.w5 768 384 1.0 sum 582 CAGE:cerebellum, neonate N06, biol_ (P6R2) +584 CNhs13013+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13013/summary/coverage+.w5 768 384 1.0 sum 585 CAGE:cerebellum, neonate N09, biol_ (P9R2) +585 CNhs13013- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13013/summary/coverage-.w5 768 384 1.0 sum 584 CAGE:cerebellum, neonate N09, biol_ (P9R2) +586 CNhs13014+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13014/summary/coverage+.w5 768 384 1.0 sum 587 
CAGE:cerebellum, embryo E11, biol_ (E11R3) +587 CNhs13014- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13014/summary/coverage-.w5 768 384 1.0 sum 586 CAGE:cerebellum, embryo E11, biol_ (E11R3) +588 CNhs13015+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13015/summary/coverage+.w5 768 384 1.0 sum 589 CAGE:cerebellum, embryo E12, biol_ (E12R3) +589 CNhs13015- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13015/summary/coverage-.w5 768 384 1.0 sum 588 CAGE:cerebellum, embryo E12, biol_ (E12R3) +590 CNhs13016+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13016/summary/coverage+.w5 768 384 1.0 sum 591 CAGE:cerebellum, embryo E13, biol_ (E13R3) +591 CNhs13016- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13016/summary/coverage-.w5 768 384 1.0 sum 590 CAGE:cerebellum, embryo E13, biol_ (E13R3) +592 CNhs13017+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13017/summary/coverage+.w5 768 384 1.0 sum 593 CAGE:cerebellum, embryo E14, biol_ (E14R3) +593 CNhs13017- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13017/summary/coverage-.w5 768 384 1.0 sum 592 CAGE:cerebellum, embryo E14, biol_ (E14R3) +594 CNhs13018+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13018/summary/coverage+.w5 768 384 1.0 sum 595 CAGE:cerebellum, embryo E15, biol_ (E15R3) +595 CNhs13018- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13018/summary/coverage-.w5 768 384 1.0 sum 594 CAGE:cerebellum, embryo E15, biol_ (E15R3) +596 CNhs13019+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13019/summary/coverage+.w5 768 384 1.0 sum 597 CAGE:cerebellum, embryo E16, biol_ (E16R3) +597 CNhs13019- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13019/summary/coverage-.w5 768 384 1.0 sum 596 CAGE:cerebellum, embryo E16, biol_ (E16R3) +598 CNhs13020+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13020/summary/coverage+.w5 768 384 1.0 sum 599 CAGE:cerebellum, embryo E17, biol_ (E17R3) +599 CNhs13020- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13020/summary/coverage-.w5 768 
384 1.0 sum 598 CAGE:cerebellum, embryo E17, biol_ (E17R3) +600 CNhs13021+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13021/summary/coverage+.w5 768 384 1.0 sum 601 CAGE:cerebellum, embryo E18, biol_ (E18R3) +601 CNhs13021- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13021/summary/coverage-.w5 768 384 1.0 sum 600 CAGE:cerebellum, embryo E18, biol_ (E18R3) +602 CNhs13022+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13022/summary/coverage+.w5 768 384 1.0 sum 603 CAGE:cerebellum, neonate N00, biol_ (P0R3) +603 CNhs13022- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13022/summary/coverage-.w5 768 384 1.0 sum 602 CAGE:cerebellum, neonate N00, biol_ (P0R3) +604 CNhs13024+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13024/summary/coverage+.w5 768 384 1.0 sum 605 CAGE:cerebellum, neonate N03, biol_ (P3R3) +605 CNhs13024- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13024/summary/coverage-.w5 768 384 1.0 sum 604 CAGE:cerebellum, neonate N03, biol_ (P3R3) +606 CNhs13025+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13025/summary/coverage+.w5 768 384 1.0 sum 607 CAGE:cerebellum, neonate N06, biol_ (P6R3) +607 CNhs13025- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13025/summary/coverage-.w5 768 384 1.0 sum 606 CAGE:cerebellum, neonate N06, biol_ (P6R3) +608 CNhs13026+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13026/summary/coverage+.w5 768 384 1.0 sum 609 CAGE:cerebellum, neonate N09, biol_ (P9R3) +609 CNhs13026- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13026/summary/coverage-.w5 768 384 1.0 sum 608 CAGE:cerebellum, neonate N09, biol_ (P9R3) +610 CNhs13032+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13031/summary/coverage+.w5 768 384 1.0 sum 611 CAGE:visual cortex - wildtype, neonate N15, +611 CNhs13032- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13031/summary/coverage-.w5 768 384 1.0 sum 610 CAGE:visual cortex - wildtype, neonate N15, +612 CNhs13034+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs13034/summary/coverage+.w5 768 384 1.0 sum 613 CAGE:visual cortex - wildtype, neonate N30, +613 CNhs13034- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13034/summary/coverage-.w5 768 384 1.0 sum 612 CAGE:visual cortex - wildtype, neonate N30, +614 CNhs13038+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13037/summary/coverage+.w5 768 384 1.0 sum 615 CAGE:visual cortex - wildtype, neonate N60-70, +615 CNhs13038- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13037/summary/coverage-.w5 768 384 1.0 sum 614 CAGE:visual cortex - wildtype, neonate N60-70, +616 CNhs13040+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13040/summary/coverage+.w5 768 384 1.0 sum 617 CAGE:visual cortex - Mecp knockout, neonate N15, +617 CNhs13040- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13040/summary/coverage-.w5 768 384 1.0 sum 616 CAGE:visual cortex - Mecp knockout, neonate N15, +618 CNhs13042+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13042/summary/coverage+.w5 768 384 1.0 sum 619 CAGE:visual cortex - Mecp knockout, neonate N30, +619 CNhs13042- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13042/summary/coverage-.w5 768 384 1.0 sum 618 CAGE:visual cortex - Mecp knockout, neonate N30, +620 CNhs13046+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13045/summary/coverage+.w5 768 384 1.0 sum 621 CAGE:visual cortex - Mecp knockout, neonate N60-70, +621 CNhs13046- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13045/summary/coverage-.w5 768 384 1.0 sum 620 CAGE:visual cortex - Mecp knockout, neonate N60-70, +622 CNhs13079+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13079/summary/coverage+.w5 768 384 1.0 sum 623 CAGE:CD41+ megakaryocyte cancer, +623 CNhs13079- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13079/summary/coverage-.w5 768 384 1.0 sum 622 CAGE:CD41+ megakaryocyte cancer, +624 CNhs13083+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13081/summary/coverage+.w5 768 384 1.0 sum 625 
CAGE:neurospheres - sympathetic neuron derived, biol_ +625 CNhs13083- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13081/summary/coverage-.w5 768 384 1.0 sum 624 CAGE:neurospheres - sympathetic neuron derived, biol_ +626 CNhs13086+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13084/summary/coverage+.w5 768 384 1.0 sum 627 CAGE:neurospheres - parasympathetic neuron derived, biol_ +627 CNhs13086- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13084/summary/coverage-.w5 768 384 1.0 sum 626 CAGE:neurospheres - parasympathetic neuron derived, biol_ +628 CNhs13089+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13087/summary/coverage+.w5 768 384 1.0 sum 629 CAGE:neurospheres - enteric neuron derived, biol_ +629 CNhs13089- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13087/summary/coverage-.w5 768 384 1.0 sum 628 CAGE:neurospheres - enteric neuron derived, biol_ +630 CNhs13196+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13196/summary/coverage+.w5 768 384 1.0 sum 631 CAGE:Mouse hepatic Stellate Cells (lipocyte), +631 CNhs13196- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13196/summary/coverage-.w5 768 384 1.0 sum 630 CAGE:Mouse hepatic Stellate Cells (lipocyte), +632 CNhs13197+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13197/summary/coverage+.w5 768 384 1.0 sum 633 CAGE:CD326+ enterocyte, pool2 +633 CNhs13197- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13197/summary/coverage-.w5 768 384 1.0 sum 632 CAGE:CD326+ enterocyte, pool2 +634 CNhs13199+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13199/summary/coverage+.w5 768 384 1.0 sum 635 CAGE:Ileum epithelium, pool1 +635 CNhs13199- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13199/summary/coverage-.w5 768 384 1.0 sum 634 CAGE:Ileum epithelium, pool1 +636 CNhs13200+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13200/summary/coverage+.w5 768 384 1.0 sum 637 CAGE:Follicle Associated Epithelium, pool3 +637 CNhs13200- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs13200/summary/coverage-.w5 768 384 1.0 sum 636 CAGE:Follicle Associated Epithelium, pool3 +638 CNhs13214+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13201/summary/coverage+.w5 768 384 1.0 sum 639 CAGE:CD41+ megakaryocyte control, +639 CNhs13214- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13201/summary/coverage-.w5 768 384 1.0 sum 638 CAGE:CD41+ megakaryocyte control, +640 CNhs13209+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13209/summary/coverage+.w5 768 384 1.0 sum 641 CAGE:Mouse hepatic Sinusoidal Endothelial Cells, +641 CNhs13209- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13209/summary/coverage-.w5 768 384 1.0 sum 640 CAGE:Mouse hepatic Sinusoidal Endothelial Cells, +642 CNhs13210+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13210/summary/coverage+.w5 768 384 1.0 sum 643 CAGE:CD326++ enterocyte isolated from mice, treated with RANKL, day03, pool2 +643 CNhs13210- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13210/summary/coverage-.w5 768 384 1.0 sum 642 CAGE:CD326++ enterocyte isolated from mice, treated with RANKL, day03, pool2 +644 CNhs13211+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13211/summary/coverage+.w5 768 384 1.0 sum 645 CAGE:Follicle Associated Epithelium, pool2 +645 CNhs13211- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13211/summary/coverage-.w5 768 384 1.0 sum 644 CAGE:Follicle Associated Epithelium, pool2 +646 CNhs13217+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13217/summary/coverage+.w5 768 384 1.0 sum 647 CAGE:CD4+CD25-CD44- naive conventional T cells, pool1 (C57BL_6J) +647 CNhs13217- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13217/summary/coverage-.w5 768 384 1.0 sum 646 CAGE:CD4+CD25-CD44- naive conventional T cells, pool1 (C57BL_6J) +648 CNhs13218+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13218/summary/coverage+.w5 768 384 1.0 sum 649 CAGE:CD4+CD25-CD44- naive conventional T cells, antiCD3_CD28 stimulation, 06hr, pool1 
(C57BL_6J) +649 CNhs13218- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13218/summary/coverage-.w5 768 384 1.0 sum 648 CAGE:CD4+CD25-CD44- naive conventional T cells, antiCD3_CD28 stimulation, 06hr, pool1 (C57BL_6J) +650 CNhs13219+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13219/summary/coverage+.w5 768 384 1.0 sum 651 CAGE:CD4+CD25-CD44- naive conventional T cells, PMA and ionomycin stimulation, 02hr, pool1 (C57BL_6J) +651 CNhs13219- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13219/summary/coverage-.w5 768 384 1.0 sum 650 CAGE:CD4+CD25-CD44- naive conventional T cells, PMA and ionomycin stimulation, 02hr, pool1 (C57BL_6J) +652 CNhs13220+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13220/summary/coverage+.w5 768 384 1.0 sum 653 CAGE:CD4+CD25-CD44- naive conventional T cells, pool2 (Balb_cAJcl) +653 CNhs13220- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13220/summary/coverage-.w5 768 384 1.0 sum 652 CAGE:CD4+CD25-CD44- naive conventional T cells, pool2 (Balb_cAJcl) +654 CNhs13221+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13221/summary/coverage+.w5 768 384 1.0 sum 655 CAGE:CD4+CD25+ regulatory T cells, pool2 (Balb_cAJcl) +655 CNhs13221- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13221/summary/coverage-.w5 768 384 1.0 sum 654 CAGE:CD4+CD25+ regulatory T cells, pool2 (Balb_cAJcl) +656 CNhs13225+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13225/summary/coverage+.w5 768 384 1.0 sum 657 CAGE:CD4+CD25-CD44- naive conventional T cells, antiCD3_CD28 stimulation, 06hr, pool2 (Balb_cAJcl) +657 CNhs13225- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13225/summary/coverage-.w5 768 384 1.0 sum 656 CAGE:CD4+CD25-CD44- naive conventional T cells, antiCD3_CD28 stimulation, 06hr, pool2 (Balb_cAJcl) +658 CNhs13226+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13226/summary/coverage+.w5 768 384 1.0 sum 659 CAGE:CD4+CD25-CD44- naive conventional T cells, PMA and ionomycin stimulation, 02hr, pool2 (Balb_cAJcl) +659 CNhs13226- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs13226/summary/coverage-.w5 768 384 1.0 sum 658 CAGE:CD4+CD25-CD44- naive conventional T cells, PMA and ionomycin stimulation, 02hr, pool2 (Balb_cAJcl) +660 CNhs13228+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13228/summary/coverage+.w5 768 384 1.0 sum 661 CAGE:GP2+ M cell, pool1 +661 CNhs13228- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13228/summary/coverage-.w5 768 384 1.0 sum 660 CAGE:GP2+ M cell, pool1 +662 CNhs13231+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13231/summary/coverage+.w5 768 384 1.0 sum 663 CAGE:GP2+ M cell, pool2 +663 CNhs13231- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13231/summary/coverage-.w5 768 384 1.0 sum 662 CAGE:GP2+ M cell, pool2 +664 CNhs13232+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13232/summary/coverage+.w5 768 384 1.0 sum 665 CAGE:Ileum epithelium, pool2 +665 CNhs13232- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13232/summary/coverage-.w5 768 384 1.0 sum 664 CAGE:Ileum epithelium, pool2 +666 CNhs13233+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13233/summary/coverage+.w5 768 384 1.0 sum 667 CAGE:CD326+ enterocyte isolated from mice, treated with RANKL, day03, pool2 +667 CNhs13233- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13233/summary/coverage-.w5 768 384 1.0 sum 666 CAGE:CD326+ enterocyte isolated from mice, treated with RANKL, day03, pool2 +668 CNhs13236+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13236/summary/coverage+.w5 768 384 1.0 sum 669 CAGE:CD326++ enterocyte isolated from mice, treated with RANKL, day03, pool1 +669 CNhs13236- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13236/summary/coverage-.w5 768 384 1.0 sum 668 CAGE:CD326++ enterocyte isolated from mice, treated with RANKL, day03, pool1 +670 CNhs13240+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13240/summary/coverage+.w5 768 384 1.0 sum 671 CAGE:GP2+ M cell, pool3 +671 CNhs13240- 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs13240/summary/coverage-.w5 768 384 1.0 sum 670 CAGE:GP2+ M cell, pool3 +672 CNhs13242+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13242/summary/coverage+.w5 768 384 1.0 sum 673 CAGE:CD326+ enterocyte isolated from mice, treated with RANKL, day03, pool1 +673 CNhs13242- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13242/summary/coverage-.w5 768 384 1.0 sum 672 CAGE:CD326+ enterocyte isolated from mice, treated with RANKL, day03, pool1 +674 CNhs13509+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13509/summary/coverage+.w5 768 384 1.0 sum 675 CAGE:Mouse CD4+ T Cells, +675 CNhs13509- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13509/summary/coverage-.w5 768 384 1.0 sum 674 CAGE:Mouse CD4+ T Cells, +676 CNhs13511+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13511/summary/coverage+.w5 768 384 1.0 sum 677 CAGE:Mouse CD8+ T Cells, +677 CNhs13511- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13511/summary/coverage-.w5 768 384 1.0 sum 676 CAGE:Mouse CD8+ T Cells, +678 CNhs13531+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13531/summary/coverage+.w5 768 384 1.0 sum 679 CAGE:Mouse CD19+ B Cells, +679 CNhs13531- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13531/summary/coverage-.w5 768 384 1.0 sum 678 CAGE:Mouse CD19+ B Cells, +680 CNhs13542+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13542/summary/coverage+.w5 768 384 1.0 sum 681 CAGE:CD326+ enterocyte, pool1 +681 CNhs13542- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13542/summary/coverage-.w5 768 384 1.0 sum 680 CAGE:CD326+ enterocyte, pool1 +682 CNhs13913+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13913/summary/coverage+.w5 768 384 1.0 sum 683 CAGE:CD4+CD25+ regulatory T cells, pool1 (C57BL_6J) +683 CNhs13913- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs13913/summary/coverage-.w5 768 384 1.0 sum 682 CAGE:CD4+CD25+ regulatory T cells, pool1 (C57BL_6J) +684 CNhs14136+ 
/home/drk/tillage/datasets/mouse/cage/fantom/CNhs14136/summary/coverage+.w5 768 384 1.0 sum 685 CAGE:macrophage, bone marrow derived +685 CNhs14136- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14136/summary/coverage-.w5 768 384 1.0 sum 684 CAGE:macrophage, bone marrow derived +686 CNhs14137+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14137/summary/coverage+.w5 768 384 1.0 sum 687 CAGE:osteoclast, bone marrow derived +687 CNhs14137- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14137/summary/coverage-.w5 768 384 1.0 sum 686 CAGE:osteoclast, bone marrow derived +688 CNhs14142+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14142/summary/coverage+.w5 768 384 1.0 sum 689 CAGE:CD4+CD25+ regulatory T cells, PMA and ionomycin stimulation, 02hr, pool1 (C57BL 6J) +689 CNhs14142- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14142/summary/coverage-.w5 768 384 1.0 sum 688 CAGE:CD4+CD25+ regulatory T cells, PMA and ionomycin stimulation, 02hr, pool1 (C57BL 6J) +690 CNhs14147+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14147/summary/coverage+.w5 768 384 1.0 sum 691 CAGE:Neurons - spiral ganglion, pool1 +691 CNhs14147- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14147/summary/coverage-.w5 768 384 1.0 sum 690 CAGE:Neurons - spiral ganglion, pool1 +692 CNhs14158+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14158/summary/coverage+.w5 768 384 1.0 sum 693 CAGE:CD4+CD25+ regulatory T cells, antiCD3 CD28 stimulation, 06hr, pool1 (BalbcA) +693 CNhs14158- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14158/summary/coverage-.w5 768 384 1.0 sum 692 CAGE:CD4+CD25+ regulatory T cells, antiCD3 CD28 stimulation, 06hr, pool1 (BalbcA) +694 CNhs14159+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14159/summary/coverage+.w5 768 384 1.0 sum 695 CAGE:CD4+CD25+ regulatory T cells, antiCD3 CD28 stimulation, 06hr, pool1 (C57BL 6J) +695 CNhs14159- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14159/summary/coverage-.w5 768 384 1.0 sum 694 CAGE:CD4+CD25+ 
regulatory T cells, antiCD3 CD28 stimulation, 06hr, pool1 (C57BL 6J) +696 CNhs14160+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14160/summary/coverage+.w5 768 384 1.0 sum 697 CAGE:CD4+CD25+ regulatory T cells, PMA and ionomycin stimulation, 02hr, pool1 (BalbcA) +697 CNhs14160- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14160/summary/coverage-.w5 768 384 1.0 sum 696 CAGE:CD4+CD25+ regulatory T cells, PMA and ionomycin stimulation, 02hr, pool1 (BalbcA) +698 CNhs14335+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14330/summary/coverage+.w5 768 384 1.0 sum 699 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_rep2 +699 CNhs14335- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14330/summary/coverage-.w5 768 384 1.0 sum 698 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_rep2 +700 CNhs14336+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14331/summary/coverage+.w5 768 384 1.0 sum 701 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_rep2 +701 CNhs14336- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14331/summary/coverage-.w5 768 384 1.0 sum 700 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_rep2 +702 CNhs14332+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14332/summary/coverage+.w5 768 384 1.0 sum 703 CAGE:natural helper cells, IL33 treated, 01hr, biol_, tech_rep2 +703 CNhs14332- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14332/summary/coverage-.w5 768 384 1.0 sum 702 CAGE:natural helper cells, IL33 treated, 01hr, biol_, tech_rep2 +704 CNhs14333+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14333/summary/coverage+.w5 768 384 1.0 sum 705 CAGE:natural helper cells, naive, biol_, tech_rep2 +705 CNhs14333- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14333/summary/coverage-.w5 768 384 1.0 sum 704 CAGE:natural helper cells, naive, biol_, tech_rep2 +706 CNhs14337+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14337/summary/coverage+.w5 768 384 1.0 sum 707 CAGE:natural helper cells, IL33 
treated, day02, biol_, tech_ +707 CNhs14337- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14337/summary/coverage-.w5 768 384 1.0 sum 706 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_ +708 CNhs14387+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14341/summary/coverage+.w5 768 384 1.0 sum 709 CAGE:natural helper cells, naive, biol_, tech_ +709 CNhs14387- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14341/summary/coverage-.w5 768 384 1.0 sum 708 CAGE:natural helper cells, naive, biol_, tech_ +710 CNhs14388+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14378/summary/coverage+.w5 768 384 1.0 sum 711 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_rep3 +711 CNhs14388- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14378/summary/coverage-.w5 768 384 1.0 sum 710 CAGE:natural helper cells, IL2 treated, day15, biol_, tech_rep3 +712 CNhs14382+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14381/summary/coverage+.w5 768 384 1.0 sum 713 CAGE:natural helper cells, naive, biol_, tech_rep3 +713 CNhs14382- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14381/summary/coverage-.w5 768 384 1.0 sum 712 CAGE:natural helper cells, naive, biol_, tech_rep3 +714 CNhs14383+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14383/summary/coverage+.w5 768 384 1.0 sum 715 CAGE:natural helper cells, IL33 treated, 01hr, biol_, tech_rep3 +715 CNhs14383- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14383/summary/coverage-.w5 768 384 1.0 sum 714 CAGE:natural helper cells, IL33 treated, 01hr, biol_, tech_rep3 +716 CNhs14386+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14385/summary/coverage+.w5 768 384 1.0 sum 717 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_rep3 +717 CNhs14386- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14385/summary/coverage-.w5 768 384 1.0 sum 716 CAGE:natural helper cells, IL33 treated, day02, biol_, tech_rep3 +718 CNhs14555+ /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14555/summary/coverage+.w5 768 
384 1.0 sum 719 CAGE:Mouse Renal epithelial cells, +719 CNhs14555- /home/drk/tillage/datasets/mouse/cage/fantom/CNhs14555/summary/coverage-.w5 768 384 1.0 sum 718 CAGE:Mouse Renal epithelial cells, +720 GSM1437735 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437735/summary/coverage.w5 256 64 1.0 sum 720 GRO:GRO-seq WT Liver ZT1 / liver / . +721 GSM1437736 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437736/summary/coverage.w5 256 64 1.0 sum 721 GRO:GRO-seq WT Liver ZT4 / liver / . +722 GSM1437737 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437737/summary/coverage.w5 256 64 1.0 sum 722 GRO:GRO-seq WT Liver ZT7 / liver / . +723 GSM1437738 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437738/summary/coverage.w5 256 64 1.0 sum 723 GRO:GRO-seq WT Liver ZT10 / liver / . +724 GSM1437739 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437739/summary/coverage.w5 256 64 1.0 sum 724 GRO:GRO-seq WT Liver ZT13 / liver / . +725 GSM1437740 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437740/summary/coverage.w5 256 64 1.0 sum 725 GRO:GRO-seq WT LiverZT16 / liver / . +726 GSM1437741 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437741/summary/coverage.w5 256 64 1.0 sum 726 GRO:GRO-seq WT Liver ZT19 / liver / . +727 GSM1437742 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437742/summary/coverage.w5 256 64 1.0 sum 727 GRO:GRO-seq WT Liver ZT22 / liver / . +728 GSM1437745 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437745/summary/coverage.w5 256 64 1.0 sum 728 GRO:GRO-seq Rev-erbA WT Liver ZT10 / liver / . +729 GSM1437746 /home/drk/tillage/datasets/mouse/gro/geo/GSM1437746/summary/coverage.w5 256 64 1.0 sum 729 GRO:GRO-seq Rev-erbA KO Liver ZT10 / liver / . 
+730 ENCFF866ZTV /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMK/summary/coverage.w5 128 32 2.0 mean 730 DNASE:B6D2F1/J 416B +731 ENCFF695LHM /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CML/summary/coverage.w5 128 32 2.0 mean 731 DNASE:BALB/cAnN A20 +732 ENCFF079SPZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMM/summary/coverage.w5 128 32 2.0 mean 732 DNASE:C57BL/6 B cell male adult (8 weeks) +733 ENCFF798VSP /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMN/summary/coverage.w5 128 32 2.0 mean 733 DNASE:C57BL/6 splenic B cell male adult (8 weeks) +734 ENCFF474GND /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMO/summary/coverage.w5 128 32 2.0 mean 734 DNASE:C57BL/6 cerebellum male adult (8 weeks) +735 ENCFF395AJZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMP/summary/coverage.w5 128 32 2.0 mean 735 DNASE:C57BL/6 telencephalon male adult (8 weeks) +736 ENCFF990TBH /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMR/summary/coverage.w5 128 32 2.0 mean 736 DNASE:CD-1 c-Kit-negative CD71-positive TER-119-positive erythroid progenitor cells male embryo (14.5 days) +737 ENCFF443PWY /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMS/summary/coverage.w5 128 32 2.0 mean 737 DNASE:CD-1 c-Kit-positive CD71-negative TER-119-negative erythroid progenitor cells male embryo (14.5 days) +738 ENCFF680ZTZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMT/summary/coverage.w5 128 32 2.0 mean 738 DNASE:CD-1 c-Kit-positive CD71-positive TER-119-negative erythroid progenitor cells male embryo (14.5 days) +739 ENCFF649DYS /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMU/summary/coverage.w5 128 32 2.0 mean 739 DNASE:CD-1 c-Kit-positive CD71-positive TER-119-positive erythroid progenitor cells male embryo (14.5 days) +740 ENCFF680PQC /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMV/summary/coverage.w5 128 32 2.0 mean 740 DNASE:129 ES-CJ7 +741 ENCFF872IES 
/home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMW/summary/coverage.w5 128 32 2.0 mean 741 DNASE:129 ES-E14 +742 ENCFF974BRL /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMX/summary/coverage.w5 128 32 2.0 mean 742 DNASE:mixed WW6 +743 ENCFF660XSJ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CMY/summary/coverage.w5 128 32 2.0 mean 743 DNASE:mixed WW6 +744 ENCFF391SPW /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNA/summary/coverage.w5 128 32 2.0 mean 744 DNASE:C57BL/6 fibroblast of lung male adult (8 weeks) +745 ENCFF837OUY /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNB/summary/coverage.w5 128 32 2.0 mean 745 DNASE:CD-1 forelimb bud male embryo (11.5 days) +746 ENCFF090NJT /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNC/summary/coverage.w5 128 32 2.0 mean 746 DNASE:C57BL/6 gonadal fat pad male adult (8 weeks) +747 ENCFF227XDD /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CND/summary/coverage.w5 128 32 2.0 mean 747 DNASE:CD-1 embryo male embryo (11.5 days) depleted in head +748 ENCFF773NKY /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNE/summary/coverage.w5 128 32 2.0 mean 748 DNASE:C57BL/6 heart male adult (8 weeks) +749 ENCFF037VPV /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNF/summary/coverage.w5 128 32 2.0 mean 749 DNASE:CD-1 hindlimb bud male embryo (11.5 days) +750 ENCFF561HSI /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNG/summary/coverage.w5 128 32 2.0 mean 750 DNASE:C57BL/6 kidney male adult (8 weeks) +751 ENCFF650OFZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNH/summary/coverage.w5 128 32 2.0 mean 751 DNASE:C57BL/6 large intestine male adult (8 weeks) +752 ENCFF395XSI /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNJ/summary/coverage.w5 128 32 2.0 mean 752 DNASE:C57BL/6 liver male embryo (14.5 days) +753 ENCFF906XJY /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNK/summary/coverage.w5 128 32 2.0 mean 753 DNASE:129 liver male embryo (14.5 
days) +754 ENCFF435WED /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNL/summary/coverage.w5 128 32 2.0 mean 754 DNASE:129 liver male embryo (14.5 days) +755 ENCFF102MGA /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNM/summary/coverage.w5 128 32 2.0 mean 755 DNASE:C57BL/6 lung male adult (8 weeks) +756 ENCFF990ATO /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNN/summary/coverage.w5 128 32 2.0 mean 756 DNASE:DBA/2 MEL cell line +757 ENCFF673VVB /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNO/summary/coverage.w5 128 32 2.0 mean 757 DNASE:CD-1 mesoderm male embryo (11.5 days) +758 ENCFF223QRV /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNP/summary/coverage.w5 128 32 2.0 mean 758 DNASE:DBA/2 MEL-GATA-1-ER +759 ENCFF516XER /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNQ/summary/coverage.w5 128 32 2.0 mean 759 DNASE:DBA/2 erythroblast originated from MEL-GATA-1-ER treated with 1 uM estradiol for 24 hours +760 ENCFF178FCM /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNR/summary/coverage.w5 128 32 2.0 mean 760 DNASE:DBA/2 erythroblast originated from MEL-GATA-1-ER treated with 1 uM estradiol for 48 hours +761 ENCFF950ALJ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNS/summary/coverage.w5 128 32 2.0 mean 761 DNASE:NIH/Swiss NIH3T3 +762 ENCFF632WLC /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNT/summary/coverage.w5 128 32 2.0 mean 762 DNASE:M.spretus x C57BL/6J Patski +763 ENCFF908NIP /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNU/summary/coverage.w5 128 32 2.0 mean 763 DNASE:C57BL/6 retina male postnatal (7 days) +764 ENCFF261PZG /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNV/summary/coverage.w5 128 32 2.0 mean 764 DNASE:C57BL/6 retina male postnatal (1 day) +765 ENCFF259ZNG /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNW/summary/coverage.w5 128 32 2.0 mean 765 DNASE:C57BL/6 retina male adult (8 weeks) +766 ENCFF731YAA 
/home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNX/summary/coverage.w5 128 32 2.0 mean 766 DNASE:C57BL/6 skeletal muscle tissue male adult (8 weeks) +767 ENCFF546FZT /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000CNY/summary/coverage.w5 128 32 2.0 mean 767 DNASE:C57BL/6 spleen male adult (8 weeks) +768 ENCFF896PZK /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COA/summary/coverage.w5 128 32 2.0 mean 768 DNASE:C57BL/6 CD4-positive helper T cell male adult (8 weeks) +769 ENCFF540HLI /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COB/summary/coverage.w5 128 32 2.0 mean 769 DNASE:C57BL/6 thymus male adult (8 weeks) +770 ENCFF242YCJ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COD/summary/coverage.w5 128 32 2.0 mean 770 DNASE:C57BL/6 induced T-regulatory cell male adult (8 weeks) +771 ENCFF992QCB /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COE/summary/coverage.w5 128 32 2.0 mean 771 DNASE:C57BL/6 brain male embryo (14.5 days) +772 ENCFF257BKZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COF/summary/coverage.w5 128 32 2.0 mean 772 DNASE:C57BL/6 brain male adult (8 weeks) +773 ENCFF333MPS /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COG/summary/coverage.w5 128 32 2.0 mean 773 DNASE:C57BL/6 brain male embryo (18.5 days) +774 ENCFF424XOH /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COH/summary/coverage.w5 128 32 2.0 mean 774 DNASE:129 ZHBTc4-mESC +775 ENCFF448ZJM /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COI/summary/coverage.w5 128 32 2.0 mean 775 DNASE:129 ZHBTc4-mESC treated with 100 ng/mL doxycycline hyclate for 24 hours +776 ENCFF825ZVT /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR000COJ/summary/coverage.w5 128 32 2.0 mean 776 DNASE:129 ZHBTc4-mESC treated with 100 ng/mL doxycycline hyclate for 6 hours +777 ENCFF513QAB /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR005WPU/summary/coverage.w5 128 32 2.0 mean 777 DNASE:C57BL/6 heart postnatal (0 days) +778 ENCFF009ALP 
/home/drk/tillage/datasets/mouse/dnase/encode/ENCSR014SFF/summary/coverage.w5 128 32 2.0 mean 778 DNASE:C57BL/6 forebrain embryo (11.50 days) +779 ENCFF299KAN /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR066DPD/summary/coverage.w5 128 32 2.0 mean 779 DNASE:C57BL/6 kidney postnatal (0 days) +780 ENCFF417SAZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR094XAY/summary/coverage.w5 128 32 2.0 mean 780 DNASE:C57BL/6 lung postnatal (0 days) +781 ENCFF829UGS /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR172RHR/summary/coverage.w5 128 32 2.0 mean 781 DNASE:C57BL/6 embryo embryo (10.5 days) +782 ENCFF107NBQ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR179PIH/summary/coverage.w5 128 32 2.0 mean 782 DNASE:C57BL/6 hindbrain embryo (14.5 days) +783 ENCFF373NJX /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR196VDE/summary/coverage.w5 128 32 2.0 mean 783 DNASE:C57BL/6 embryonic facial prominence embryo (11.5 days) +784 ENCFF540VTK /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR216UMD/summary/coverage.w5 128 32 2.0 mean 784 DNASE:C57BL/6 liver postnatal (0 days) +785 ENCFF888LAZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR289BTM/summary/coverage.w5 128 32 2.0 mean 785 DNASE:C57BL/6 hindbrain embryo (10.50 days) +786 ENCFF906WCV /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR292QBA/summary/coverage.w5 128 32 2.0 mean 786 DNASE:C57BL/6 midbrain embryo (11.5 days) +787 ENCFF649UQO /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR312QVY/summary/coverage.w5 128 32 2.0 mean 787 DNASE:C57BL/6 neural tube embryo (11.5 days) +788 ENCFF729TAB /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR319PWR/summary/coverage.w5 128 32 2.0 mean 788 DNASE:C57BL/6 R1 +789 ENCFF149TTQ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR337EDG/summary/coverage.w5 128 32 2.0 mean 789 DNASE:C57BL/6 forebrain embryo (14.5 days) +790 ENCFF943PHW /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR358ESL/summary/coverage.w5 128 32 2.0 mean 790 DNASE:C57BL/6 hindbrain 
embryo (11.5 days) +791 ENCFF115OLL /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR367FCW/summary/coverage.w5 128 32 2.0 mean 791 DNASE:C57BL/6 midbrain embryo (14.5 days) +792 ENCFF963PGF /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR372SMH/summary/coverage.w5 128 32 2.0 mean 792 DNASE:DBA/2 MEL cell line treated with 125 uM zinc dichloride for 24 hours +793 ENCFF625SMQ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR380VCU/summary/coverage.w5 128 32 2.0 mean 793 DNASE:C57BL/6 limb embryo (10.50 days) +794 ENCFF155MQS /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR446MUM/summary/coverage.w5 128 32 2.0 mean 794 DNASE:DBA/2 MEL cell line +795 ENCFF283WGI /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR469VGZ/summary/coverage.w5 128 32 2.0 mean 795 DNASE:C57BL/6 hindbrain postnatal (0 days) +796 ENCFF266ZDD /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR488VEQ/summary/coverage.w5 128 32 2.0 mean 796 DNASE:C57BL/6 retina embryo (14.5 days) +797 ENCFF076VZF /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR561FZE/summary/coverage.w5 128 32 2.0 mean 797 DNASE:C57BL/6 liver embryo (11.50 days) +798 ENCFF802BPW /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR636NXY/summary/coverage.w5 128 32 2.0 mean 798 DNASE:C57BL/6 limb embryo (14.5 days) +799 ENCFF622EDP /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR655WKX/summary/coverage.w5 128 32 2.0 mean 799 DNASE:C57BL/6 intestine embryo (14.50 days) +800 ENCFF945EBN /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR661GMU/summary/coverage.w5 128 32 2.0 mean 800 DNASE:C57BL/6 neural tube embryo (10.50 days) +801 ENCFF641OPE /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR661HDP/summary/coverage.w5 128 32 2.0 mean 801 DNASE:C57BL/6 limb embryo (11.5 days) +802 ENCFF586XFN /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR666HFH/summary/coverage.w5 128 32 2.0 mean 802 DNASE:C57BL/6 Muller cell postnatal (12 days) +803 ENCFF435DKT 
/home/drk/tillage/datasets/mouse/dnase/encode/ENCSR687EAW/summary/coverage.w5 128 32 2.0 mean 803 DNASE:C57BL/6 3T3-L1 +804 ENCFF363ULE /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR687JCD/summary/coverage.w5 128 32 2.0 mean 804 DNASE:C57BL/6 embryo embryo (9.50 days) +805 ENCFF894XDN /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR723IXU/summary/coverage.w5 128 32 2.0 mean 805 DNASE:C57BL/6 embryo embryo (10 days) +806 ENCFF919AUF /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR732IZK/summary/coverage.w5 128 32 2.0 mean 806 DNASE:C57BL/6 MN1 +807 ENCFF043UNG /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR742DUR/summary/coverage.w5 128 32 2.0 mean 807 DNASE:C57BL/6 lung embryo (14.5 days) +808 ENCFF071TLE /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR749ILN/summary/coverage.w5 128 32 2.0 mean 808 DNASE:C57BL/6 heart embryo (10.50 days) +809 ENCFF247KMU /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR756SPS/summary/coverage.w5 128 32 2.0 mean 809 DNASE:C57BL/6 forebrain embryo (10.50 days) +810 ENCFF610ONZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR767AJS/summary/coverage.w5 128 32 2.0 mean 810 DNASE:C57BL/6 midbrain postnatal (0 days) +811 ENCFF327SCP /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR773SAG/summary/coverage.w5 128 32 2.0 mean 811 DNASE:C57BL/6 midbrain embryo (10.50 days) +812 ENCFF846VXR /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR774TQO/summary/coverage.w5 128 32 2.0 mean 812 DNASE:C57BL/6 embryo embryo (11 day) +813 ENCFF458SNJ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR775YJL/summary/coverage.w5 128 32 2.0 mean 813 DNASE:C57BL/6 embryonic facial prominence embryo (10.50 days) +814 ENCFF684BQC /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR791AJY/summary/coverage.w5 128 32 2.0 mean 814 DNASE:C57BL/6 forebrain postnatal (0 days) +815 ENCFF014NWA /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR855ASN/summary/coverage.w5 128 32 2.0 mean 815 DNASE:C57BL/6 acute myeloid leukemia +816 ENCFF353HPD 
/home/drk/tillage/datasets/mouse/dnase/encode/ENCSR894HWV/summary/coverage.w5 128 32 2.0 mean 816 DNASE:C57BL/6 yolk sac embryo (10.50 days) +817 ENCFF514HPX /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR898KAX/summary/coverage.w5 128 32 2.0 mean 817 DNASE:C57BL/6 MN1 treated with 10 nM methyltrienolone +818 ENCFF125IRZ /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR935RRY/summary/coverage.w5 128 32 2.0 mean 818 DNASE:C57BL/6 thymus postnatal (0 days) +819 ENCFF580UUE /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR959HKR/summary/coverage.w5 128 32 2.0 mean 819 DNASE:C57BL/6 embryonic facial prominence embryo (14.5 days) +820 ENCFF974HWL /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR964VDK/summary/coverage.w5 128 32 2.0 mean 820 DNASE:C57BL/6 R1 +821 ENCFF197TQR /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR969OPE/summary/coverage.w5 128 32 2.0 mean 821 DNASE:C57BL/6 stomach postnatal (0 days) +822 ENCFF325RFB /home/drk/tillage/datasets/mouse/dnase/encode/ENCSR998KYQ/summary/coverage.w5 128 32 2.0 mean 822 DNASE:C57BL/6 adipocyte +823 GSM1479701 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479701/summary/coverage.w5 256 64 1.0 sum 823 DNASE:DNase ZT 2 WT / Liver, WT, ZT 2, DNase / . +824 GSM1479702 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479702/summary/coverage.w5 256 64 1.0 sum 824 DNASE:DNase ZT 6 WT / Liver, WT, ZT 6, DNase / . +825 GSM1479703 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479703/summary/coverage.w5 256 64 1.0 sum 825 DNASE:DNase ZT 10 WT / Liver, WT, ZT 10, DNase / . +826 GSM1479704 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479704/summary/coverage.w5 256 64 1.0 sum 826 DNASE:DNase ZT 14 WT / Liver, WT, ZT 14, DNase / . +827 GSM1479705 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479705/summary/coverage.w5 256 64 1.0 sum 827 DNASE:DNase ZT 18 WT / Liver, WT, ZT 18, DNase / . 
+828 GSM1479706 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479706/summary/coverage.w5 256 64 1.0 sum 828 DNASE:DNase ZT 22 WT / Liver, WT, ZT 22, DNase / . +829 GSM1479707 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479707/summary/coverage.w5 256 64 1.0 sum 829 DNASE:DNase ZT 26 WT / Liver, WT, ZT 26, DNase / . +830 GSM1479708 /home/drk/tillage/datasets/mouse/dnase/geo/GSM1479708/summary/coverage.w5 256 64 1.0 sum 830 DNASE:DNase ZT 6 Bmal1 KO / Liver, Bmal1 KO, ZT 6, DNase / . +831 ENCFF745RFR /home/drk/tillage/datasets/mouse/atac/encode/ENCSR012YAB/summary/coverage.w5 128 32 2.0 mean 831 ATAC:C57BL/6 hindbrain embryo (11.5 days) +832 ENCFF446XWP /home/drk/tillage/datasets/mouse/atac/encode/ENCSR023QZX/summary/coverage.w5 128 32 2.0 mean 832 ATAC:C57BL/6 kidney embryo (15.5 days) +833 ENCFF700IKY /home/drk/tillage/datasets/mouse/atac/encode/ENCSR064IHX/summary/coverage.w5 128 32 2.0 mean 833 ATAC:C57BL/6 megakaryocyte-erythroid progenitor cell male adult (5-6 weeks) +834 ENCFF769BOR /home/drk/tillage/datasets/mouse/atac/encode/ENCSR079GOY/summary/coverage.w5 128 32 2.0 mean 834 ATAC:C57BL/6 intestine postnatal (0 days) +835 ENCFF860QUH /home/drk/tillage/datasets/mouse/atac/encode/ENCSR088UYE/summary/coverage.w5 128 32 2.0 mean 835 ATAC:C57BL/6 hindbrain embryo (12.5 days) +836 ENCFF242FBC /home/drk/tillage/datasets/mouse/atac/encode/ENCSR096JCC/summary/coverage.w5 128 32 2.0 mean 836 ATAC:C57BL/6 midbrain embryo (16.5 days) +837 ENCFF823RLW /home/drk/tillage/datasets/mouse/atac/encode/ENCSR102NGD/summary/coverage.w5 128 32 2.0 mean 837 ATAC:C57BL/6 lung postnatal (0 days) +838 ENCFF570DUW /home/drk/tillage/datasets/mouse/atac/encode/ENCSR136XSY/summary/coverage.w5 128 32 2.0 mean 838 ATAC:C57BL/6 erythroid progenitor cell male adult (5-6 weeks) +839 ENCFF321AYT /home/drk/tillage/datasets/mouse/atac/encode/ENCSR176BYZ/summary/coverage.w5 128 32 2.0 mean 839 ATAC:C57BL/6 hindbrain embryo (13.5 days) +840 ENCFF517RIA 
/home/drk/tillage/datasets/mouse/atac/encode/ENCSR229QKB/summary/coverage.w5 128 32 2.0 mean 840 ATAC:C57BL/6 megakaryocyte progenitor cell male adult (5-6 weeks) +841 ENCFF408OOS /home/drk/tillage/datasets/mouse/atac/encode/ENCSR257PGU/summary/coverage.w5 128 32 2.0 mean 841 ATAC:C57BL/6 granulocyte monocyte progenitor cell male adult (5-6 weeks) +842 ENCFF060HRT /home/drk/tillage/datasets/mouse/atac/encode/ENCSR273UFV/summary/coverage.w5 128 32 2.0 mean 842 ATAC:C57BL/6 forebrain embryo (11.5 days) +843 ENCFF731NRD /home/drk/tillage/datasets/mouse/atac/encode/ENCSR280ZDP/summary/coverage.w5 128 32 2.0 mean 843 ATAC:C57BL/6 G1E +844 ENCFF643SRL /home/drk/tillage/datasets/mouse/atac/encode/ENCSR302LIV/summary/coverage.w5 128 32 2.0 mean 844 ATAC:C57BL/6 liver embryo (12.5 days) +845 ENCFF092DWB /home/drk/tillage/datasets/mouse/atac/encode/ENCSR310MLB/summary/coverage.w5 128 32 2.0 mean 845 ATAC:C57BL/6 forebrain postnatal (0 days) +846 ENCFF609BWE /home/drk/tillage/datasets/mouse/atac/encode/ENCSR312LQX/summary/coverage.w5 128 32 2.0 mean 846 ATAC:C57BL/6 hindbrain postnatal (0 days) +847 ENCFF161EJL /home/drk/tillage/datasets/mouse/atac/encode/ENCSR351YUI/summary/coverage.w5 128 32 2.0 mean 847 ATAC:C57BL/6 neutrophil adult (5-6 weeks) +848 ENCFF683WEV /home/drk/tillage/datasets/mouse/atac/encode/ENCSR366VBB/summary/coverage.w5 128 32 2.0 mean 848 ATAC:C57BL/6 hematopoietic stem cell adult (5-6 weeks) +849 ENCFF889QAK /home/drk/tillage/datasets/mouse/atac/encode/ENCSR382RUC/summary/coverage.w5 128 32 2.0 mean 849 ATAC:C57BL/6 midbrain embryo (11.5 days) +850 ENCFF513ECX /home/drk/tillage/datasets/mouse/atac/encode/ENCSR428BSK/summary/coverage.w5 128 32 2.0 mean 850 ATAC:C57BL/6 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +851 ENCFF564GWQ /home/drk/tillage/datasets/mouse/atac/encode/ENCSR453AWR/summary/coverage.w5 128 32 2.0 mean 851 ATAC:C57BL/6 hematopoietic stem cell +852 ENCFF190LKP 
/home/drk/tillage/datasets/mouse/atac/encode/ENCSR468GUI/summary/coverage.w5 128 32 2.0 mean 852 ATAC:C57BL/6 midbrain embryo (15.5 days) +853 ENCFF047WUY /home/drk/tillage/datasets/mouse/atac/encode/ENCSR498DQA/summary/coverage.w5 128 32 2.0 mean 853 ATAC:C57BL/6 megakaryocyte male adult (5-6 weeks) +854 ENCFF329SPM /home/drk/tillage/datasets/mouse/atac/encode/ENCSR554JQP/summary/coverage.w5 128 32 2.0 mean 854 ATAC:C57BL/6 cerebellum adult +855 ENCFF917SGA /home/drk/tillage/datasets/mouse/atac/encode/ENCSR559FAJ/summary/coverage.w5 128 32 2.0 mean 855 ATAC:C57BL/6 forebrain embryo (12.5 days) +856 ENCFF238AZB /home/drk/tillage/datasets/mouse/atac/encode/ENCSR597BGP/summary/coverage.w5 128 32 2.0 mean 856 ATAC:C57BL/6 stomach postnatal (0 days) +857 ENCFF770CQD /home/drk/tillage/datasets/mouse/atac/encode/ENCSR609OHJ/summary/coverage.w5 128 32 2.0 mean 857 ATAC:C57BL/6 liver postnatal (0 days) +858 ENCFF956ZAH /home/drk/tillage/datasets/mouse/atac/encode/ENCSR623GSD/summary/coverage.w5 128 32 2.0 mean 858 ATAC:C57BL/6 hindbrain embryo (16.5 days) +859 ENCFF196PJB /home/drk/tillage/datasets/mouse/atac/encode/ENCSR732OTZ/summary/coverage.w5 128 32 2.0 mean 859 ATAC:C57BL/6 kidney embryo (16.5 days) +860 ENCFF331KWS /home/drk/tillage/datasets/mouse/atac/encode/ENCSR785NEL/summary/coverage.w5 128 32 2.0 mean 860 ATAC:C57BL/6 liver embryo (11.5 days) +861 ENCFF321OJD /home/drk/tillage/datasets/mouse/atac/encode/ENCSR793RAV/summary/coverage.w5 128 32 2.0 mean 861 ATAC:C57BL/6 erythroblast male adult (5-6 weeks) +862 ENCFF845NRJ /home/drk/tillage/datasets/mouse/atac/encode/ENCSR810HQR/summary/coverage.w5 128 32 2.0 mean 862 ATAC:C57BL/6 forebrain embryo (14.5 days) +863 ENCFF810TCB /home/drk/tillage/datasets/mouse/atac/encode/ENCSR819QOJ/summary/coverage.w5 128 32 2.0 mean 863 ATAC:C57BL/6 midbrain embryo (13.5 days) +864 ENCFF960LTM /home/drk/tillage/datasets/mouse/atac/encode/ENCSR836PUC/summary/coverage.w5 128 32 2.0 mean 864 ATAC:C57BL/6 forebrain embryo (16.5 days) 
+865 ENCFF320TGF /home/drk/tillage/datasets/mouse/atac/encode/ENCSR862JVD/summary/coverage.w5 128 32 2.0 mean 865 ATAC:C57BL/6 monocyte adult (5-6 weeks) +866 ENCFF878HWK /home/drk/tillage/datasets/mouse/atac/encode/ENCSR889WQX/summary/coverage.w5 128 32 2.0 mean 866 ATAC:C57BL/6 frontal cortex adult +867 ENCFF654YSF /home/drk/tillage/datasets/mouse/atac/encode/ENCSR903GMO/summary/coverage.w5 128 32 2.0 mean 867 ATAC:C57BL/6 forebrain embryo (13.5 days) +868 ENCFF705VCO /home/drk/tillage/datasets/mouse/atac/encode/ENCSR914PYX/summary/coverage.w5 128 32 2.0 mean 868 ATAC:C57BL/6 common myeloid progenitor male adult (5-6 weeks) +869 ENCFF114TFO /home/drk/tillage/datasets/mouse/atac/encode/ENCSR961SMM/summary/coverage.w5 128 32 2.0 mean 869 ATAC:C57BL/6 intestine embryo (15.5 days) +870 ENCFF258MGU /home/drk/tillage/datasets/mouse/atac/encode/ENCSR966ORC/summary/coverage.w5 128 32 2.0 mean 870 ATAC:C57BL/6 intestine embryo (16.5 days) +871 ENCFF487YTM /home/drk/tillage/datasets/mouse/atac/encode/ENCSR976LWP/summary/coverage.w5 128 32 2.0 mean 871 ATAC:C57BL/6 forebrain embryo (15.5 days) +872 ENCFF309CAW /home/drk/tillage/datasets/mouse/atac/encode/ENCSR983JWA/summary/coverage.w5 128 32 2.0 mean 872 ATAC:C57BL/6 neural tube embryo (15.5 days) +873 UW1.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW1.1/summary/Unknown-clusters_1-cluster_1_mm10.w5 128 32 4.0 mean 873 ATAC:Unknown-clusters_1-cluster_1 +874 UW1.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW1.2/summary/Unknown-clusters_1-cluster_2_mm10.w5 128 32 4.0 mean 874 ATAC:Unknown-clusters_1-cluster_2 +875 UW1.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW1.3/summary/Unknown-clusters_1-cluster_3_mm10.w5 128 32 4.0 mean 875 ATAC:Unknown-clusters_1-cluster_3 +876 UW10.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW10.1/summary/Hematopoietic_progenitors-clusters_10-cluster_1_mm10.w5 128 32 4.0 mean 876 ATAC:Hematopoietic_progenitors-clusters_10-cluster_1 +877 UW11.1 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW11.1/summary/Proximal_tubule-clusters_11-cluster_1_mm10.w5 128 32 4.0 mean 877 ATAC:Proximal_tubule-clusters_11-cluster_1 +878 UW11.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW11.2/summary/Proximal_tubule-clusters_11-cluster_2_mm10.w5 128 32 4.0 mean 878 ATAC:Proximal_tubule-clusters_11-cluster_2 +879 UW11.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW11.3/summary/Proximal_tubule-clusters_11-cluster_3_mm10.w5 128 32 4.0 mean 879 ATAC:Proximal_tubule-clusters_11-cluster_3 +880 UW11.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW11.4/summary/Proximal_tubule_S3-clusters_11-cluster_4_mm10.w5 128 32 4.0 mean 880 ATAC:Proximal_tubule_S3-clusters_11-cluster_4 +881 UW11.5 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW11.5/summary/Proximal_tubule-clusters_11-cluster_5_mm10.w5 128 32 4.0 mean 881 ATAC:Proximal_tubule-clusters_11-cluster_5 +882 UW12.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW12.1/summary/T_cells-clusters_12-cluster_1_mm10.w5 128 32 4.0 mean 882 ATAC:T_cells-clusters_12-cluster_1 +883 UW12.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW12.2/summary/Regulatory_T_cells-clusters_12-cluster_2_mm10.w5 128 32 4.0 mean 883 ATAC:Regulatory_T_cells-clusters_12-cluster_2 +884 UW12.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW12.3/summary/NK_cells-clusters_12-cluster_3_mm10.w5 128 32 4.0 mean 884 ATAC:NK_cells-clusters_12-cluster_3 +885 UW12.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW12.4/summary/T_cells-clusters_12-cluster_4_mm10.w5 128 32 4.0 mean 885 ATAC:T_cells-clusters_12-cluster_4 +886 UW12.5 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW12.5/summary/T_cells-clusters_12-cluster_5_mm10.w5 128 32 4.0 mean 886 ATAC:T_cells-clusters_12-cluster_5 +887 UW13.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW13.1/summary/Erythroblasts-clusters_13-cluster_1_mm10.w5 128 32 4.0 mean 887 ATAC:Erythroblasts-clusters_13-cluster_1 +888 UW14.1 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW14.1/summary/Sperm-clusters_14-cluster_1_mm10.w5 128 32 4.0 mean 888 ATAC:Sperm-clusters_14-cluster_1 +889 UW14.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW14.2/summary/Sperm-clusters_14-cluster_2_mm10.w5 128 32 4.0 mean 889 ATAC:Sperm-clusters_14-cluster_2 +890 UW14.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW14.3/summary/Sperm-clusters_14-cluster_3_mm10.w5 128 32 4.0 mean 890 ATAC:Sperm-clusters_14-cluster_3 +891 UW15.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW15.1/summary/Inhibitory_neurons-clusters_15-cluster_1_mm10.w5 128 32 4.0 mean 891 ATAC:Inhibitory_neurons-clusters_15-cluster_1 +892 UW15.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW15.2/summary/Inhibitory_neurons-clusters_15-cluster_2_mm10.w5 128 32 4.0 mean 892 ATAC:Inhibitory_neurons-clusters_15-cluster_2 +893 UW15.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW15.3/summary/SOM+_Interneurons-clusters_15-cluster_3_mm10.w5 128 32 4.0 mean 893 ATAC:SOM+_Interneurons-clusters_15-cluster_3 +894 UW16.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW16.1/summary/B_cells-clusters_16-cluster_1_mm10.w5 128 32 4.0 mean 894 ATAC:B_cells-clusters_16-cluster_1 +895 UW16.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW16.2/summary/Macrophages-clusters_16-cluster_2_mm10.w5 128 32 4.0 mean 895 ATAC:Macrophages-clusters_16-cluster_2 +896 UW16.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW16.3/summary/Microglia-clusters_16-cluster_3_mm10.w5 128 32 4.0 mean 896 ATAC:Microglia-clusters_16-cluster_3 +897 UW17.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW17.1/summary/Dendritic_cells-clusters_17-cluster_1_mm10.w5 128 32 4.0 mean 897 ATAC:Dendritic_cells-clusters_17-cluster_1 +898 UW17.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW17.2/summary/Alveolar_macrophages-clusters_17-cluster_2_mm10.w5 128 32 4.0 mean 898 ATAC:Alveolar_macrophages-clusters_17-cluster_2 +899 UW17.3 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW17.3/summary/Dendritic_cells-clusters_17-cluster_3_mm10.w5 128 32 4.0 mean 899 ATAC:Dendritic_cells-clusters_17-cluster_3 +900 UW17.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW17.4/summary/Collisions-clusters_17-cluster_4_mm10.w5 128 32 4.0 mean 900 ATAC:Collisions-clusters_17-cluster_4 +901 UW17.5 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW17.5/summary/Collisions-clusters_17-cluster_5_mm10.w5 128 32 4.0 mean 901 ATAC:Collisions-clusters_17-cluster_5 +902 UW18.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW18.1/summary/DCT_CD-clusters_18-cluster_1_mm10.w5 128 32 4.0 mean 902 ATAC:DCT_CD-clusters_18-cluster_1 +903 UW18.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW18.2/summary/Loop_of_henle-clusters_18-cluster_2_mm10.w5 128 32 4.0 mean 903 ATAC:Loop_of_henle-clusters_18-cluster_2 +904 UW18.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW18.3/summary/Loop_of_henle-clusters_18-cluster_3_mm10.w5 128 32 4.0 mean 904 ATAC:Loop_of_henle-clusters_18-cluster_3 +905 UW18.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW18.4/summary/Distal_convoluted_tubule-clusters_18-cluster_4_mm10.w5 128 32 4.0 mean 905 ATAC:Distal_convoluted_tubule-clusters_18-cluster_4 +906 UW18.5 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW18.5/summary/Collecting_duct-clusters_18-cluster_5_mm10.w5 128 32 4.0 mean 906 ATAC:Collecting_duct-clusters_18-cluster_5 +907 UW19.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW19.1/summary/Astrocytes-clusters_19-cluster_1_mm10.w5 128 32 4.0 mean 907 ATAC:Astrocytes-clusters_19-cluster_1 +908 UW19.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW19.2/summary/Astrocytes-clusters_19-cluster_2_mm10.w5 128 32 4.0 mean 908 ATAC:Astrocytes-clusters_19-cluster_2 +909 UW19.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW19.3/summary/Astrocytes-clusters_19-cluster_3_mm10.w5 128 32 4.0 mean 909 ATAC:Astrocytes-clusters_19-cluster_3 +910 UW19.4 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW19.4/summary/Astrocytes-clusters_19-cluster_4_mm10.w5 128 32 4.0 mean 910 ATAC:Astrocytes-clusters_19-cluster_4 +911 UW2.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW2.1/summary/T_cells-clusters_2-cluster_1_mm10.w5 128 32 4.0 mean 911 ATAC:T_cells-clusters_2-cluster_1 +912 UW20.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW20.1/summary/Type_I_pneumocytes-clusters_20-cluster_1_mm10.w5 128 32 4.0 mean 912 ATAC:Type_I_pneumocytes-clusters_20-cluster_1 +913 UW21.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW21.1/summary/Oligodendrocytes-clusters_21-cluster_1_mm10.w5 128 32 4.0 mean 913 ATAC:Oligodendrocytes-clusters_21-cluster_1 +914 UW21.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW21.2/summary/Oligodendrocytes-clusters_21-cluster_2_mm10.w5 128 32 4.0 mean 914 ATAC:Oligodendrocytes-clusters_21-cluster_2 +915 UW22.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW22.1/summary/Endothelial_I_cells-clusters_22-cluster_1_mm10.w5 128 32 4.0 mean 915 ATAC:Endothelial_I_cells-clusters_22-cluster_1 +916 UW22.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW22.2/summary/Endothelial_I_glomerular-clusters_22-cluster_2_mm10.w5 128 32 4.0 mean 916 ATAC:Endothelial_I_(glomerular)-clusters_22-cluster_2 +917 UW22.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW22.3/summary/Endothelial_I_cells-clusters_22-cluster_3_mm10.w5 128 32 4.0 mean 917 ATAC:Endothelial_I_cells-clusters_22-cluster_3 +918 UW22.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW22.4/summary/Endothelial_I_cells-clusters_22-cluster_4_mm10.w5 128 32 4.0 mean 918 ATAC:Endothelial_I_cells-clusters_22-cluster_4 +919 UW23.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW23.1/summary/Endothelial_II_cells-clusters_23-cluster_1_mm10.w5 128 32 4.0 mean 919 ATAC:Endothelial_II_cells-clusters_23-cluster_1 +920 UW23.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW23.2/summary/Unknown-clusters_23-cluster_2_mm10.w5 128 32 4.0 mean 920 
ATAC:Unknown-clusters_23-cluster_2 +921 UW24.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW24.1/summary/Monocytes-clusters_24-cluster_1_mm10.w5 128 32 4.0 mean 921 ATAC:Monocytes-clusters_24-cluster_1 +922 UW24.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW24.2/summary/Monocytes-clusters_24-cluster_2_mm10.w5 128 32 4.0 mean 922 ATAC:Monocytes-clusters_24-cluster_2 +923 UW25.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW25.1/summary/Podocytes-clusters_25-cluster_1_mm10.w5 128 32 4.0 mean 923 ATAC:Podocytes-clusters_25-cluster_1 +924 UW25.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW25.2/summary/Endothelial_II_cells-clusters_25-cluster_2_mm10.w5 128 32 4.0 mean 924 ATAC:Endothelial_II_cells-clusters_25-cluster_2 +925 UW25.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW25.3/summary/Endothelial_II_cells-clusters_25-cluster_3_mm10.w5 128 32 4.0 mean 925 ATAC:Endothelial_II_cells-clusters_25-cluster_3 +926 UW26.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW26.1/summary/T_cells-clusters_26-cluster_1_mm10.w5 128 32 4.0 mean 926 ATAC:T_cells-clusters_26-cluster_1 +927 UW26.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW26.2/summary/Collisions-clusters_26-cluster_2_mm10.w5 128 32 4.0 mean 927 ATAC:Collisions-clusters_26-cluster_2 +928 UW26.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW26.3/summary/Collisions-clusters_26-cluster_3_mm10.w5 128 32 4.0 mean 928 ATAC:Collisions-clusters_26-cluster_3 +929 UW27.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW27.1/summary/Purkinje_cells-clusters_27-cluster_1_mm10.w5 128 32 4.0 mean 929 ATAC:Purkinje_cells-clusters_27-cluster_1 +930 UW27.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW27.2/summary/Collisions-clusters_27-cluster_2_mm10.w5 128 32 4.0 mean 930 ATAC:Collisions-clusters_27-cluster_2 +931 UW27.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW27.3/summary/Collisions-clusters_27-cluster_3_mm10.w5 128 32 4.0 mean 931 ATAC:Collisions-clusters_27-cluster_3 +932 UW28.1 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW28.1/summary/Immature_B_cells-clusters_28-cluster_1_mm10.w5 128 32 4.0 mean 932 ATAC:Immature_B_cells-clusters_28-cluster_1 +933 UW28.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW28.2/summary/Immature_B_cells-clusters_28-cluster_2_mm10.w5 128 32 4.0 mean 933 ATAC:Immature_B_cells-clusters_28-cluster_2 +934 UW29.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW29.1/summary/Ex_neurons_SCPN-clusters_29-cluster_1_mm10.w5 128 32 4.0 mean 934 ATAC:Ex_neurons_SCPN-clusters_29-cluster_1 +935 UW3.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW3.1/summary/Hepatocytes-clusters_3-cluster_1_mm10.w5 128 32 4.0 mean 935 ATAC:Hepatocytes-clusters_3-cluster_1 +936 UW30.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW30.1/summary/Type_II_pneumocytes-clusters_30-cluster_1_mm10.w5 128 32 4.0 mean 936 ATAC:Type_II_pneumocytes-clusters_30-cluster_1 +937 UW30.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW30.2/summary/Unknown-clusters_30-cluster_2_mm10.w5 128 32 4.0 mean 937 ATAC:Unknown-clusters_30-cluster_2 +938 UW30.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW30.3/summary/Collisions-clusters_30-cluster_3_mm10.w5 128 32 4.0 mean 938 ATAC:Collisions-clusters_30-cluster_3 +939 UW30.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW30.4/summary/Unknown-clusters_30-cluster_4_mm10.w5 128 32 4.0 mean 939 ATAC:Unknown-clusters_30-cluster_4 +940 UW4.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW4.1/summary/B_cells-clusters_4-cluster_1_mm10.w5 128 32 4.0 mean 940 ATAC:B_cells-clusters_4-cluster_1 +941 UW4.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW4.2/summary/B_cells-clusters_4-cluster_2_mm10.w5 128 32 4.0 mean 941 ATAC:B_cells-clusters_4-cluster_2 +942 UW4.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW4.3/summary/B_cells-clusters_4-cluster_3_mm10.w5 128 32 4.0 mean 942 ATAC:B_cells-clusters_4-cluster_3 +943 UW4.4 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW4.4/summary/Activated_B_cells-clusters_4-cluster_4_mm10.w5 128 32 4.0 mean 943 ATAC:Activated_B_cells-clusters_4-cluster_4 +944 UW5.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW5.1/summary/Ex_neurons_CPN-clusters_5-cluster_1_mm10.w5 128 32 4.0 mean 944 ATAC:Ex_neurons_CPN-clusters_5-cluster_1 +945 UW5.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW5.2/summary/Ex_neurons_SCPN-clusters_5-cluster_2_mm10.w5 128 32 4.0 mean 945 ATAC:Ex_neurons_SCPN-clusters_5-cluster_2 +946 UW5.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW5.3/summary/Ex_neurons_CThPN-clusters_5-cluster_3_mm10.w5 128 32 4.0 mean 946 ATAC:Ex_neurons_CThPN-clusters_5-cluster_3 +947 UW5.4 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW5.4/summary/Ex_neurons_CThPN-clusters_5-cluster_4_mm10.w5 128 32 4.0 mean 947 ATAC:Ex_neurons_CThPN-clusters_5-cluster_4 +948 UW5.5 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW5.5/summary/Inhibitory_neurons-clusters_5-cluster_5_mm10.w5 128 32 4.0 mean 948 ATAC:Inhibitory_neurons-clusters_5-cluster_5 +949 UW5.6 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW5.6/summary/Unknown-clusters_5-cluster_6_mm10.w5 128 32 4.0 mean 949 ATAC:Unknown-clusters_5-cluster_6 +950 UW6.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW6.1/summary/Enterocytes-clusters_6-cluster_1_mm10.w5 128 32 4.0 mean 950 ATAC:Enterocytes-clusters_6-cluster_1 +951 UW7.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW7.1/summary/Cardiomyocytes-clusters_7-cluster_1_mm10.w5 128 32 4.0 mean 951 ATAC:Cardiomyocytes-clusters_7-cluster_1 +952 UW7.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW7.2/summary/Unknown-clusters_7-cluster_2_mm10.w5 128 32 4.0 mean 952 ATAC:Unknown-clusters_7-cluster_2 +953 UW8.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW8.1/summary/Cerebellar_granule_cells-clusters_8-cluster_1_mm10.w5 128 32 4.0 mean 953 ATAC:Cerebellar_granule_cells-clusters_8-cluster_1 +954 UW8.2 
/home/drk/tillage/datasets/mouse/atac/uw-atlas/UW8.2/summary/Cerebellar_granule_cells-clusters_8-cluster_2_mm10.w5 128 32 4.0 mean 954 ATAC:Cerebellar_granule_cells-clusters_8-cluster_2 +955 UW9.1 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW9.1/summary/Unknown-clusters_9-cluster_1_mm10.w5 128 32 4.0 mean 955 ATAC:Unknown-clusters_9-cluster_1 +956 UW9.2 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW9.2/summary/Endothelial_II_cells-clusters_9-cluster_2_mm10.w5 128 32 4.0 mean 956 ATAC:Endothelial_II_cells-clusters_9-cluster_2 +957 UW9.3 /home/drk/tillage/datasets/mouse/atac/uw-atlas/UW9.3/summary/Endothelial_II_cells-clusters_9-cluster_3_mm10.w5 128 32 4.0 mean 957 ATAC:Endothelial_II_cells-clusters_9-cluster_3 +958 yangli1 /home/drk/tillage/datasets/mouse/atac/catlas/yangli1/summary/ASCG.w5 384 96 0.01 sum 958 ATAC:ASCG +959 yangli10 /home/drk/tillage/datasets/mouse/atac/catlas/yangli10/summary/CA3GL4.w5 384 96 0.01 sum 959 ATAC:CA3GL4 +960 yangli100 /home/drk/tillage/datasets/mouse/atac/catlas/yangli100/summary/OBGL1.w5 384 96 0.01 sum 960 ATAC:OBGL1 +961 yangli101 /home/drk/tillage/datasets/mouse/atac/catlas/yangli101/summary/OBGL2.w5 384 96 0.01 sum 961 ATAC:OBGL2 +962 yangli102 /home/drk/tillage/datasets/mouse/atac/catlas/yangli102/summary/OBGL3.w5 384 96 0.01 sum 962 ATAC:OBGL3 +963 yangli103 /home/drk/tillage/datasets/mouse/atac/catlas/yangli103/summary/OBGL4.w5 384 96 0.01 sum 963 ATAC:OBGL4 +964 yangli104 /home/drk/tillage/datasets/mouse/atac/catlas/yangli104/summary/OBGL5.w5 384 96 0.01 sum 964 ATAC:OBGL5 +965 yangli105 /home/drk/tillage/datasets/mouse/atac/catlas/yangli105/summary/OBNBL.w5 384 96 0.01 sum 965 ATAC:OBNBL +966 yangli106 /home/drk/tillage/datasets/mouse/atac/catlas/yangli106/summary/OLFGL1.w5 384 96 0.01 sum 966 ATAC:OLFGL1 +967 yangli107 /home/drk/tillage/datasets/mouse/atac/catlas/yangli107/summary/OLFGL2.w5 384 96 0.01 sum 967 ATAC:OLFGL2 +968 yangli108 /home/drk/tillage/datasets/mouse/atac/catlas/yangli108/summary/OLFGL3.w5 384 96 
0.01 sum 968 ATAC:OLFGL3 +969 yangli109 /home/drk/tillage/datasets/mouse/atac/catlas/yangli109/summary/OLFGL4.w5 384 96 0.01 sum 969 ATAC:OLFGL4 +970 yangli11 /home/drk/tillage/datasets/mouse/atac/catlas/yangli11/summary/CA3GL5.w5 384 96 0.01 sum 970 ATAC:CA3GL5 +971 yangli110 /home/drk/tillage/datasets/mouse/atac/catlas/yangli110/summary/OLFGL5.w5 384 96 0.01 sum 971 ATAC:OLFGL5 +972 yangli111 /home/drk/tillage/datasets/mouse/atac/catlas/yangli111/summary/OLFGL6.w5 384 96 0.01 sum 972 ATAC:OLFGL6 +973 yangli112 /home/drk/tillage/datasets/mouse/atac/catlas/yangli112/summary/OLFGL7.w5 384 96 0.01 sum 973 ATAC:OLFGL7 +974 yangli113 /home/drk/tillage/datasets/mouse/atac/catlas/yangli113/summary/OPC.w5 384 96 0.01 sum 974 ATAC:OPC +975 yangli114 /home/drk/tillage/datasets/mouse/atac/catlas/yangli114/summary/PER.w5 384 96 0.01 sum 975 ATAC:PER +976 yangli115 /home/drk/tillage/datasets/mouse/atac/catlas/yangli115/summary/PIRGL.w5 384 96 0.01 sum 976 ATAC:PIRGL +977 yangli116 /home/drk/tillage/datasets/mouse/atac/catlas/yangli116/summary/PTGL1.w5 384 96 0.01 sum 977 ATAC:PTGL1 +978 yangli117 /home/drk/tillage/datasets/mouse/atac/catlas/yangli117/summary/PTGL2.w5 384 96 0.01 sum 978 ATAC:PTGL2 +979 yangli118 /home/drk/tillage/datasets/mouse/atac/catlas/yangli118/summary/PTGL3.w5 384 96 0.01 sum 979 ATAC:PTGL3 +980 yangli119 /home/drk/tillage/datasets/mouse/atac/catlas/yangli119/summary/PTGL4.w5 384 96 0.01 sum 980 ATAC:PTGL4 +981 yangli12 /home/drk/tillage/datasets/mouse/atac/catlas/yangli12/summary/CA3GL6.w5 384 96 0.01 sum 981 ATAC:CA3GL6 +982 yangli120 /home/drk/tillage/datasets/mouse/atac/catlas/yangli120/summary/PTGL5.w5 384 96 0.01 sum 982 ATAC:PTGL5 +983 yangli121 /home/drk/tillage/datasets/mouse/atac/catlas/yangli121/summary/PTGL6.w5 384 96 0.01 sum 983 ATAC:PTGL6 +984 yangli122 /home/drk/tillage/datasets/mouse/atac/catlas/yangli122/summary/PTGL7.w5 384 96 0.01 sum 984 ATAC:PTGL7 +985 yangli123 /home/drk/tillage/datasets/mouse/atac/catlas/yangli123/summary/PTGL8.w5 
384 96 0.01 sum 985 ATAC:PTGL8 +986 yangli124 /home/drk/tillage/datasets/mouse/atac/catlas/yangli124/summary/PVGA1.w5 384 96 0.01 sum 986 ATAC:PVGA1 +987 yangli125 /home/drk/tillage/datasets/mouse/atac/catlas/yangli125/summary/PVGA2.w5 384 96 0.01 sum 987 ATAC:PVGA2 +988 yangli126 /home/drk/tillage/datasets/mouse/atac/catlas/yangli126/summary/PVGA3.w5 384 96 0.01 sum 988 ATAC:PVGA3 +989 yangli127 /home/drk/tillage/datasets/mouse/atac/catlas/yangli127/summary/PVGA4.w5 384 96 0.01 sum 989 ATAC:PVGA4 +990 yangli128 /home/drk/tillage/datasets/mouse/atac/catlas/yangli128/summary/PVGA5.w5 384 96 0.01 sum 990 ATAC:PVGA5 +991 yangli129 /home/drk/tillage/datasets/mouse/atac/catlas/yangli129/summary/PVGA6.w5 384 96 0.01 sum 991 ATAC:PVGA6 +992 yangli13 /home/drk/tillage/datasets/mouse/atac/catlas/yangli13/summary/CLAGL1.w5 384 96 0.01 sum 992 ATAC:CLAGL1 +993 yangli130 /home/drk/tillage/datasets/mouse/atac/catlas/yangli130/summary/PVGA7.w5 384 96 0.01 sum 993 ATAC:PVGA7 +994 yangli131 /home/drk/tillage/datasets/mouse/atac/catlas/yangli131/summary/PVM.w5 384 96 0.01 sum 994 ATAC:PVM +995 yangli132 /home/drk/tillage/datasets/mouse/atac/catlas/yangli132/summary/RGDG.w5 384 96 0.01 sum 995 ATAC:RGDG +996 yangli133 /home/drk/tillage/datasets/mouse/atac/catlas/yangli133/summary/RGSZ.w5 384 96 0.01 sum 996 ATAC:RGSZ +997 yangli134 /home/drk/tillage/datasets/mouse/atac/catlas/yangli134/summary/SSTGA1.w5 384 96 0.01 sum 997 ATAC:SSTGA1 +998 yangli135 /home/drk/tillage/datasets/mouse/atac/catlas/yangli135/summary/SSTGA10.w5 384 96 0.01 sum 998 ATAC:SSTGA10 +999 yangli136 /home/drk/tillage/datasets/mouse/atac/catlas/yangli136/summary/SSTGA2.w5 384 96 0.01 sum 999 ATAC:SSTGA2 +1000 yangli137 /home/drk/tillage/datasets/mouse/atac/catlas/yangli137/summary/SSTGA3.w5 384 96 0.01 sum 1000 ATAC:SSTGA3 +1001 yangli138 /home/drk/tillage/datasets/mouse/atac/catlas/yangli138/summary/SSTGA4.w5 384 96 0.01 sum 1001 ATAC:SSTGA4 +1002 yangli139 
/home/drk/tillage/datasets/mouse/atac/catlas/yangli139/summary/SSTGA5.w5 384 96 0.01 sum 1002 ATAC:SSTGA5 +1003 yangli14 /home/drk/tillage/datasets/mouse/atac/catlas/yangli14/summary/CLAGL2.w5 384 96 0.01 sum 1003 ATAC:CLAGL2 +1004 yangli140 /home/drk/tillage/datasets/mouse/atac/catlas/yangli140/summary/SSTGA6.w5 384 96 0.01 sum 1004 ATAC:SSTGA6 +1005 yangli141 /home/drk/tillage/datasets/mouse/atac/catlas/yangli141/summary/SSTGA7.w5 384 96 0.01 sum 1005 ATAC:SSTGA7 +1006 yangli142 /home/drk/tillage/datasets/mouse/atac/catlas/yangli142/summary/SSTGA8.w5 384 96 0.01 sum 1006 ATAC:SSTGA8 +1007 yangli143 /home/drk/tillage/datasets/mouse/atac/catlas/yangli143/summary/SSTGA9.w5 384 96 0.01 sum 1007 ATAC:SSTGA9 +1008 yangli144 /home/drk/tillage/datasets/mouse/atac/catlas/yangli144/summary/STRGA1.w5 384 96 0.01 sum 1008 ATAC:STRGA1 +1009 yangli145 /home/drk/tillage/datasets/mouse/atac/catlas/yangli145/summary/STRGA2.w5 384 96 0.01 sum 1009 ATAC:STRGA2 +1010 yangli146 /home/drk/tillage/datasets/mouse/atac/catlas/yangli146/summary/STRGA3.w5 384 96 0.01 sum 1010 ATAC:STRGA3 +1011 yangli147 /home/drk/tillage/datasets/mouse/atac/catlas/yangli147/summary/STRGA4.w5 384 96 0.01 sum 1011 ATAC:STRGA4 +1012 yangli148 /home/drk/tillage/datasets/mouse/atac/catlas/yangli148/summary/STRGA5.w5 384 96 0.01 sum 1012 ATAC:STRGA5 +1013 yangli149 /home/drk/tillage/datasets/mouse/atac/catlas/yangli149/summary/STRGA6.w5 384 96 0.01 sum 1013 ATAC:STRGA6 +1014 yangli15 /home/drk/tillage/datasets/mouse/atac/catlas/yangli15/summary/CLAGL3.w5 384 96 0.01 sum 1014 ATAC:CLAGL3 +1015 yangli150 /home/drk/tillage/datasets/mouse/atac/catlas/yangli150/summary/VEC.w5 384 96 0.01 sum 1015 ATAC:VEC +1016 yangli151 /home/drk/tillage/datasets/mouse/atac/catlas/yangli151/summary/VIPGA1.w5 384 96 0.01 sum 1016 ATAC:VIPGA1 +1017 yangli152 /home/drk/tillage/datasets/mouse/atac/catlas/yangli152/summary/VIPGA2.w5 384 96 0.01 sum 1017 ATAC:VIPGA2 +1018 yangli153 
/home/drk/tillage/datasets/mouse/atac/catlas/yangli153/summary/VIPGA3.w5 384 96 0.01 sum 1018 ATAC:VIPGA3 +1019 yangli154 /home/drk/tillage/datasets/mouse/atac/catlas/yangli154/summary/VIPGA4.w5 384 96 0.01 sum 1019 ATAC:VIPGA4 +1020 yangli155 /home/drk/tillage/datasets/mouse/atac/catlas/yangli155/summary/VLMC1.w5 384 96 0.01 sum 1020 ATAC:VLMC1 +1021 yangli156 /home/drk/tillage/datasets/mouse/atac/catlas/yangli156/summary/VLMC2.w5 384 96 0.01 sum 1021 ATAC:VLMC2 +1022 yangli157 /home/drk/tillage/datasets/mouse/atac/catlas/yangli157/summary/VLMC3.w5 384 96 0.01 sum 1022 ATAC:VLMC3 +1023 yangli158 /home/drk/tillage/datasets/mouse/atac/catlas/yangli158/summary/VPIA1.w5 384 96 0.01 sum 1023 ATAC:VPIA1 +1024 yangli159 /home/drk/tillage/datasets/mouse/atac/catlas/yangli159/summary/VPIA2.w5 384 96 0.01 sum 1024 ATAC:VPIA2 +1025 yangli16 /home/drk/tillage/datasets/mouse/atac/catlas/yangli16/summary/CNUGA.w5 384 96 0.01 sum 1025 ATAC:CNUGA +1026 yangli160 /home/drk/tillage/datasets/mouse/atac/catlas/yangli160/summary/VPIA3.w5 384 96 0.01 sum 1026 ATAC:VPIA3 +1027 yangli17 /home/drk/tillage/datasets/mouse/atac/catlas/yangli17/summary/COP.w5 384 96 0.01 sum 1027 ATAC:COP +1028 yangli18 /home/drk/tillage/datasets/mouse/atac/catlas/yangli18/summary/CRC.w5 384 96 0.01 sum 1028 ATAC:CRC +1029 yangli19 /home/drk/tillage/datasets/mouse/atac/catlas/yangli19/summary/CTGL1.w5 384 96 0.01 sum 1029 ATAC:CTGL1 +1030 yangli2 /home/drk/tillage/datasets/mouse/atac/catlas/yangli2/summary/ASCN.w5 384 96 0.01 sum 1030 ATAC:ASCN +1031 yangli20 /home/drk/tillage/datasets/mouse/atac/catlas/yangli20/summary/CTGL2.w5 384 96 0.01 sum 1031 ATAC:CTGL2 +1032 yangli21 /home/drk/tillage/datasets/mouse/atac/catlas/yangli21/summary/CTGL3.w5 384 96 0.01 sum 1032 ATAC:CTGL3 +1033 yangli22 /home/drk/tillage/datasets/mouse/atac/catlas/yangli22/summary/CTGL4.w5 384 96 0.01 sum 1033 ATAC:CTGL4 +1034 yangli23 /home/drk/tillage/datasets/mouse/atac/catlas/yangli23/summary/CTGL5.w5 384 96 0.01 sum 1034 ATAC:CTGL5 
+1035 yangli24 /home/drk/tillage/datasets/mouse/atac/catlas/yangli24/summary/CTGL6.w5 384 96 0.01 sum 1035 ATAC:CTGL6 +1036 yangli25 /home/drk/tillage/datasets/mouse/atac/catlas/yangli25/summary/D1MSN1.w5 384 96 0.01 sum 1036 ATAC:D1MSN1 +1037 yangli26 /home/drk/tillage/datasets/mouse/atac/catlas/yangli26/summary/D1MSN2.w5 384 96 0.01 sum 1037 ATAC:D1MSN2 +1038 yangli27 /home/drk/tillage/datasets/mouse/atac/catlas/yangli27/summary/D1MSN3.w5 384 96 0.01 sum 1038 ATAC:D1MSN3 +1039 yangli28 /home/drk/tillage/datasets/mouse/atac/catlas/yangli28/summary/D1MSN4.w5 384 96 0.01 sum 1039 ATAC:D1MSN4 +1040 yangli29 /home/drk/tillage/datasets/mouse/atac/catlas/yangli29/summary/D1MSN5.w5 384 96 0.01 sum 1040 ATAC:D1MSN5 +1041 yangli3 /home/drk/tillage/datasets/mouse/atac/catlas/yangli3/summary/ASCW.w5 384 96 0.01 sum 1041 ATAC:ASCW +1042 yangli30 /home/drk/tillage/datasets/mouse/atac/catlas/yangli30/summary/D2MSN1.w5 384 96 0.01 sum 1042 ATAC:D2MSN1 +1043 yangli31 /home/drk/tillage/datasets/mouse/atac/catlas/yangli31/summary/D2MSN2.w5 384 96 0.01 sum 1043 ATAC:D2MSN2 +1044 yangli32 /home/drk/tillage/datasets/mouse/atac/catlas/yangli32/summary/D2MSN3.w5 384 96 0.01 sum 1044 ATAC:D2MSN3 +1045 yangli33 /home/drk/tillage/datasets/mouse/atac/catlas/yangli33/summary/D2MSN4.w5 384 96 0.01 sum 1045 ATAC:D2MSN4 +1046 yangli34 /home/drk/tillage/datasets/mouse/atac/catlas/yangli34/summary/DGGR.w5 384 96 0.01 sum 1046 ATAC:DGGR +1047 yangli35 /home/drk/tillage/datasets/mouse/atac/catlas/yangli35/summary/DGNBL1.w5 384 96 0.01 sum 1047 ATAC:DGNBL1 +1048 yangli36 /home/drk/tillage/datasets/mouse/atac/catlas/yangli36/summary/DGNBL2.w5 384 96 0.01 sum 1048 ATAC:DGNBL2 +1049 yangli37 /home/drk/tillage/datasets/mouse/atac/catlas/yangli37/summary/DGNBL3.w5 384 96 0.01 sum 1049 ATAC:DGNBL3 +1050 yangli38 /home/drk/tillage/datasets/mouse/atac/catlas/yangli38/summary/ITHGL1.w5 384 96 0.01 sum 1050 ATAC:ITHGL1 +1051 yangli39 /home/drk/tillage/datasets/mouse/atac/catlas/yangli39/summary/ITHGL2.w5 384 
96 0.01 sum 1051 ATAC:ITHGL2 +1052 yangli4 /home/drk/tillage/datasets/mouse/atac/catlas/yangli4/summary/CA1GL1.w5 384 96 0.01 sum 1052 ATAC:CA1GL1 +1053 yangli40 /home/drk/tillage/datasets/mouse/atac/catlas/yangli40/summary/ITHGL3.w5 384 96 0.01 sum 1053 ATAC:ITHGL3 +1054 yangli41 /home/drk/tillage/datasets/mouse/atac/catlas/yangli41/summary/ITL23GL1.w5 384 96 0.01 sum 1054 ATAC:ITL23GL1 +1055 yangli42 /home/drk/tillage/datasets/mouse/atac/catlas/yangli42/summary/ITL23GL2.w5 384 96 0.01 sum 1055 ATAC:ITL23GL2 +1056 yangli43 /home/drk/tillage/datasets/mouse/atac/catlas/yangli43/summary/ITL23GL3.w5 384 96 0.01 sum 1056 ATAC:ITL23GL3 +1057 yangli44 /home/drk/tillage/datasets/mouse/atac/catlas/yangli44/summary/ITL4GL1.w5 384 96 0.01 sum 1057 ATAC:ITL4GL1 +1058 yangli45 /home/drk/tillage/datasets/mouse/atac/catlas/yangli45/summary/ITL4GL2.w5 384 96 0.01 sum 1058 ATAC:ITL4GL2 +1059 yangli46 /home/drk/tillage/datasets/mouse/atac/catlas/yangli46/summary/ITL5GL1.w5 384 96 0.01 sum 1059 ATAC:ITL5GL1 +1060 yangli47 /home/drk/tillage/datasets/mouse/atac/catlas/yangli47/summary/ITL5GL2.w5 384 96 0.01 sum 1060 ATAC:ITL5GL2 +1061 yangli48 /home/drk/tillage/datasets/mouse/atac/catlas/yangli48/summary/ITL5GL3.w5 384 96 0.01 sum 1061 ATAC:ITL5GL3 +1062 yangli49 /home/drk/tillage/datasets/mouse/atac/catlas/yangli49/summary/ITL6GL1.w5 384 96 0.01 sum 1062 ATAC:ITL6GL1 +1063 yangli5 /home/drk/tillage/datasets/mouse/atac/catlas/yangli5/summary/CA1GL2.w5 384 96 0.01 sum 1063 ATAC:CA1GL2 +1064 yangli50 /home/drk/tillage/datasets/mouse/atac/catlas/yangli50/summary/ITL6GL2.w5 384 96 0.01 sum 1064 ATAC:ITL6GL2 +1065 yangli51 /home/drk/tillage/datasets/mouse/atac/catlas/yangli51/summary/ITL6GL3.w5 384 96 0.01 sum 1065 ATAC:ITL6GL3 +1066 yangli52 /home/drk/tillage/datasets/mouse/atac/catlas/yangli52/summary/ITL6GL4.w5 384 96 0.01 sum 1066 ATAC:ITL6GL4 +1067 yangli53 /home/drk/tillage/datasets/mouse/atac/catlas/yangli53/summary/ITL6GL5.w5 384 96 0.01 sum 1067 ATAC:ITL6GL5 +1068 yangli54 
/home/drk/tillage/datasets/mouse/atac/catlas/yangli54/summary/ITL6GL6.w5 384 96 0.01 sum 1068 ATAC:ITL6GL6 +1069 yangli55 /home/drk/tillage/datasets/mouse/atac/catlas/yangli55/summary/L6bGL1.w5 384 96 0.01 sum 1069 ATAC:L6bGL1 +1070 yangli56 /home/drk/tillage/datasets/mouse/atac/catlas/yangli56/summary/L6bGL2.w5 384 96 0.01 sum 1070 ATAC:L6bGL2 +1071 yangli57 /home/drk/tillage/datasets/mouse/atac/catlas/yangli57/summary/L6bGL3.w5 384 96 0.01 sum 1071 ATAC:L6bGL3 +1072 yangli58 /home/drk/tillage/datasets/mouse/atac/catlas/yangli58/summary/L6bGL4.w5 384 96 0.01 sum 1072 ATAC:L6bGL4 +1073 yangli59 /home/drk/tillage/datasets/mouse/atac/catlas/yangli59/summary/L6bGL5.w5 384 96 0.01 sum 1073 ATAC:L6bGL5 +1074 yangli6 /home/drk/tillage/datasets/mouse/atac/catlas/yangli6/summary/CA1GL3.w5 384 96 0.01 sum 1074 ATAC:CA1GL3 +1075 yangli60 /home/drk/tillage/datasets/mouse/atac/catlas/yangli60/summary/L6bGL6.w5 384 96 0.01 sum 1075 ATAC:L6bGL6 +1076 yangli61 /home/drk/tillage/datasets/mouse/atac/catlas/yangli61/summary/LAMGA1.w5 384 96 0.01 sum 1076 ATAC:LAMGA1 +1077 yangli62 /home/drk/tillage/datasets/mouse/atac/catlas/yangli62/summary/LAMGA2.w5 384 96 0.01 sum 1077 ATAC:LAMGA2 +1078 yangli63 /home/drk/tillage/datasets/mouse/atac/catlas/yangli63/summary/LAMGA3.w5 384 96 0.01 sum 1078 ATAC:LAMGA3 +1079 yangli64 /home/drk/tillage/datasets/mouse/atac/catlas/yangli64/summary/LAMGA4.w5 384 96 0.01 sum 1079 ATAC:LAMGA4 +1080 yangli65 /home/drk/tillage/datasets/mouse/atac/catlas/yangli65/summary/LSXGA1.w5 384 96 0.01 sum 1080 ATAC:LSXGA1 +1081 yangli66 /home/drk/tillage/datasets/mouse/atac/catlas/yangli66/summary/LSXGA2.w5 384 96 0.01 sum 1081 ATAC:LSXGA2 +1082 yangli67 /home/drk/tillage/datasets/mouse/atac/catlas/yangli67/summary/LSXGA3.w5 384 96 0.01 sum 1082 ATAC:LSXGA3 +1083 yangli68 /home/drk/tillage/datasets/mouse/atac/catlas/yangli68/summary/LSXGA4.w5 384 96 0.01 sum 1083 ATAC:LSXGA4 +1084 yangli69 /home/drk/tillage/datasets/mouse/atac/catlas/yangli69/summary/LSXGA5.w5 384 96 
0.01 sum 1084 ATAC:LSXGA5 +1085 yangli7 /home/drk/tillage/datasets/mouse/atac/catlas/yangli7/summary/CA3GL1.w5 384 96 0.01 sum 1085 ATAC:CA3GL1 +1086 yangli70 /home/drk/tillage/datasets/mouse/atac/catlas/yangli70/summary/LSXGA6.w5 384 96 0.01 sum 1086 ATAC:LSXGA6 +1087 yangli71 /home/drk/tillage/datasets/mouse/atac/catlas/yangli71/summary/LSXGA7.w5 384 96 0.01 sum 1087 ATAC:LSXGA7 +1088 yangli72 /home/drk/tillage/datasets/mouse/atac/catlas/yangli72/summary/MFOL.w5 384 96 0.01 sum 1088 ATAC:MFOL +1089 yangli73 /home/drk/tillage/datasets/mouse/atac/catlas/yangli73/summary/MGL.w5 384 96 0.01 sum 1089 ATAC:MGL +1090 yangli74 /home/drk/tillage/datasets/mouse/atac/catlas/yangli74/summary/MOL.w5 384 96 0.01 sum 1090 ATAC:MOL +1091 yangli75 /home/drk/tillage/datasets/mouse/atac/catlas/yangli75/summary/MSGA1.w5 384 96 0.01 sum 1091 ATAC:MSGA1 +1092 yangli76 /home/drk/tillage/datasets/mouse/atac/catlas/yangli76/summary/MSGA10.w5 384 96 0.01 sum 1092 ATAC:MSGA10 +1093 yangli77 /home/drk/tillage/datasets/mouse/atac/catlas/yangli77/summary/MSGA11.w5 384 96 0.01 sum 1093 ATAC:MSGA11 +1094 yangli78 /home/drk/tillage/datasets/mouse/atac/catlas/yangli78/summary/MSGA12.w5 384 96 0.01 sum 1094 ATAC:MSGA12 +1095 yangli79 /home/drk/tillage/datasets/mouse/atac/catlas/yangli79/summary/MSGA13.w5 384 96 0.01 sum 1095 ATAC:MSGA13 +1096 yangli8 /home/drk/tillage/datasets/mouse/atac/catlas/yangli8/summary/CA3GL2.w5 384 96 0.01 sum 1096 ATAC:CA3GL2 +1097 yangli80 /home/drk/tillage/datasets/mouse/atac/catlas/yangli80/summary/MSGA2.w5 384 96 0.01 sum 1097 ATAC:MSGA2 +1098 yangli81 /home/drk/tillage/datasets/mouse/atac/catlas/yangli81/summary/MSGA3.w5 384 96 0.01 sum 1098 ATAC:MSGA3 +1099 yangli82 /home/drk/tillage/datasets/mouse/atac/catlas/yangli82/summary/MSGA4.w5 384 96 0.01 sum 1099 ATAC:MSGA4 +1100 yangli83 /home/drk/tillage/datasets/mouse/atac/catlas/yangli83/summary/MSGA5.w5 384 96 0.01 sum 1100 ATAC:MSGA5 +1101 yangli84 
/home/drk/tillage/datasets/mouse/atac/catlas/yangli84/summary/MSGA6.w5 384 96 0.01 sum 1101 ATAC:MSGA6 +1102 yangli85 /home/drk/tillage/datasets/mouse/atac/catlas/yangli85/summary/MSGA7.w5 384 96 0.01 sum 1102 ATAC:MSGA7 +1103 yangli86 /home/drk/tillage/datasets/mouse/atac/catlas/yangli86/summary/MSGA8.w5 384 96 0.01 sum 1103 ATAC:MSGA8 +1104 yangli87 /home/drk/tillage/datasets/mouse/atac/catlas/yangli87/summary/MSGA9.w5 384 96 0.01 sum 1104 ATAC:MSGA9 +1105 yangli88 /home/drk/tillage/datasets/mouse/atac/catlas/yangli88/summary/MXD.w5 384 96 0.01 sum 1105 ATAC:MXD +1106 yangli89 /home/drk/tillage/datasets/mouse/atac/catlas/yangli89/summary/NFOL.w5 384 96 0.01 sum 1106 ATAC:NFOL +1107 yangli9 /home/drk/tillage/datasets/mouse/atac/catlas/yangli9/summary/CA3GL3.w5 384 96 0.01 sum 1107 ATAC:CA3GL3 +1108 yangli90 /home/drk/tillage/datasets/mouse/atac/catlas/yangli90/summary/NIPC.w5 384 96 0.01 sum 1108 ATAC:NIPC +1109 yangli91 /home/drk/tillage/datasets/mouse/atac/catlas/yangli91/summary/NPGL1.w5 384 96 0.01 sum 1109 ATAC:NPGL1 +1110 yangli92 /home/drk/tillage/datasets/mouse/atac/catlas/yangli92/summary/NPGL2.w5 384 96 0.01 sum 1110 ATAC:NPGL2 +1111 yangli93 /home/drk/tillage/datasets/mouse/atac/catlas/yangli93/summary/NPGL3.w5 384 96 0.01 sum 1111 ATAC:NPGL3 +1112 yangli94 /home/drk/tillage/datasets/mouse/atac/catlas/yangli94/summary/NPGL4.w5 384 96 0.01 sum 1112 ATAC:NPGL4 +1113 yangli95 /home/drk/tillage/datasets/mouse/atac/catlas/yangli95/summary/NPGL5.w5 384 96 0.01 sum 1113 ATAC:NPGL5 +1114 yangli96 /home/drk/tillage/datasets/mouse/atac/catlas/yangli96/summary/OBDOP1.w5 384 96 0.01 sum 1114 ATAC:OBDOP1 +1115 yangli97 /home/drk/tillage/datasets/mouse/atac/catlas/yangli97/summary/OBDOP2.w5 384 96 0.01 sum 1115 ATAC:OBDOP2 +1116 yangli98 /home/drk/tillage/datasets/mouse/atac/catlas/yangli98/summary/OBGA1.w5 384 96 0.01 sum 1116 ATAC:OBGA1 +1117 yangli99 /home/drk/tillage/datasets/mouse/atac/catlas/yangli99/summary/OBGA2.w5 384 96 0.01 sum 1117 ATAC:OBGA2 +1118 
ENCFF426ODS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADI/summary/ENCFF426ODS.w5 128 32 2.0 mean 1118 CHIP:H3K4me3:B10.H-2aH-4bp/Wts CH12.LX +1119 ENCFF993SYN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADK/summary/ENCFF993SYN.w5 128 32 2.0 mean 1119 CHIP:H3K4me1:129 ES-E14 +1120 ENCFF240MDV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADL/summary/ENCFF240MDV.w5 128 32 2.0 mean 1120 CHIP:H3K4me3:129 ES-E14 +1121 ENCFF325GQU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADM/summary/ENCFF325GQU.w5 128 32 2.0 mean 1121 CHIP:H3K9me3:129 ES-E14 +1122 ENCFF409ARU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADP/summary/ENCFF409ARU.w5 128 32 2.0 mean 1122 CHIP:H3K27me3:DBA/2 MEL cell line +1123 ENCFF070ZCF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADQ/summary/ENCFF070ZCF.w5 128 32 2.0 mean 1123 CHIP:H3K4me1:DBA/2 MEL cell line +1124 ENCFF583MBD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADR/summary/ENCFF583MBD.w5 128 32 2.0 mean 1124 CHIP:H3K4me3:DBA/2 MEL cell line +1125 ENCFF089WYW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADT/summary/ENCFF089WYW.w5 128 32 2.0 mean 1125 CHIP:H3K9me3:DBA/2 MEL cell line +1126 ENCFF268EAZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADW/summary/ENCFF268EAZ.w5 128 32 2.0 mean 1126 CHIP:H3K27me3:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1127 ENCFF981EVL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADX/summary/ENCFF981EVL.w5 128 32 2.0 mean 1127 CHIP:H3K36me3:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1128 ENCFF335VHL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ADY/summary/ENCFF335VHL.w5 128 32 2.0 mean 1128 CHIP:H3K4me1:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1129 ENCFF206DMH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AEA/summary/ENCFF206DMH.w5 128 32 2.0 mean 1129 CHIP:H3K4me3:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1130 
ENCFF393FAV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AEB/summary/ENCFF393FAV.w5 128 32 2.0 mean 1130 CHIP:H3K9me3:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1131 ENCFF517JPC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHL/summary/ENCFF517JPC.w5 128 32 2.0 mean 1131 CHIP:H3K36me3:C3H C2C12 +1132 ENCFF488BZX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHM/summary/ENCFF488BZX.w5 128 32 2.0 mean 1132 CHIP:H3K27me3:C3H myocyte originated from C2C12 +1133 ENCFF388RKS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHN/summary/ENCFF388RKS.w5 128 32 2.0 mean 1133 CHIP:H3K79me2:C3H myocyte originated from C2C12 +1134 ENCFF436ZSV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHO/summary/ENCFF436ZSV.w5 128 32 2.0 mean 1134 CHIP:H3K4me3:C3H C2C12 +1135 ENCFF696NHF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHP/summary/ENCFF696NHF.w5 128 32 2.0 mean 1135 CHIP:H3ac:C3H C2C12 +1136 ENCFF442ENM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHQ/summary/ENCFF442ENM.w5 128 32 2.0 mean 1136 CHIP:H3K79me2:C3H C2C12 +1137 ENCFF962JNF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHR/summary/ENCFF962JNF.w5 128 32 2.0 mean 1137 CHIP:H3K27me3:C3H C2C12 +1138 ENCFF245DME /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHS/summary/ENCFF245DME.w5 128 32 2.0 mean 1138 CHIP:H3K4me2:C3H myocyte originated from C2C12 +1139 ENCFF518CXM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHT/summary/ENCFF518CXM.w5 128 32 2.0 mean 1139 CHIP:H3K4me3:C3H myocyte originated from C2C12 +1140 ENCFF222EBN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHU/summary/ENCFF222EBN.w5 128 32 2.0 mean 1140 CHIP:H3ac:C3H myocyte originated from C2C12 +1141 ENCFF046UQY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHV/summary/ENCFF046UQY.w5 128 32 2.0 mean 1141 CHIP:H3K36me3:C3H myocyte originated from C2C12 +1142 ENCFF921JTJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AHW/summary/ENCFF921JTJ.w5 128 
32 2.0 mean 1142 CHIP:H3K79me3:C3H C2C12 +1143 ENCFF096FAR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIB/summary/ENCFF096FAR.w5 128 32 2.0 mean 1143 CHIP:CEBPB:C3H myocyte originated from C2C12 +1144 ENCFF059BXG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIC/summary/ENCFF059BXG.w5 128 32 2.0 mean 1144 CHIP:MYOG:C3H myocyte originated from C2C12 +1145 ENCFF891VZV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AID/summary/ENCFF891VZV.w5 128 32 2.0 mean 1145 CHIP:MYOG:C3H myocyte originated from C2C12 +1146 ENCFF170WKG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIE/summary/ENCFF170WKG.w5 128 32 2.0 mean 1146 CHIP:USF1:C3H myocyte originated from C2C12 +1147 ENCFF462ONT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIG/summary/ENCFF462ONT.w5 128 32 2.0 mean 1147 CHIP:MYOD1:C3H C2C12 +1148 ENCFF779WLW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIH/summary/ENCFF779WLW.w5 128 32 2.0 mean 1148 CHIP:MYOD1:C3H myocyte originated from C2C12 +1149 ENCFF528UTW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AII/summary/ENCFF528UTW.w5 128 32 2.0 mean 1149 CHIP:E2F4:C3H myocyte originated from C2C12 +1150 ENCFF583QSM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIJ/summary/ENCFF583QSM.w5 128 32 2.0 mean 1150 CHIP:CTCF:C3H C2C12 +1151 ENCFF595PRV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIL/summary/ENCFF595PRV.w5 128 32 2.0 mean 1151 CHIP:MAX:C3H myocyte originated from C2C12 +1152 ENCFF552PFK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIM/summary/ENCFF552PFK.w5 128 32 2.0 mean 1152 CHIP:SRF:C3H myocyte originated from C2C12 +1153 ENCFF247PFV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIN/summary/ENCFF247PFV.w5 128 32 2.0 mean 1153 CHIP:TCF12:C3H myocyte originated from C2C12 +1154 ENCFF862HLX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIO/summary/ENCFF862HLX.w5 128 32 2.0 mean 1154 CHIP:MAX:C3H C2C12 +1155 ENCFF565MVR 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIP/summary/ENCFF565MVR.w5 128 32 2.0 mean 1155 CHIP:TCF3:C3H myocyte originated from C2C12 +1156 ENCFF602DCT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIQ/summary/ENCFF602DCT.w5 128 32 2.0 mean 1156 CHIP:USF1:C3H C2C12 +1157 ENCFF539PGU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIR/summary/ENCFF539PGU.w5 128 32 2.0 mean 1157 CHIP:MYOD1:C3H myocyte originated from C2C12 +1158 ENCFF967HVW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIS/summary/ENCFF967HVW.w5 128 32 2.0 mean 1158 CHIP:REST:C3H myocyte originated from C2C12 +1159 ENCFF122ZAR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIT/summary/ENCFF122ZAR.w5 128 32 2.0 mean 1159 CHIP:MYOD1:C3H myocyte originated from C2C12 +1160 ENCFF323UQU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIU/summary/ENCFF323UQU.w5 128 32 2.0 mean 1160 CHIP:POLR2AphosphoS2:C3H myocyte originated from C2C12 +1161 ENCFF674URL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIV/summary/ENCFF674URL.w5 128 32 2.0 mean 1161 CHIP:CEBPB:C3H C2C12 +1162 ENCFF140UCV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIW/summary/ENCFF140UCV.w5 128 32 2.0 mean 1162 CHIP:MYOG:C3H myocyte originated from C2C12 +1163 ENCFF680DPI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000AIX/summary/ENCFF680DPI.w5 128 32 2.0 mean 1163 CHIP:POLR2A:C3H myocyte originated from C2C12 +1164 ENCFF021EDM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAE/summary/ENCFF021EDM.w5 128 32 2.0 mean 1164 CHIP:H3K4me1:C57BL/6 heart male adult (8 weeks) +1165 ENCFF192PPW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAF/summary/ENCFF192PPW.w5 128 32 2.0 mean 1165 CHIP:H3K4me1:C57BL/6 kidney male adult (8 weeks) +1166 ENCFF518BJU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAG/summary/ENCFF518BJU.w5 128 32 2.0 mean 1166 CHIP:H3K4me1:C57BL/6 bone marrow male adult (8 weeks) +1167 ENCFF620KTZ 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAH/summary/ENCFF620KTZ.w5 128 32 2.0 mean 1167 CHIP:H3K4me3:C57BL/6 bone marrow male adult (8 weeks) +1168 ENCFF815CFZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAI/summary/ENCFF815CFZ.w5 128 32 2.0 mean 1168 CHIP:H3K4me1:C57BL/6 cortical plate male adult (8 weeks) +1169 ENCFF288SAJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAK/summary/ENCFF288SAJ.w5 128 32 2.0 mean 1169 CHIP:H3K4me3:C57BL/6 cerebellum male adult (8 weeks) +1170 ENCFF827AKI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAL/summary/ENCFF827AKI.w5 128 32 2.0 mean 1170 CHIP:H3K4me1:C57BL/6 cerebellum male adult (8 weeks) +1171 ENCFF118SXS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAM/summary/ENCFF118SXS.w5 128 32 2.0 mean 1171 CHIP:H3K4me3:C57BL/6 heart male adult (8 weeks) +1172 ENCFF177YGH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAN/summary/ENCFF177YGH.w5 128 32 2.0 mean 1172 CHIP:H3K4me3:C57BL/6 kidney male adult (8 weeks) +1173 ENCFF881VZQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAO/summary/ENCFF881VZQ.w5 128 32 2.0 mean 1173 CHIP:H3K4me1:C57BL/6 liver male adult (8 weeks) +1174 ENCFF753IXB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAP/summary/ENCFF753IXB.w5 128 32 2.0 mean 1174 CHIP:H3K4me3:C57BL/6 liver male adult (8 weeks) +1175 ENCFF206LES /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAQ/summary/ENCFF206LES.w5 128 32 2.0 mean 1175 CHIP:H3K4me1:C57BL/6 lung male adult (8 weeks) +1176 ENCFF353PNU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CAZ/summary/ENCFF353PNU.w5 128 32 2.0 mean 1176 CHIP:H3K4me1:C57BL/6 embryonic fibroblast male embryo (13.5 weeks) +1177 ENCFF379VFT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBA/summary/ENCFF379VFT.w5 128 32 2.0 mean 1177 CHIP:H3K4me3:C57BL/6 embryonic fibroblast male embryo (13.5 weeks) +1178 ENCFF202RFR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBC/summary/ENCFF202RFR.w5 128 32 2.0 mean 
1178 CHIP:H3K4me1:C57BL/6 spleen male adult (8 weeks) +1179 ENCFF295HDA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBF/summary/ENCFF295HDA.w5 128 32 2.0 mean 1179 CHIP:H3K4me1:C57BL/6 ES-Bruce4 +1180 ENCFF363JJV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBG/summary/ENCFF363JJV.w5 128 32 2.0 mean 1180 CHIP:H3K4me3:C57BL/6 ES-Bruce4 +1181 ENCFF639AFA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBI/summary/ENCFF639AFA.w5 128 32 2.0 mean 1181 CHIP:CTCF:C57BL/6 heart male adult (8 weeks) +1182 ENCFF164ZFD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBJ/summary/ENCFF164ZFD.w5 128 32 2.0 mean 1182 CHIP:CTCF:C57BL/6 kidney male adult (8 weeks) +1183 ENCFF916PLK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBK/summary/ENCFF916PLK.w5 128 32 2.0 mean 1183 CHIP:POLR2A:C57BL/6 kidney male adult (8 weeks) +1184 ENCFF156BYW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBL/summary/ENCFF156BYW.w5 128 32 2.0 mean 1184 CHIP:CTCF:C57BL/6 bone marrow male adult (8 weeks) +1185 ENCFF595EFE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBM/summary/ENCFF595EFE.w5 128 32 2.0 mean 1185 CHIP:POLR2A:C57BL/6 bone marrow male adult (8 weeks) +1186 ENCFF867MOQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBN/summary/ENCFF867MOQ.w5 128 32 2.0 mean 1186 CHIP:CTCF:C57BL/6 cerebellum male adult (8 weeks) +1187 ENCFF625PME /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBQ/summary/ENCFF625PME.w5 128 32 2.0 mean 1187 CHIP:POLR2A:C57BL/6 cerebellum male adult (8 weeks) +1188 ENCFF868AOT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBS/summary/ENCFF868AOT.w5 128 32 2.0 mean 1188 CHIP:POLR2A:C57BL/6 heart male adult (8 weeks) +1189 ENCFF919KIP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBU/summary/ENCFF919KIP.w5 128 32 2.0 mean 1189 CHIP:CTCF:C57BL/6 liver male adult (8 weeks) +1190 ENCFF538KBR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBV/summary/ENCFF538KBR.w5 128 32 2.0 mean 1190 CHIP:CTCF:C57BL/6 
lung male adult (8 weeks) +1191 ENCFF432JCZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBW/summary/ENCFF432JCZ.w5 128 32 2.0 mean 1191 CHIP:CTCF:C57BL/6 embryonic fibroblast male embryo (13.5 weeks) +1192 ENCFF378CWR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBX/summary/ENCFF378CWR.w5 128 32 2.0 mean 1192 CHIP:POLR2A:C57BL/6 embryonic fibroblast male embryo (13.5 weeks) +1193 ENCFF376VCU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CBZ/summary/ENCFF376VCU.w5 128 32 2.0 mean 1193 CHIP:POLR2A:C57BL/6 spleen male adult (8 weeks) +1194 ENCFF852EDW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCA/summary/ENCFF852EDW.w5 128 32 2.0 mean 1194 CHIP:EP300:C57BL/6 heart male adult (8 weeks) +1195 ENCFF339GSH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCB/summary/ENCFF339GSH.w5 128 32 2.0 mean 1195 CHIP:CTCF:C57BL/6 ES-Bruce4 +1196 ENCFF040HIS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCC/summary/ENCFF040HIS.w5 128 32 2.0 mean 1196 CHIP:POLR2A:C57BL/6 ES-Bruce4 +1197 ENCFF320LZI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCD/summary/ENCFF320LZI.w5 128 32 2.0 mean 1197 CHIP:EP300:C57BL/6 ES-Bruce4 +1198 ENCFF669DNL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCE/summary/ENCFF669DNL.w5 128 32 2.0 mean 1198 CHIP:H3K27ac:C57BL/6 olfactory bulb male adult (8 weeks) +1199 ENCFF733KTU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCF/summary/ENCFF733KTU.w5 128 32 2.0 mean 1199 CHIP:H3K4me1:C57BL/6 olfactory bulb male adult (8 weeks) +1200 ENCFF354DWX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCH/summary/ENCFF354DWX.w5 128 32 2.0 mean 1200 CHIP:H3K27ac:C57BL/6 thymus male adult (8 weeks) +1201 ENCFF666XCJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCI/summary/ENCFF666XCJ.w5 128 32 2.0 mean 1201 CHIP:H3K4me1:C57BL/6 thymus male adult (8 weeks) +1202 ENCFF810PIW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCJ/summary/ENCFF810PIW.w5 128 32 2.0 mean 1202 CHIP:H3K4me3:C57BL/6 
thymus male adult (8 weeks) +1203 ENCFF692OFZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCL/summary/ENCFF692OFZ.w5 128 32 2.0 mean 1203 CHIP:H3K27ac:C57BL/6 bone marrow male adult (8 weeks) +1204 ENCFF212SMS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCQ/summary/ENCFF212SMS.w5 128 32 2.0 mean 1204 CHIP:H3K27ac:C57BL/6 small intestine male adult (8 weeks) +1205 ENCFF159RUF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCR/summary/ENCFF159RUF.w5 128 32 2.0 mean 1205 CHIP:H3K4me1:C57BL/6 small intestine male adult (8 weeks) +1206 ENCFF032JSS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCS/summary/ENCFF032JSS.w5 128 32 2.0 mean 1206 CHIP:H3K4me3:C57BL/6 small intestine male adult (8 weeks) +1207 ENCFF572JFH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCU/summary/ENCFF572JFH.w5 128 32 2.0 mean 1207 CHIP:H3K27ac:C57BL/6 testis male adult (8 weeks) +1208 ENCFF073HRX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCV/summary/ENCFF073HRX.w5 128 32 2.0 mean 1208 CHIP:H3K4me1:C57BL/6 testis male adult (8 weeks) +1209 ENCFF147BMC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CCW/summary/ENCFF147BMC.w5 128 32 2.0 mean 1209 CHIP:H3K4me3:C57BL/6 testis male adult (8 weeks) +1210 ENCFF234IJR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDC/summary/ENCFF234IJR.w5 128 32 2.0 mean 1210 CHIP:H3K27ac:C57BL/6 cerebellum male adult (8 weeks) +1211 ENCFF619CVT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDD/summary/ENCFF619CVT.w5 128 32 2.0 mean 1211 CHIP:H3K27ac:C57BL/6 cortical plate male adult (8 weeks) +1212 ENCFF124CBV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDE/summary/ENCFF124CBV.w5 128 32 2.0 mean 1212 CHIP:H3K27ac:C57BL/6 ES-Bruce4 +1213 ENCFF691YDA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDF/summary/ENCFF691YDA.w5 128 32 2.0 mean 1213 CHIP:H3K27ac:C57BL/6 heart male adult (8 weeks) +1214 ENCFF188DKO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDG/summary/ENCFF188DKO.w5 
128 32 2.0 mean 1214 CHIP:H3K27ac:C57BL/6 kidney male adult (8 weeks) +1215 ENCFF632VNQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDH/summary/ENCFF632VNQ.w5 128 32 2.0 mean 1215 CHIP:H3K27ac:C57BL/6 liver male adult (8 weeks) +1216 ENCFF387JGJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDI/summary/ENCFF387JGJ.w5 128 32 2.0 mean 1216 CHIP:H3K27ac:C57BL/6 embryonic fibroblast male embryo (13.5 weeks) +1217 ENCFF470WIY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDJ/summary/ENCFF470WIY.w5 128 32 2.0 mean 1217 CHIP:H3K27ac:C57BL/6 spleen male adult (8 weeks) +1218 ENCFF034YQZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDK/summary/ENCFF034YQZ.w5 128 32 2.0 mean 1218 CHIP:H3K27ac:C57BL/6 heart embryo (14.5 days) +1219 ENCFF415BLI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDL/summary/ENCFF415BLI.w5 128 32 2.0 mean 1219 CHIP:H3K4me1:C57BL/6 heart embryo (14.5 days) +1220 ENCFF227BHG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDM/summary/ENCFF227BHG.w5 128 32 2.0 mean 1220 CHIP:H3K4me3:C57BL/6 heart embryo (14.5 days) +1221 ENCFF924IWW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDO/summary/ENCFF924IWW.w5 128 32 2.0 mean 1221 CHIP:H3K27ac:C57BL/6 placenta female adult (8 weeks) +1222 ENCFF844ZFQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDP/summary/ENCFF844ZFQ.w5 128 32 2.0 mean 1222 CHIP:H3K4me1:C57BL/6 placenta female adult (8 weeks) +1223 ENCFF235ZAJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDQ/summary/ENCFF235ZAJ.w5 128 32 2.0 mean 1223 CHIP:H3K4me3:C57BL/6 placenta female adult (8 weeks) +1224 ENCFF069XTE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDS/summary/ENCFF069XTE.w5 128 32 2.0 mean 1224 CHIP:H3K4me3:C57BL/6 olfactory bulb male adult (8 weeks) +1225 ENCFF506KCT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDU/summary/ENCFF506KCT.w5 128 32 2.0 mean 1225 CHIP:H3K27ac:C57BL/6 liver embryo (14.5 days) +1226 ENCFF309BER 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDV/summary/ENCFF309BER.w5 128 32 2.0 mean 1226 CHIP:H3K4me3:C57BL/6 liver embryo (14.5 days) +1227 ENCFF850QFU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDW/summary/ENCFF850QFU.w5 128 32 2.0 mean 1227 CHIP:H3K4me1:C57BL/6 liver embryo (14.5 days) +1228 ENCFF193PLE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CDY/summary/ENCFF193PLE.w5 128 32 2.0 mean 1228 CHIP:POLR2A:C57BL/6 olfactory bulb male adult (8 weeks) +1229 ENCFF820TWT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEE/summary/ENCFF820TWT.w5 128 32 2.0 mean 1229 CHIP:POLR2A:C57BL/6 small intestine male adult (8 weeks) +1230 ENCFF946MNK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEF/summary/ENCFF946MNK.w5 128 32 2.0 mean 1230 CHIP:CTCF:C57BL/6 testis male adult (8 weeks) +1231 ENCFF333YCZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEG/summary/ENCFF333YCZ.w5 128 32 2.0 mean 1231 CHIP:POLR2A:C57BL/6 testis male adult (8 weeks) +1232 ENCFF344SFV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEJ/summary/ENCFF344SFV.w5 128 32 2.0 mean 1232 CHIP:H3K27me3:C57BL/6 heart male adult (8 weeks) +1233 ENCFF865YFU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEK/summary/ENCFF865YFU.w5 128 32 2.0 mean 1233 CHIP:H3K36me3:C57BL/6 heart male adult (8 weeks) +1234 ENCFF042INO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEL/summary/ENCFF042INO.w5 128 32 2.0 mean 1234 CHIP:H3K79me2:C57BL/6 heart male adult (8 weeks) +1235 ENCFF421ODL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEM/summary/ENCFF421ODL.w5 128 32 2.0 mean 1235 CHIP:H3K9ac:C57BL/6 heart male adult (8 weeks) +1236 ENCFF172GJQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEN/summary/ENCFF172GJQ.w5 128 32 2.0 mean 1236 CHIP:H3K27me3:C57BL/6 liver male adult (8 weeks) +1237 ENCFF093JJJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEO/summary/ENCFF093JJJ.w5 128 32 2.0 mean 1237 CHIP:H3K36me3:C57BL/6 liver male adult (8 
weeks) +1238 ENCFF157PQZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEP/summary/ENCFF157PQZ.w5 128 32 2.0 mean 1238 CHIP:H3K79me2:C57BL/6 liver male adult (8 weeks) +1239 ENCFF265TCP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEQ/summary/ENCFF265TCP.w5 128 32 2.0 mean 1239 CHIP:H3K9ac:C57BL/6 liver male adult (8 weeks) +1240 ENCFF029TZQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CER/summary/ENCFF029TZQ.w5 128 32 2.0 mean 1240 CHIP:H3K27me3:DBA/2 MEL cell line +1241 ENCFF207HCQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CES/summary/ENCFF207HCQ.w5 128 32 2.0 mean 1241 CHIP:H3K36me3:DBA/2 MEL cell line +1242 ENCFF279GJP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CET/summary/ENCFF279GJP.w5 128 32 2.0 mean 1242 CHIP:H3K79me2:DBA/2 MEL cell line +1243 ENCFF603CPU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEU/summary/ENCFF603CPU.w5 128 32 2.0 mean 1243 CHIP:H3K9ac:DBA/2 MEL cell line +1244 ENCFF414TDD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEV/summary/ENCFF414TDD.w5 128 32 2.0 mean 1244 CHIP:H3K27ac:DBA/2 MEL cell line +1245 ENCFF488XKH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEW/summary/ENCFF488XKH.w5 128 32 2.0 mean 1245 CHIP:H3K4me1:DBA/2 MEL cell line +1246 ENCFF496UKK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEX/summary/ENCFF496UKK.w5 128 32 2.0 mean 1246 CHIP:H3K4me3:DBA/2 MEL cell line +1247 ENCFF012PBY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CEZ/summary/ENCFF012PBY.w5 128 32 2.0 mean 1247 CHIP:H3K27ac:C57BL/6 brown adipose tissue male adult (24 weeks) +1248 ENCFF494KRZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFA/summary/ENCFF494KRZ.w5 128 32 2.0 mean 1248 CHIP:H3K4me1:C57BL/6 brown adipose tissue male adult (24 weeks) +1249 ENCFF966UOI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFB/summary/ENCFF966UOI.w5 128 32 2.0 mean 1249 CHIP:H3K4me3:C57BL/6 brown adipose tissue male adult (24 weeks) +1250 ENCFF668SMR 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFD/summary/ENCFF668SMR.w5 128 32 2.0 mean 1250 CHIP:H3K27ac:C57BL/6 bone marrow macrophage male adult (8 weeks) +1251 ENCFF167PJA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFE/summary/ENCFF167PJA.w5 128 32 2.0 mean 1251 CHIP:H3K4me1:C57BL/6 bone marrow macrophage male adult (8 weeks) +1252 ENCFF186XGM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFF/summary/ENCFF186XGM.w5 128 32 2.0 mean 1252 CHIP:H3K4me3:C57BL/6 bone marrow macrophage male adult (8 weeks) +1253 ENCFF771PZS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFH/summary/ENCFF771PZS.w5 128 32 2.0 mean 1253 CHIP:CTCF:DBA/2 MEL cell line +1254 ENCFF218UAZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFI/summary/ENCFF218UAZ.w5 128 32 2.0 mean 1254 CHIP:POLR2A:DBA/2 MEL cell line +1255 ENCFF826OQW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFJ/summary/ENCFF826OQW.w5 128 32 2.0 mean 1255 CHIP:CTCF:C57BL/6 bone marrow macrophage male adult (8 weeks) +1256 ENCFF575UNY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFK/summary/ENCFF575UNY.w5 128 32 2.0 mean 1256 CHIP:POLR2A:C57BL/6 bone marrow macrophage male adult (8 weeks) +1257 ENCFF863PEU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFL/summary/ENCFF863PEU.w5 128 32 2.0 mean 1257 CHIP:H3K36me3:B10.H-2aH-4bp/Wts CH12.LX +1258 ENCFF614WMD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFM/summary/ENCFF614WMD.w5 128 32 2.0 mean 1258 CHIP:H3K27me3:C57BL/6 cerebellum male adult (8 weeks) +1259 ENCFF316WWL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFN/summary/ENCFF316WWL.w5 128 32 2.0 mean 1259 CHIP:H3K27me3:C57BL/6 ES-Bruce4 +1260 ENCFF109YST /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFO/summary/ENCFF109YST.w5 128 32 2.0 mean 1260 CHIP:H3K36me3:C57BL/6 ES-Bruce4 +1261 ENCFF324KCF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFP/summary/ENCFF324KCF.w5 128 32 2.0 mean 1261 CHIP:H3K27me3:C57BL/6 kidney male adult (8 
weeks) +1262 ENCFF175AIB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFQ/summary/ENCFF175AIB.w5 128 32 2.0 mean 1262 CHIP:H3K36me3:C57BL/6 kidney male adult (8 weeks) +1263 ENCFF906CRU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFR/summary/ENCFF906CRU.w5 128 32 2.0 mean 1263 CHIP:H3K27me3:C57BL/6 small intestine male adult (8 weeks) +1264 ENCFF204STH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFS/summary/ENCFF204STH.w5 128 32 2.0 mean 1264 CHIP:H3K36me3:C57BL/6 small intestine male adult (8 weeks) +1265 ENCFF911TSZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFT/summary/ENCFF911TSZ.w5 128 32 2.0 mean 1265 CHIP:H3K36me3:C57BL/6 spleen male adult (8 weeks) +1266 ENCFF974RAM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFU/summary/ENCFF974RAM.w5 128 32 2.0 mean 1266 CHIP:H3K36me3:C57BL/6 testis male adult (8 weeks) +1267 ENCFF287DIJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFV/summary/ENCFF287DIJ.w5 128 32 2.0 mean 1267 CHIP:H3K36me3:C57BL/6 thymus male adult (8 weeks) +1268 ENCFF655NSX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFX/summary/ENCFF655NSX.w5 128 32 2.0 mean 1268 CHIP:H3K27me3:C57BL/6 splenic B cell male adult (8 weeks) +1269 ENCFF198YXO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFY/summary/ENCFF198YXO.w5 128 32 2.0 mean 1269 CHIP:H3K36me3:C57BL/6 splenic B cell male adult (8 weeks) +1270 ENCFF857TIJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CFZ/summary/ENCFF857TIJ.w5 128 32 2.0 mean 1270 CHIP:H3K9me3:C57BL/6 ES-Bruce4 +1271 ENCFF576XZZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGA/summary/ENCFF576XZZ.w5 128 32 2.0 mean 1271 CHIP:H3K27me3:C57BL/6 spleen male adult (8 weeks) +1272 ENCFF443EON /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGB/summary/ENCFF443EON.w5 128 32 2.0 mean 1272 CHIP:H3K27me3:C57BL/6 testis male adult (8 weeks) +1273 ENCFF476YTS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGC/summary/ENCFF476YTS.w5 128 32 2.0 mean 1273 
CHIP:H3K27me3:C57BL/6 thymus male adult (8 weeks) +1274 ENCFF819RQI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGH/summary/ENCFF819RQI.w5 128 32 2.0 mean 1274 CHIP:H3K4me1:B10.H-2aH-4bp/Wts CH12.LX +1275 ENCFF122GDX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGI/summary/ENCFF122GDX.w5 128 32 2.0 mean 1275 CHIP:H3K4me2:B10.H-2aH-4bp/Wts CH12.LX +1276 ENCFF867PYD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGJ/summary/ENCFF867PYD.w5 128 32 2.0 mean 1276 CHIP:H3K27ac:B10.H-2aH-4bp/Wts CH12.LX +1277 ENCFF500BXF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGK/summary/ENCFF500BXF.w5 128 32 2.0 mean 1277 CHIP:H3K4me3:B10.H-2aH-4bp/Wts CH12.LX +1278 ENCFF250LUT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGL/summary/ENCFF250LUT.w5 128 32 2.0 mean 1278 CHIP:H3K9ac:B10.H-2aH-4bp/Wts CH12.LX +1279 ENCFF840JAO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGM/summary/ENCFF840JAO.w5 128 32 2.0 mean 1279 CHIP:H3K79me2:B10.H-2aH-4bp/Wts CH12.LX +1280 ENCFF899GLT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGN/summary/ENCFF899GLT.w5 128 32 2.0 mean 1280 CHIP:H3K4me1:129 ES-E14 +1281 ENCFF857GJE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGO/summary/ENCFF857GJE.w5 128 32 2.0 mean 1281 CHIP:H3K4me3:129 ES-E14 +1282 ENCFF809IES /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGP/summary/ENCFF809IES.w5 128 32 2.0 mean 1282 CHIP:H3K9ac:129 ES-E14 +1283 ENCFF163HEV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGQ/summary/ENCFF163HEV.w5 128 32 2.0 mean 1283 CHIP:H3K27ac:129 ES-E14 +1284 ENCFF111CIF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGR/summary/ENCFF111CIF.w5 128 32 2.0 mean 1284 CHIP:H3K36me3:129 ES-E14 +1285 ENCFF609ZSZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000CGS/summary/ENCFF609ZSZ.w5 128 32 2.0 mean 1285 CHIP:H3K9ac:C57BL/6 ES-Bruce4 +1286 ENCFF460EAQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHD/summary/ENCFF460EAQ.w5 128 32 2.0 mean 1286 CHIP:FLI1:CD-1 
megakaryocyte embryo (14.5 days) +1287 ENCFF331NCU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHF/summary/ENCFF331NCU.w5 128 32 2.0 mean 1287 CHIP:H3K27me3:CD-1 megakaryocyte embryo (14.5 days) +1288 ENCFF612WPA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHG/summary/ENCFF612WPA.w5 128 32 2.0 mean 1288 CHIP:H3K4me3:CD-1 erythroblast embryo (14.5 days) +1289 ENCFF851DGO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHH/summary/ENCFF851DGO.w5 128 32 2.0 mean 1289 CHIP:H3K9me3:CD-1 megakaryocyte embryo (14.5 days) +1290 ENCFF410HES /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHI/summary/ENCFF410HES.w5 128 32 2.0 mean 1290 CHIP:H3K36me3:CD-1 megakaryocyte embryo (14.5 days) +1291 ENCFF797MYD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHJ/summary/ENCFF797MYD.w5 128 32 2.0 mean 1291 CHIP:H3K9me3:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1292 ENCFF007WLT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHK/summary/ENCFF007WLT.w5 128 32 2.0 mean 1292 CHIP:H3K4me1:CD-1 erythroblast embryo (14.5 days) +1293 ENCFF034FFR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHL/summary/ENCFF034FFR.w5 128 32 2.0 mean 1293 CHIP:H3K36me3:CD-1 erythroblast embryo (14.5 days) +1294 ENCFF140MAG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHM/summary/ENCFF140MAG.w5 128 32 2.0 mean 1294 CHIP:H3K9me3:129 G1E +1295 ENCFF277AAJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHN/summary/ENCFF277AAJ.w5 128 32 2.0 mean 1295 CHIP:H3K9me3:CD-1 erythroblast embryo (14.5 days) +1296 ENCFF662VCX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHO/summary/ENCFF662VCX.w5 128 32 2.0 mean 1296 CHIP:H3K9me3:B10.H-2aH-4bp/Wts CH12.LX +1297 ENCFF220KWG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHP/summary/ENCFF220KWG.w5 128 32 2.0 mean 1297 CHIP:H3K27me3:CD-1 erythroblast embryo (14.5 days) +1298 ENCFF751XSP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHQ/summary/ENCFF751XSP.w5 128 32 2.0 mean 1298 
CHIP:H3K4me1:B10.H-2aH-4bp/Wts CH12.LX +1299 ENCFF817HHK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHR/summary/ENCFF817HHK.w5 128 32 2.0 mean 1299 CHIP:H3K4me1:CD-1 megakaryocyte embryo (14.5 days) +1300 ENCFF750FWM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHS/summary/ENCFF750FWM.w5 128 32 2.0 mean 1300 CHIP:H3K36me3:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1301 ENCFF211IXB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHT/summary/ENCFF211IXB.w5 128 32 2.0 mean 1301 CHIP:H3K4me3:CD-1 megakaryocyte embryo (14.5 days) +1302 ENCFF288OKN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHU/summary/ENCFF288OKN.w5 128 32 2.0 mean 1302 CHIP:H3K4me3:B10.H-2aH-4bp/Wts CH12.LX +1303 ENCFF613LOW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHV/summary/ENCFF613LOW.w5 128 32 2.0 mean 1303 CHIP:H3K36me3:129 G1E +1304 ENCFF601MZE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHW/summary/ENCFF601MZE.w5 128 32 2.0 mean 1304 CHIP:H3K36me3:B10.H-2aH-4bp/Wts CH12.LX +1305 ENCFF541KFF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHX/summary/ENCFF541KFF.w5 128 32 2.0 mean 1305 CHIP:H3K27me3:129 G1E +1306 ENCFF923MXW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHY/summary/ENCFF923MXW.w5 128 32 2.0 mean 1306 CHIP:H3K27me3:B10.H-2aH-4bp/Wts CH12.LX +1307 ENCFF398WUM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DHZ/summary/ENCFF398WUM.w5 128 32 2.0 mean 1307 CHIP:POLR2AphosphoS5:DBA/2 MEL cell line +1308 ENCFF302RFZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIA/summary/ENCFF302RFZ.w5 128 32 2.0 mean 1308 CHIP:TAL1:DBA/2 MEL cell line +1309 ENCFF860JAB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIC/summary/ENCFF860JAB.w5 128 32 2.0 mean 1309 CHIP:GATA1:129 G1E +1310 ENCFF367IMF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DID/summary/ENCFF367IMF.w5 128 32 2.0 mean 1310 CHIP:GATA2:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1311 ENCFF630HUU 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIE/summary/ENCFF630HUU.w5 128 32 2.0 mean 1311 CHIP:GATA2:129 G1E +1312 ENCFF754BDS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIF/summary/ENCFF754BDS.w5 128 32 2.0 mean 1312 CHIP:POLR2AphosphoS5:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1313 ENCFF561RBC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIG/summary/ENCFF561RBC.w5 128 32 2.0 mean 1313 CHIP:POLR2AphosphoS5:129 G1E +1314 ENCFF636WAD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIH/summary/ENCFF636WAD.w5 128 32 2.0 mean 1314 CHIP:PAX5:B10.H-2aH-4bp/Wts CH12.LX +1315 ENCFF779AOL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIL/summary/ENCFF779AOL.w5 128 32 2.0 mean 1315 CHIP:GATA1:CD-1 erythroblast embryo (14.5 days) +1316 ENCFF653ZIY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIM/summary/ENCFF653ZIY.w5 128 32 2.0 mean 1316 CHIP:TAL1:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1317 ENCFF263FFV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIN/summary/ENCFF263FFV.w5 128 32 2.0 mean 1317 CHIP:TAL1:129 G1E +1318 ENCFF498RZJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIP/summary/ENCFF498RZJ.w5 128 32 2.0 mean 1318 CHIP:CTCF:DBA/2 MEL cell line +1319 ENCFF279GQJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIQ/summary/ENCFF279GQJ.w5 128 32 2.0 mean 1319 CHIP:GATA1:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1320 ENCFF816XFR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIR/summary/ENCFF816XFR.w5 128 32 2.0 mean 1320 CHIP:CTCF:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1321 ENCFF938UFC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIS/summary/ENCFF938UFC.w5 128 32 2.0 mean 1321 CHIP:CTCF:129 G1E +1322 ENCFF844EOO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIU/summary/ENCFF844EOO.w5 128 32 2.0 mean 1322 CHIP:CTCF:B10.H-2aH-4bp/Wts CH12.LX +1323 ENCFF299BAW 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIW/summary/ENCFF299BAW.w5 128 32 2.0 mean 1323 CHIP:H3K4me3:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1324 ENCFF249XCU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIX/summary/ENCFF249XCU.w5 128 32 2.0 mean 1324 CHIP:H3K4me3:129 G1E +1325 ENCFF469EMZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIY/summary/ENCFF469EMZ.w5 128 32 2.0 mean 1325 CHIP:H3K4me1:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1326 ENCFF999SUS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DIZ/summary/ENCFF999SUS.w5 128 32 2.0 mean 1326 CHIP:H3K27me3:129 G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +1327 ENCFF289VIM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000DJB/summary/ENCFF289VIM.w5 128 32 2.0 mean 1327 CHIP:H3K4me1:129 G1E +1328 ENCFF365FLT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQO/summary/ENCFF365FLT.w5 128 32 2.0 mean 1328 CHIP:ZNF384:B10.H-2aH-4bp/Wts CH12.LX +1329 ENCFF133UFU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQP/summary/ENCFF133UFU.w5 128 32 2.0 mean 1329 CHIP:ZMIZ1:B10.H-2aH-4bp/Wts CH12.LX +1330 ENCFF781WYP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQR/summary/ENCFF781WYP.w5 128 32 2.0 mean 1330 CHIP:HCFC1:B10.H-2aH-4bp/Wts CH12.LX +1331 ENCFF985RMX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQS/summary/ENCFF985RMX.w5 128 32 2.0 mean 1331 CHIP:GABPA:B10.H-2aH-4bp/Wts CH12.LX +1332 ENCFF283POK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQT/summary/ENCFF283POK.w5 128 32 2.0 mean 1332 CHIP:MAZ:B10.H-2aH-4bp/Wts CH12.LX +1333 ENCFF835AUM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQU/summary/ENCFF835AUM.w5 128 32 2.0 mean 1333 CHIP:KAT2A:B10.H-2aH-4bp/Wts CH12.LX +1334 ENCFF217FQC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQV/summary/ENCFF217FQC.w5 128 32 2.0 mean 1334 CHIP:CHD1:B10.H-2aH-4bp/Wts CH12.LX +1335 ENCFF411YYG 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQW/summary/ENCFF411YYG.w5 128 32 2.0 mean 1335 CHIP:ZKSCAN1:B10.H-2aH-4bp/Wts CH12.LX +1336 ENCFF950EEV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQX/summary/ENCFF950EEV.w5 128 32 2.0 mean 1336 CHIP:UBTF:B10.H-2aH-4bp/Wts CH12.LX +1337 ENCFF659WQU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQY/summary/ENCFF659WQU.w5 128 32 2.0 mean 1337 CHIP:SIN3A:B10.H-2aH-4bp/Wts CH12.LX +1338 ENCFF210EXT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EQZ/summary/ENCFF210EXT.w5 128 32 2.0 mean 1338 CHIP:RCOR1:B10.H-2aH-4bp/Wts CH12.LX +1339 ENCFF165JVA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERA/summary/ENCFF165JVA.w5 128 32 2.0 mean 1339 CHIP:ETS1:B10.H-2aH-4bp/Wts CH12.LX +1340 ENCFF727IMP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERB/summary/ENCFF727IMP.w5 128 32 2.0 mean 1340 CHIP:MAFK:B10.H-2aH-4bp/Wts CH12.LX +1341 ENCFF668KLU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERC/summary/ENCFF668KLU.w5 128 32 2.0 mean 1341 CHIP:BHLHE40:B10.H-2aH-4bp/Wts CH12.LX +1342 ENCFF775LKP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERD/summary/ENCFF775LKP.w5 128 32 2.0 mean 1342 CHIP:NELFE:B10.H-2aH-4bp/Wts CH12.LX +1343 ENCFF724BKJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERE/summary/ENCFF724BKJ.w5 128 32 2.0 mean 1343 CHIP:MXI1:B10.H-2aH-4bp/Wts CH12.LX +1344 ENCFF216JRQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERG/summary/ENCFF216JRQ.w5 128 32 2.0 mean 1344 CHIP:SMC3:B10.H-2aH-4bp/Wts CH12.LX +1345 ENCFF630JDA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERH/summary/ENCFF630JDA.w5 128 32 2.0 mean 1345 CHIP:POLR2AphosphoS2:B10.H-2aH-4bp/Wts CH12.LX +1346 ENCFF140YUU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERI/summary/ENCFF140YUU.w5 128 32 2.0 mean 1346 CHIP:EP300:B10.H-2aH-4bp/Wts CH12.LX +1347 ENCFF495UJH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERJ/summary/ENCFF495UJH.w5 128 32 2.0 mean 1347 
CHIP:USF2:B10.H-2aH-4bp/Wts CH12.LX +1348 ENCFF388SYO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERK/summary/ENCFF388SYO.w5 128 32 2.0 mean 1348 CHIP:RAD21:B10.H-2aH-4bp/Wts CH12.LX +1349 ENCFF291HYO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERL/summary/ENCFF291HYO.w5 128 32 2.0 mean 1349 CHIP:MAX:B10.H-2aH-4bp/Wts CH12.LX +1350 ENCFF025UEN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERM/summary/ENCFF025UEN.w5 128 32 2.0 mean 1350 CHIP:CTCF:B10.H-2aH-4bp/Wts CH12.LX +1351 ENCFF562UOS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERN/summary/ENCFF562UOS.w5 128 32 2.0 mean 1351 CHIP:MYC:B10.H-2aH-4bp/Wts CH12.LX +1352 ENCFF944MUS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERO/summary/ENCFF944MUS.w5 128 32 2.0 mean 1352 CHIP:JUN:B10.H-2aH-4bp/Wts CH12.LX +1353 ENCFF111ZPI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERP/summary/ENCFF111ZPI.w5 128 32 2.0 mean 1353 CHIP:TBP:B10.H-2aH-4bp/Wts CH12.LX +1354 ENCFF847BNC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERQ/summary/ENCFF847BNC.w5 128 32 2.0 mean 1354 CHIP:POLR2A:B10.H-2aH-4bp/Wts CH12.LX +1355 ENCFF544VSI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERR/summary/ENCFF544VSI.w5 128 32 2.0 mean 1355 CHIP:JUND:B10.H-2aH-4bp/Wts CH12.LX +1356 ENCFF162FEM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERU/summary/ENCFF162FEM.w5 128 32 2.0 mean 1356 CHIP:E2F4:B10.H-2aH-4bp/Wts CH12.LX +1357 ENCFF643WMY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERV/summary/ENCFF643WMY.w5 128 32 2.0 mean 1357 CHIP:ZNF384:129 ES-E14 +1358 ENCFF122FYP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ERX/summary/ENCFF122FYP.w5 128 32 2.0 mean 1358 CHIP:HCFC1:129 ES-E14 +1359 ENCFF807HIG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESD/summary/ENCFF807HIG.w5 128 32 2.0 mean 1359 CHIP:ZNF384:DBA/2 MEL cell line +1360 ENCFF689AFL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESE/summary/ENCFF689AFL.w5 128 32 2.0 mean 1360 CHIP:ZMIZ1:DBA/2 
MEL cell line +1361 ENCFF707FPY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESG/summary/ENCFF707FPY.w5 128 32 2.0 mean 1361 CHIP:HCFC1:DBA/2 MEL cell line +1362 ENCFF252OTG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESH/summary/ENCFF252OTG.w5 128 32 2.0 mean 1362 CHIP:BHLHE40:DBA/2 MEL cell line +1363 ENCFF525ZGM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESI/summary/ENCFF525ZGM.w5 128 32 2.0 mean 1363 CHIP:RCOR1:DBA/2 MEL cell line +1364 ENCFF790OHZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESJ/summary/ENCFF790OHZ.w5 128 32 2.0 mean 1364 CHIP:UBTF:DBA/2 MEL cell line +1365 ENCFF104SUM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESK/summary/ENCFF104SUM.w5 128 32 2.0 mean 1365 CHIP:GABPA:DBA/2 MEL cell line +1366 ENCFF591TEI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESL/summary/ENCFF591TEI.w5 128 32 2.0 mean 1366 CHIP:MAZ:DBA/2 MEL cell line +1367 ENCFF013LGH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESM/summary/ENCFF013LGH.w5 128 32 2.0 mean 1367 CHIP:KAT2A:DBA/2 MEL cell line +1368 ENCFF498FCD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESN/summary/ENCFF498FCD.w5 128 32 2.0 mean 1368 CHIP:CHD1:DBA/2 MEL cell line +1369 ENCFF585YOH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESX/summary/ENCFF585YOH.w5 128 32 2.0 mean 1369 CHIP:ZKSCAN1:DBA/2 MEL cell line +1370 ENCFF369XQN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ESY/summary/ENCFF369XQN.w5 128 32 2.0 mean 1370 CHIP:MAFK:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1371 ENCFF283VEC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETA/summary/ENCFF283VEC.w5 128 32 2.0 mean 1371 CHIP:GATA1:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1372 ENCFF760BIB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETB/summary/ENCFF760BIB.w5 128 32 2.0 mean 1372 CHIP:ETS1:DBA/2 MEL cell line +1373 ENCFF297ZST 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETC/summary/ENCFF297ZST.w5 128 32 2.0 mean 1373 CHIP:SIN3A:DBA/2 MEL cell line +1374 ENCFF056UBA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETE/summary/ENCFF056UBA.w5 128 32 2.0 mean 1374 CHIP:CTCF:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1375 ENCFF141IPU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETF/summary/ENCFF141IPU.w5 128 32 2.0 mean 1375 CHIP:USF2:DBA/2 MEL cell line +1376 ENCFF128VVT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETG/summary/ENCFF128VVT.w5 128 32 2.0 mean 1376 CHIP:POLR2A:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1377 ENCFF516ABO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETK/summary/ENCFF516ABO.w5 128 32 2.0 mean 1377 CHIP:MAFK:DBA/2 MEL cell line +1378 ENCFF034BBD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETL/summary/ENCFF034BBD.w5 128 32 2.0 mean 1378 CHIP:SMC3:DBA/2 MEL cell line +1379 ENCFF587NFU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETM/summary/ENCFF587NFU.w5 128 32 2.0 mean 1379 CHIP:POLR2AphosphoS2:DBA/2 MEL cell line +1380 ENCFF780AWC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETN/summary/ENCFF780AWC.w5 128 32 2.0 mean 1380 CHIP:MXI1:DBA/2 MEL cell line +1381 ENCFF266YOT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETP/summary/ENCFF266YOT.w5 128 32 2.0 mean 1381 CHIP:EP300:DBA/2 MEL cell line +1382 ENCFF515BPN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETQ/summary/ENCFF515BPN.w5 128 32 2.0 mean 1382 CHIP:CTCF:DBA/2 MEL cell line +1383 ENCFF428NKW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETR/summary/ENCFF428NKW.w5 128 32 2.0 mean 1383 CHIP:MYB:DBA/2 MEL cell line +1384 ENCFF198FKD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETS/summary/ENCFF198FKD.w5 128 32 2.0 mean 1384 CHIP:RAD21:DBA/2 MEL cell line +1385 ENCFF527GUY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETT/summary/ENCFF527GUY.w5 128 32 2.0 mean 1385 
CHIP:RAD21:DBA/2 MEL cell line treated with 2% dimethyl sulfoxide for 5 days +1386 ENCFF060EGQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETU/summary/ENCFF060EGQ.w5 128 32 2.0 mean 1386 CHIP:NELFE:DBA/2 MEL cell line +1387 ENCFF325EHX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETV/summary/ENCFF325EHX.w5 128 32 2.0 mean 1387 CHIP:EP300:DBA/2 MEL cell line +1388 ENCFF465SVV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETW/summary/ENCFF465SVV.w5 128 32 2.0 mean 1388 CHIP:USF2:DBA/2 MEL cell line +1389 ENCFF368ANL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETX/summary/ENCFF368ANL.w5 128 32 2.0 mean 1389 CHIP:MAX:DBA/2 MEL cell line +1390 ENCFF240ORP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETY/summary/ENCFF240ORP.w5 128 32 2.0 mean 1390 CHIP:E2F4:DBA/2 MEL cell line +1391 ENCFF673UJE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000ETZ/summary/ENCFF673UJE.w5 128 32 2.0 mean 1391 CHIP:JUND:DBA/2 MEL cell line +1392 ENCFF998KIS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EUA/summary/ENCFF998KIS.w5 128 32 2.0 mean 1392 CHIP:MYC:DBA/2 MEL cell line +1393 ENCFF026AMD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EUB/summary/ENCFF026AMD.w5 128 32 2.0 mean 1393 CHIP:TBP:DBA/2 MEL cell line +1394 ENCFF906BFI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EUC/summary/ENCFF906BFI.w5 128 32 2.0 mean 1394 CHIP:POLR2A:DBA/2 MEL cell line +1395 ENCFF160HQN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR000EUG/summary/ENCFF160HQN.w5 128 32 2.0 mean 1395 CHIP:GATA1:DBA/2 MEL cell line +1396 ENCFF661SPC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR002ZAG/summary/ENCFF661SPC.w5 128 32 2.0 mean 1396 CHIP:CTCF:C57BL/6 intestine postnatal (0 days) +1397 ENCFF409BJZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR007XTC/summary/ENCFF409BJZ.w5 128 32 2.0 mean 1397 CHIP:H3K9me3:C57BL/6 heart embryo (14.5 days) +1398 ENCFF672JMP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR008WOQ/summary/ENCFF672JMP.w5 
128 32 2.0 mean 1398 CHIP:H3K27me3:C57BL/6 embryonic facial prominence embryo (13.5 days) +1399 ENCFF774BPM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR014MXQ/summary/ENCFF774BPM.w5 128 32 2.0 mean 1399 CHIP:H3K4me1:C57BL/6 hindbrain embryo (12.5 days) +1400 ENCFF212XYE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR014TEJ/summary/ENCFF212XYE.w5 128 32 2.0 mean 1400 CHIP:H3K9ac:C57BL/6 forebrain embryo (16.5 days) +1401 ENCFF396TSB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR014YAY/summary/ENCFF396TSB.w5 128 32 2.0 mean 1401 CHIP:H3K27me3:C57BL/6 intestine embryo (14.5 days) +1402 ENCFF538TEQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR021ALF/summary/ENCFF538TEQ.w5 128 32 2.0 mean 1402 CHIP:H3K27ac:C57BL/6 limb embryo (14.5 days) +1403 ENCFF577WZP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR022DED/summary/ENCFF577WZP.w5 128 32 2.0 mean 1403 CHIP:H3K9me3:C57BL/6 limb embryo (13.5 days) +1404 ENCFF469CPH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR022KDE/summary/ENCFF469CPH.w5 128 32 2.0 mean 1404 CHIP:H3K4me3:C57BL/6 forebrain embryo (15.5 days) +1405 ENCFF280VOA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR023VJO/summary/ENCFF280VOA.w5 128 32 2.0 mean 1405 CHIP:H3K4me3:C57BL/6 stomach embryo (14.5 days) +1406 ENCFF210YRR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR025AOC/summary/ENCFF210YRR.w5 128 32 2.0 mean 1406 CHIP:H3K27me3:C57BL/6 lung postnatal (0 days) +1407 ENCFF786GEI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR027DMG/summary/ENCFF786GEI.w5 128 32 2.0 mean 1407 CHIP:H3K36me3:C57BL/6 embryonic facial prominence embryo (11.5 days) +1408 ENCFF379BOF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR028VCU/summary/ENCFF379BOF.w5 128 32 2.0 mean 1408 CHIP:H3K9ac:C57BL/6 midbrain postnatal (0 days) +1409 ENCFF367TZB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR028ZHS/summary/ENCFF367TZB.w5 128 32 2.0 mean 1409 CHIP:H3K9me3:C57BL/6 limb embryo (15.5 days) +1410 ENCFF043LTY 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR032JUI/summary/ENCFF043LTY.w5 128 32 2.0 mean 1410 CHIP:H3K4me1:129 E14TG2a.4 +1411 ENCFF507ODK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR033OWC/summary/ENCFF507ODK.w5 128 32 2.0 mean 1411 CHIP:ELF1:DBA/2 MEL cell line +1412 ENCFF348ZCL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR036YKL/summary/ENCFF348ZCL.w5 128 32 2.0 mean 1412 CHIP:H3K4me2:C57BL/6 kidney embryo (15.5 days) +1413 ENCFF813MON /home/drk/tillage/datasets/mouse/chip/encode/ENCSR037HCS/summary/ENCFF813MON.w5 128 32 2.0 mean 1413 CHIP:H3K36me3:C57BL/6 liver embryo (13.5 days) +1414 ENCFF044DZV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR037HLB/summary/ENCFF044DZV.w5 128 32 2.0 mean 1414 CHIP:H3K4me1:C57BL/6 midbrain embryo (14.5 days) +1415 ENCFF101ASP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR041KEJ/summary/ENCFF101ASP.w5 128 32 2.0 mean 1415 CHIP:H3K9me3:C57BL/6 forebrain embryo (13.5 days) +1416 ENCFF186QKQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR041SMK/summary/ENCFF186QKQ.w5 128 32 2.0 mean 1416 CHIP:CTCF:C57BL/6 liver postnatal (0 days) +1417 ENCFF030BTL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR044SNE/summary/ENCFF030BTL.w5 128 32 2.0 mean 1417 CHIP:H3K4me3:C57BL/6 neural tube embryo (15.5 days) +1418 ENCFF849CQA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR044SUW/summary/ENCFF849CQA.w5 128 32 2.0 mean 1418 CHIP:H3K4me2:C57BL/6 forebrain embryo (13.5 days) +1419 ENCFF419CPS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR045NOD/summary/ENCFF419CPS.w5 128 32 2.0 mean 1419 CHIP:H3K4me1:C57BL/6 liver embryo (13.5 days) +1420 ENCFF574LGA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR046SWI/summary/ENCFF574LGA.w5 128 32 2.0 mean 1420 CHIP:H3K27me3:C57BL/6 hindbrain embryo (16.5 days) +1421 ENCFF563JNZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR048IZJ/summary/ENCFF563JNZ.w5 128 32 2.0 mean 1421 CHIP:H3K9ac:C57BL/6 stomach postnatal (0 days) +1422 ENCFF887SQZ 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR051CUH/summary/ENCFF887SQZ.w5 128 32 2.0 mean 1422 CHIP:H3K4me1:C57BL/6 intestine embryo (15.5 days) +1423 ENCFF774GGS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR052CDF/summary/ENCFF774GGS.w5 128 32 2.0 mean 1423 CHIP:H3K4me3:C57BL/6 heart embryo (14.5 days) +1424 ENCFF774AQE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR054JHZ/summary/ENCFF774AQE.w5 128 32 2.0 mean 1424 CHIP:H3K27ac:C57BL/6 hindbrain embryo (14.5 days) +1425 ENCFF394ABQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR057BKH/summary/ENCFF394ABQ.w5 128 32 2.0 mean 1425 CHIP:H3K4me3:C57BL/6 embryonic facial prominence embryo (13.5 days) +1426 ENCFF298GLI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR057SHA/summary/ENCFF298GLI.w5 128 32 2.0 mean 1426 CHIP:H3K27ac:C57BL/6 kidney embryo (14.5 days) +1427 ENCFF274AWM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR058DOA/summary/ENCFF274AWM.w5 128 32 2.0 mean 1427 CHIP:H3K27ac:C57BL/6 liver embryo (11.5 days) +1428 ENCFF312WJP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR059LYY/summary/ENCFF312WJP.w5 128 32 2.0 mean 1428 CHIP:H3K4me3:C57BL/6 embryonic facial prominence embryo (14.5 days) +1429 ENCFF012GHA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR059MBO/summary/ENCFF012GHA.w5 128 32 2.0 mean 1429 CHIP:H3K27me3:129 E14TG2a.4 +1430 ENCFF944UBT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR061DPP/summary/ENCFF944UBT.w5 128 32 2.0 mean 1430 CHIP:H3K27me3:C57BL/6 embryonic facial prominence embryo (15.5 days) +1431 ENCFF518FTX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR064DGY/summary/ENCFF518FTX.w5 128 32 2.0 mean 1431 CHIP:H3K27me3:C57BL/6 stomach embryo (14.5 days) +1432 ENCFF999AED /home/drk/tillage/datasets/mouse/chip/encode/ENCSR065CJC/summary/ENCFF999AED.w5 128 32 2.0 mean 1432 CHIP:H3K9ac:C57BL/6 neural tube embryo (12.5 days) +1433 ENCFF887HCS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR065LAB/summary/ENCFF887HCS.w5 128 32 2.0 mean 1433 
CHIP:H3K9me3:C57BL/6 midbrain postnatal (0 days) +1434 ENCFF985XJA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR066LZB/summary/ENCFF985XJA.w5 128 32 2.0 mean 1434 CHIP:H3K36me3:C57BL/6 forebrain embryo (11.5 days) +1435 ENCFF652HPR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR066WUD/summary/ENCFF652HPR.w5 128 32 2.0 mean 1435 CHIP:H3K36me3:C57BL/6 midbrain embryo (13.5 days) +1436 ENCFF121XJR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR066XFL/summary/ENCFF121XJR.w5 128 32 2.0 mean 1436 CHIP:H3K27ac:C57BL/6 hindbrain embryo (15.5 days) +1437 ENCFF371WPS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR066XXZ/summary/ENCFF371WPS.w5 128 32 2.0 mean 1437 CHIP:H3K36me3:C57BL/6 hindbrain embryo (14.5 days) +1438 ENCFF537ZRP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR067FKP/summary/ENCFF537ZRP.w5 128 32 2.0 mean 1438 CHIP:H3K9ac:C57BL/6 midbrain embryo (14.5 days) +1439 ENCFF880WPO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR067ZDE/summary/ENCFF880WPO.w5 128 32 2.0 mean 1439 CHIP:H3K4me2:C57BL/6 forebrain embryo (11.5 days) +1440 ENCFF565CYY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR068XYW/summary/ENCFF565CYY.w5 128 32 2.0 mean 1440 CHIP:H3K9ac:C57BL/6 midbrain embryo (16.5 days) +1441 ENCFF606CKV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR069TDC/summary/ENCFF606CKV.w5 128 32 2.0 mean 1441 CHIP:H3K36me3:C57BL/6 forebrain postnatal (0 days) +1442 ENCFF045OSV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR069XHI/summary/ENCFF045OSV.w5 128 32 2.0 mean 1442 CHIP:H3K9ac:C57BL/6 heart embryo (13.5 days) +1443 ENCFF183CXT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR070MOK/summary/ENCFF183CXT.w5 128 32 2.0 mean 1443 CHIP:H3K27me3:C57BL/6 forebrain postnatal (0 days) +1444 ENCFF921YZN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR071SQK/summary/ENCFF921YZN.w5 128 32 2.0 mean 1444 CHIP:H3K9me3:C57BL/6 heart embryo (15.5 days) +1445 ENCFF518VSR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR075SNV/summary/ENCFF518VSR.w5 
128 32 2.0 mean 1445 CHIP:H3K27ac:C57BL/6 liver embryo (14.5 days) +1446 ENCFF170JVA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR076FAM/summary/ENCFF170JVA.w5 128 32 2.0 mean 1446 CHIP:H3K9ac:C57BL/6 embryonic facial prominence embryo (11.5 days) +1447 ENCFF592XTS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR076MXH/summary/ENCFF592XTS.w5 128 32 2.0 mean 1447 CHIP:H3K4me2:C57BL/6 stomach embryo (14.5 days) +1448 ENCFF730VFB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR080GQM/summary/ENCFF730VFB.w5 128 32 2.0 mean 1448 CHIP:H3K4me1:C57BL/6 heart embryo (14.5 days) +1449 ENCFF731BDW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR085EYQ/summary/ENCFF731BDW.w5 128 32 2.0 mean 1449 CHIP:H3K27me3:C57BL/6 limb embryo (11.5 days) +1450 ENCFF654YXS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR087JEN/summary/ENCFF654YXS.w5 128 32 2.0 mean 1450 CHIP:H3K36me3:C57BL/6 forebrain embryo (13.5 days) +1451 ENCFF290BIA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR087PLZ/summary/ENCFF290BIA.w5 128 32 2.0 mean 1451 CHIP:H3K9ac:C57BL/6 neural tube embryo (13.5 days) +1452 ENCFF230FQZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR088UKA/summary/ENCFF230FQZ.w5 128 32 2.0 mean 1452 CHIP:H3K27ac:C57BL/6 midbrain embryo (11.5 days) +1453 ENCFF633MDI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR091WNX/summary/ENCFF633MDI.w5 128 32 2.0 mean 1453 CHIP:H3K4me1:C57BL/6 embryonic facial prominence embryo (15.5 days) +1454 ENCFF229GJB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR093BYV/summary/ENCFF229GJB.w5 128 32 2.0 mean 1454 CHIP:H3K4me3:C57BL/6 neural tube embryo (14.5 days) +1455 ENCFF114FKU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR093DWU/summary/ENCFF114FKU.w5 128 32 2.0 mean 1455 CHIP:H3K9me3:C57BL/6 forebrain postnatal (0 days) +1456 ENCFF846FSQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR094QZC/summary/ENCFF846FSQ.w5 128 32 2.0 mean 1456 CHIP:H3K36me3:C57BL/6 neural tube embryo (12.5 days) +1457 ENCFF835DLZ 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR094TTT/summary/ENCFF835DLZ.w5 128 32 2.0 mean 1457 CHIP:H3K27ac:C57BL/6 forebrain postnatal (0 days) +1458 ENCFF947IGN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR095FDG/summary/ENCFF947IGN.w5 128 32 2.0 mean 1458 CHIP:H3K9me3:C57BL/6 liver embryo (16.5 days) +1459 ENCFF069MRV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR098AHF/summary/ENCFF069MRV.w5 128 32 2.0 mean 1459 CHIP:H3K4me2:C57BL/6 limb embryo (15.5 days) +1460 ENCFF637INN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR098USP/summary/ENCFF637INN.w5 128 32 2.0 mean 1460 CHIP:H3K9me3:C57BL/6 heart embryo (13.5 days) +1461 ENCFF223PHS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR099BYW/summary/ENCFF223PHS.w5 128 32 2.0 mean 1461 CHIP:H3K27me3:C57BL/6 neural tube embryo (14.5 days) +1462 ENCFF854ASW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR100DLL/summary/ENCFF854ASW.w5 128 32 2.0 mean 1462 CHIP:H3K4me2:C57BL/6 intestine embryo (16.5 days) +1463 ENCFF612RDE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR104PWP/summary/ENCFF612RDE.w5 128 32 2.0 mean 1463 CHIP:H3K27me3:C57BL/6 midbrain embryo (12.5 days) +1464 ENCFF685EPG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR104QEN/summary/ENCFF685EPG.w5 128 32 2.0 mean 1464 CHIP:CTCF:C57BL/6 stomach postnatal (0 days) +1465 ENCFF499LGL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR105OGF/summary/ENCFF499LGL.w5 128 32 2.0 mean 1465 CHIP:H3K9ac:C57BL/6 embryonic facial prominence embryo (12.5 days) +1466 ENCFF508WCS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR106QSU/summary/ENCFF508WCS.w5 128 32 2.0 mean 1466 CHIP:H3K4me3:C57BL/6 embryonic facial prominence embryo (12.5 days) +1467 ENCFF032KOM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR107SLP/summary/ENCFF032KOM.w5 128 32 2.0 mean 1467 CHIP:H3K36me3:C57BL/6 neural tube embryo (13.5 days) +1468 ENCFF118BRC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR108TWE/summary/ENCFF118BRC.w5 128 32 2.0 mean 1468 
CHIP:H3K36me3:C57BL/6 lung embryo (14.5 days) +1469 ENCFF899RFO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR110MSZ/summary/ENCFF899RFO.w5 128 32 2.0 mean 1469 CHIP:H3K4me2:C57BL/6 liver embryo (12.5 days) +1470 ENCFF959NHK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR110TIW/summary/ENCFF959NHK.w5 128 32 2.0 mean 1470 CHIP:H3K4me3:C57BL/6 hindbrain embryo (16.5 days) +1471 ENCFF061QXI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR115FTP/summary/ENCFF061QXI.w5 128 32 2.0 mean 1471 CHIP:H3K4me2:C57BL/6 limb embryo (11.5 days) +1472 ENCFF808VAT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR119YTR/summary/ENCFF808VAT.w5 128 32 2.0 mean 1472 CHIP:H3K4me3:C57BL/6 heart embryo (16.5 days) +1473 ENCFF069GVS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR121BWA/summary/ENCFF069GVS.w5 128 32 2.0 mean 1473 CHIP:H3K36me3:C57BL/6 kidney embryo (16.5 days) +1474 ENCFF637DCW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR122CJZ/summary/ENCFF637DCW.w5 128 32 2.0 mean 1474 CHIP:H3K9me3:C57BL/6 heart embryo (16.5 days) +1475 ENCFF379MVJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR122HLY/summary/ENCFF379MVJ.w5 128 32 2.0 mean 1475 CHIP:H3K27me3:C57BL/6 heart embryo (12.5 days) +1476 ENCFF937AXD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR123MLY/summary/ENCFF937AXD.w5 128 32 2.0 mean 1476 CHIP:H3K27ac:C57BL/6 heart embryo (12.5 days) +1477 ENCFF716TGN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR125VVG/summary/ENCFF716TGN.w5 128 32 2.0 mean 1477 CHIP:H3K9me3:C57BL/6 liver embryo (14.5 days) +1478 ENCFF719KVF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR128ZCB/summary/ENCFF719KVF.w5 128 32 2.0 mean 1478 CHIP:H3K9ac:C57BL/6 forebrain embryo (14.5 days) +1479 ENCFF041JZL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR129DIK/summary/ENCFF041JZL.w5 128 32 2.0 mean 1479 CHIP:H3K4me3:C57BL/6 forebrain embryo (16.5 days) +1480 ENCFF177SVC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR129LAP/summary/ENCFF177SVC.w5 128 32 2.0 mean 1480 
CHIP:H3K27ac:C57BL/6 hindbrain embryo (11.5 days) +1481 ENCFF610HBD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR129OJN/summary/ENCFF610HBD.w5 128 32 2.0 mean 1481 CHIP:H3K27me3:C57BL/6 midbrain embryo (13.5 days) +1482 ENCFF909TOQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR133EGP/summary/ENCFF909TOQ.w5 128 32 2.0 mean 1482 CHIP:H3K4me1:C57BL/6 liver embryo (15.5 days) +1483 ENCFF943PON /home/drk/tillage/datasets/mouse/chip/encode/ENCSR135QMP/summary/ENCFF943PON.w5 128 32 2.0 mean 1483 CHIP:H3K27me3:C57BL/6 liver embryo (16.5 days) +1484 ENCFF623PQH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR135SWH/summary/ENCFF623PQH.w5 128 32 2.0 mean 1484 CHIP:NRF1:DBA/2 MEL cell line +1485 ENCFF285ZJD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR136GMT/summary/ENCFF285ZJD.w5 128 32 2.0 mean 1485 CHIP:H3K27ac:C57BL/6 liver embryo (12.5 days) +1486 ENCFF820QKT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR137NUN/summary/ENCFF820QKT.w5 128 32 2.0 mean 1486 CHIP:H3K9me3:C57BL/6 embryonic facial prominence embryo (15.5 days) +1487 ENCFF681GAF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR140UEX/summary/ENCFF681GAF.w5 128 32 2.0 mean 1487 CHIP:H3K27ac:C57BL/6 lung embryo (16.5 days) +1488 ENCFF873EBR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR140YPL/summary/ENCFF873EBR.w5 128 32 2.0 mean 1488 CHIP:H3K27ac:C57BL/6 kidney postnatal (0 days) +1489 ENCFF033MMS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR141ZQF/summary/ENCFF033MMS.w5 128 32 2.0 mean 1489 CHIP:H3K4me1:C57BL/6 forebrain embryo (16.5 days) +1490 ENCFF083ZQN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR143WOK/summary/ENCFF083ZQN.w5 128 32 2.0 mean 1490 CHIP:CTCF:C57BL/6 kidney postnatal (0 days) +1491 ENCFF421HSW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR147PYG/summary/ENCFF421HSW.w5 128 32 2.0 mean 1491 CHIP:H3K9me3:C57BL/6 embryonic facial prominence embryo (13.5 days) +1492 ENCFF164GUC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR148IGO/summary/ENCFF164GUC.w5 
128 32 2.0 mean 1492 CHIP:H3K27me3:C57BL/6 lung embryo (14.5 days) +1493 ENCFF741LSB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR149GYK/summary/ENCFF741LSB.w5 128 32 2.0 mean 1493 CHIP:H3K9me3:C57BL/6 liver embryo (12.5 days) +1494 ENCFF251FHP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR150RGT/summary/ENCFF251FHP.w5 128 32 2.0 mean 1494 CHIP:CTCF:C57BL/6 hindbrain postnatal (0 days) +1495 ENCFF440LYR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR151APL/summary/ENCFF440LYR.w5 128 32 2.0 mean 1495 CHIP:H3K27ac:C57BL/6 embryonic facial prominence embryo (10.5 days) +1496 ENCFF957GWA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR154DTK/summary/ENCFF957GWA.w5 128 32 2.0 mean 1496 CHIP:H3K4me2:C57BL/6 lung embryo (16.5 days) +1497 ENCFF450IIR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR157IVC/summary/ENCFF450IIR.w5 128 32 2.0 mean 1497 CHIP:H3K4me1:C57BL/6 midbrain embryo (12.5 days) +1498 ENCFF173OSG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR157LYR/summary/ENCFF173OSG.w5 128 32 2.0 mean 1498 CHIP:H3K4me1:C57BL/6 intestine embryo (14.5 days) +1499 ENCFF493WPM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR158ONM/summary/ENCFF493WPM.w5 128 32 2.0 mean 1499 CHIP:H3K4me3:C57BL/6 hindbrain embryo (12.5 days) +1500 ENCFF028YVD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR159RVN/summary/ENCFF028YVD.w5 128 32 2.0 mean 1500 CHIP:H3K4me1:C57BL/6 intestine postnatal (0 days) +1501 ENCFF087LVF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR161DAW/summary/ENCFF087LVF.w5 128 32 2.0 mean 1501 CHIP:H3K4me2:C57BL/6 hindbrain embryo (16.5 days) +1502 ENCFF111SQJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR161UUY/summary/ENCFF111SQJ.w5 128 32 2.0 mean 1502 CHIP:H3K9ac:C57BL/6 forebrain embryo (12.5 days) +1503 ENCFF842FSB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR167ZGV/summary/ENCFF842FSB.w5 128 32 2.0 mean 1503 CHIP:H3K4me3:C57BL/6 midbrain embryo (13.5 days) +1504 ENCFF465QRZ 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR172XOZ/summary/ENCFF465QRZ.w5 128 32 2.0 mean 1504 CHIP:H3K4me3:C57BL/6 forebrain embryo (14.5 days) +1505 ENCFF289PYS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR175KBJ/summary/ENCFF289PYS.w5 128 32 2.0 mean 1505 CHIP:H3K27ac:C57BL/6 liver embryo (13.5 days) +1506 ENCFF480NZE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR175QZX/summary/ENCFF480NZE.w5 128 32 2.0 mean 1506 CHIP:H3K36me3:C57BL/6 hindbrain embryo (11.5 days) +1507 ENCFF994FKT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR176BXC/summary/ENCFF994FKT.w5 128 32 2.0 mean 1507 CHIP:H3K4me3:C57BL/6 limb embryo (14.5 days) +1508 ENCFF837LYE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR185JBL/summary/ENCFF837LYE.w5 128 32 2.0 mean 1508 CHIP:H3K4me3:C57BL/6 hindbrain embryo (14.5 days) +1509 ENCFF361RBA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR186PIV/summary/ENCFF361RBA.w5 128 32 2.0 mean 1509 CHIP:H3K9ac:C57BL/6 hindbrain embryo (14.5 days) +1510 ENCFF768XVB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR190RKJ/summary/ENCFF768XVB.w5 128 32 2.0 mean 1510 CHIP:H3K9ac:C57BL/6 hindbrain embryo (13.5 days) +1511 ENCFF258AAD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR191XHV/summary/ENCFF258AAD.w5 128 32 2.0 mean 1511 CHIP:H3K9me3:C57BL/6 liver embryo (13.5 days) +1512 ENCFF900GLU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR193AEW/summary/ENCFF900GLU.w5 128 32 2.0 mean 1512 CHIP:H3K4me2:C57BL/6 embryonic facial prominence embryo (15.5 days) +1513 ENCFF353ZBQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR194GNY/summary/ENCFF353ZBQ.w5 128 32 2.0 mean 1513 CHIP:GATA4:C57BL/6xSv129 liver adult +1514 ENCFF877WYE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR195NDO/summary/ENCFF877WYE.w5 128 32 2.0 mean 1514 CHIP:H3K4me2:C57BL/6 embryonic facial prominence embryo (12.5 days) +1515 ENCFF553YRO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR196ENU/summary/ENCFF553YRO.w5 128 32 2.0 mean 1515 CHIP:H3K4me1:C57BL/6 kidney 
embryo (14.5 days) +1516 ENCFF115TVW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR198ACZ/summary/ENCFF115TVW.w5 128 32 2.0 mean 1516 CHIP:H3K4me3:C57BL/6 intestine postnatal (0 days) +1517 ENCFF002MTC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR203KIB/summary/ENCFF002MTC.w5 128 32 2.0 mean 1517 CHIP:H3K4me3:C57BL/6 midbrain embryo (14.5 days) +1518 ENCFF692XLZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR205XBQ/summary/ENCFF692XLZ.w5 128 32 2.0 mean 1518 CHIP:H3K36me3:C57BL/6 midbrain embryo (16.5 days) +1519 ENCFF795DAU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR207UMX/summary/ENCFF795DAU.w5 128 32 2.0 mean 1519 CHIP:H3K36me3:C57BL/6 neural tube embryo (15.5 days) +1520 ENCFF469DBC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR212KGS/summary/ENCFF469DBC.w5 128 32 2.0 mean 1520 CHIP:H3K4me3:129 E14TG2a.4 +1521 ENCFF305IHU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR222IHX/summary/ENCFF305IHU.w5 128 32 2.0 mean 1521 CHIP:H3K27ac:C57BL/6 heart embryo (11.5 days) +1522 ENCFF705MSQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR223FDX/summary/ENCFF705MSQ.w5 128 32 2.0 mean 1522 CHIP:H3K9me3:C57BL/6 neural tube embryo (14.5 days) +1523 ENCFF563GBE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR227UTY/summary/ENCFF563GBE.w5 128 32 2.0 mean 1523 CHIP:H3K36me3:C57BL/6 forebrain embryo (14.5 days) +1524 ENCFF865VFL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR229LTY/summary/ENCFF865VFL.w5 128 32 2.0 mean 1524 CHIP:H3K27me3:C57BL/6 limb embryo (12.5 days) +1525 ENCFF755WDT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR231CHC/summary/ENCFF755WDT.w5 128 32 2.0 mean 1525 CHIP:H3K27me3:C57BL/6 limb embryo (15.5 days) +1526 ENCFF841DUP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR233LBT/summary/ENCFF841DUP.w5 128 32 2.0 mean 1526 CHIP:H3K36me3:C57BL/6 limb embryo (12.5 days) +1527 ENCFF263ROX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR234ISO/summary/ENCFF263ROX.w5 128 32 2.0 mean 1527 CHIP:H3K4me1:C57BL/6 liver embryo 
(14.5 days) +1528 ENCFF791DCG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR234YTN/summary/ENCFF791DCG.w5 128 32 2.0 mean 1528 CHIP:H3K4me2:C57BL/6 hindbrain embryo (14.5 days) +1529 ENCFF690EIB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR235JCY/summary/ENCFF690EIB.w5 128 32 2.0 mean 1529 CHIP:H3K36me3:C57BL/6 heart postnatal (0 days) +1530 ENCFF920CEX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR238SGC/summary/ENCFF920CEX.w5 128 32 2.0 mean 1530 CHIP:H3K4me1:C57BL/6 limb embryo (10.5 days) +1531 ENCFF399KQJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR238ZCJ/summary/ENCFF399KQJ.w5 128 32 2.0 mean 1531 CHIP:H3K9me3:C57BL/6 lung postnatal (0 days) +1532 ENCFF046KVG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR239TSZ/summary/ENCFF046KVG.w5 128 32 2.0 mean 1532 CHIP:H3K36me3:C57BL/6 heart embryo (13.5 days) +1533 ENCFF217EPP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR240OUM/summary/ENCFF217EPP.w5 128 32 2.0 mean 1533 CHIP:H3K27me3:C57BL/6 neural tube embryo (11.5 days) +1534 ENCFF789UMC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR241BSK/summary/ENCFF789UMC.w5 128 32 2.0 mean 1534 CHIP:H3K27ac:C57BL/6 neural tube embryo (15.5 days) +1535 ENCFF918CGY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR242PHH/summary/ENCFF918CGY.w5 128 32 2.0 mean 1535 CHIP:H3K36me3:C57BL/6 limb embryo (10.5 days) +1536 ENCFF193NMX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR243JOL/summary/ENCFF193NMX.w5 128 32 2.0 mean 1536 CHIP:H3K4me1:C57BL/6 forebrain embryo (12.5 days) +1537 ENCFF205JZO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR244ASF/summary/ENCFF205JZO.w5 128 32 2.0 mean 1537 CHIP:H3K4me3:C57BL/6 limb embryo (15.5 days) +1538 ENCFF800NSG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR244KCW/summary/ENCFF800NSG.w5 128 32 2.0 mean 1538 CHIP:H3K27me3:C57BL/6 liver embryo (11.5 days) +1539 ENCFF610GUY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR249AUA/summary/ENCFF610GUY.w5 128 32 2.0 mean 1539 CHIP:H3K9ac:C57BL/6 lung embryo 
(14.5 days) +1540 ENCFF165DVJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR250LXH/summary/ENCFF165DVJ.w5 128 32 2.0 mean 1540 CHIP:H3K36me3:C57BL/6 lung postnatal (0 days) +1541 ENCFF746ONG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR251LFV/summary/ENCFF746ONG.w5 128 32 2.0 mean 1541 CHIP:H3K4me1:C57BL/6 heart embryo (16.5 days) +1542 ENCFF315VST /home/drk/tillage/datasets/mouse/chip/encode/ENCSR252GKD/summary/ENCFF315VST.w5 128 32 2.0 mean 1542 CHIP:H3K4me3:C57BL/6 liver embryo (16.5 days) +1543 ENCFF406MDW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR252ONR/summary/ENCFF406MDW.w5 128 32 2.0 mean 1543 CHIP:H3K27ac:C57BL/6 midbrain embryo (12.5 days) +1544 ENCFF704PEM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR253IEG/summary/ENCFF704PEM.w5 128 32 2.0 mean 1544 CHIP:H3K4me1:C57BL/6 midbrain embryo (13.5 days) +1545 ENCFF785WPG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR253QPK/summary/ENCFF785WPG.w5 128 32 2.0 mean 1545 CHIP:H3K36me3:129 E14TG2a.4 +1546 ENCFF900XUW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR254AHA/summary/ENCFF900XUW.w5 128 32 2.0 mean 1546 CHIP:H3K27ac:C57BL/6 midbrain embryo (14.5 days) +1547 ENCFF983GFL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR257GTR/summary/ENCFF983GFL.w5 128 32 2.0 mean 1547 CHIP:H3K27me3:C57BL/6 embryonic facial prominence embryo (12.5 days) +1548 ENCFF667NCZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR257JSX/summary/ENCFF667NCZ.w5 128 32 2.0 mean 1548 CHIP:H3K4me1:C57BL/6 hindbrain embryo (16.5 days) +1549 ENCFF218NHY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR258AED/summary/ENCFF218NHY.w5 128 32 2.0 mean 1549 CHIP:H3K4me1:C57BL/6 embryonic facial prominence embryo (13.5 days) +1550 ENCFF755OHM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR258RGT/summary/ENCFF755OHM.w5 128 32 2.0 mean 1550 CHIP:H3K4me2:C57BL/6 liver embryo (15.5 days) +1551 ENCFF330NUS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR258YWW/summary/ENCFF330NUS.w5 128 32 2.0 mean 1551 
CHIP:H3K4me3:C57BL/6 forebrain postnatal (0 days) +1552 ENCFF408PKX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR259KYK/summary/ENCFF408PKX.w5 128 32 2.0 mean 1552 CHIP:H3K27me3:C57BL/6 hindbrain embryo (15.5 days) +1553 ENCFF657UHZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR260OUP/summary/ENCFF657UHZ.w5 128 32 2.0 mean 1553 CHIP:H3K9ac:C57BL/6 intestine embryo (14.5 days) +1554 ENCFF770WCB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR261OAK/summary/ENCFF770WCB.w5 128 32 2.0 mean 1554 CHIP:H3K4me2:C57BL/6 midbrain embryo (11.5 days) +1555 ENCFF377ZXP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR262CSU/summary/ENCFF377ZXP.w5 128 32 2.0 mean 1555 CHIP:H3K4me3:C57BL/6 lung postnatal (0 days) +1556 ENCFF428RXK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR263CKR/summary/ENCFF428RXK.w5 128 32 2.0 mean 1556 CHIP:H3K4me1:C57BL/6 neural tube embryo (12.5 days) +1557 ENCFF849QJF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR264HHL/summary/ENCFF849QJF.w5 128 32 2.0 mean 1557 CHIP:H3K36me3:C57BL/6 limb embryo (15.5 days) +1558 ENCFF921KVA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR264TXI/summary/ENCFF921KVA.w5 128 32 2.0 mean 1558 CHIP:H3K4me2:C57BL/6 forebrain embryo (14.5 days) +1559 ENCFF494OOQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR265NBM/summary/ENCFF494OOQ.w5 128 32 2.0 mean 1559 CHIP:H3K27ac:C57BL/6 neural tube embryo (14.5 days) +1560 ENCFF046VWN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR266JQW/summary/ENCFF046VWN.w5 128 32 2.0 mean 1560 CHIP:H3K27me3:C57BL/6 heart embryo (10.5 days) +1561 ENCFF034URR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR270WWV/summary/ENCFF034URR.w5 128 32 2.0 mean 1561 CHIP:H3K9me3:C57BL/6 forebrain embryo (14.5 days) +1562 ENCFF853FDI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR272GNQ/summary/ENCFF853FDI.w5 128 32 2.0 mean 1562 CHIP:H3K4me1:C57BL/6 midbrain embryo (10.5 days) +1563 ENCFF353QFF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR272XPJ/summary/ENCFF353QFF.w5 
128 32 2.0 mean 1563 CHIP:H3K36me3:C57BL/6 intestine embryo (16.5 days) +1564 ENCFF443SGX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR274ACJ/summary/ENCFF443SGX.w5 128 32 2.0 mean 1564 CHIP:H3K9ac:C57BL/6 limb embryo (14.5 days) +1565 ENCFF827WXG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR275KPI/summary/ENCFF827WXG.w5 128 32 2.0 mean 1565 CHIP:H3K27ac:C57BL/6 forebrain embryo (11.5 days) +1566 ENCFF886OPQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR282GAG/summary/ENCFF886OPQ.w5 128 32 2.0 mean 1566 CHIP:H3K27me3:C57BL/6 stomach embryo (15.5 days) +1567 ENCFF950JKI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR282OTI/summary/ENCFF950JKI.w5 128 32 2.0 mean 1567 CHIP:H3K4me2:C57BL/6 stomach embryo (16.5 days) +1568 ENCFF081AUZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR283RFW/summary/ENCFF081AUZ.w5 128 32 2.0 mean 1568 CHIP:H3K4me3:C57BL/6 midbrain embryo (11.5 days) +1569 ENCFF798NZC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR285IQN/summary/ENCFF798NZC.w5 128 32 2.0 mean 1569 CHIP:H3K27me3:C57BL/6 embryonic facial prominence embryo (10.5 days) +1570 ENCFF851ZPG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR286IGS/summary/ENCFF851ZPG.w5 128 32 2.0 mean 1570 CHIP:H3K9ac:C57BL/6 limb embryo (11.5 days) +1571 ENCFF147ZWY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR286OKN/summary/ENCFF147ZWY.w5 128 32 2.0 mean 1571 CHIP:H3K9ac:C57BL/6 midbrain embryo (13.5 days) +1572 ENCFF761AXJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR288LNF/summary/ENCFF761AXJ.w5 128 32 2.0 mean 1572 CHIP:H3K4me2:C57BL/6 stomach postnatal (0 days) +1573 ENCFF495TXX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR288OHO/summary/ENCFF495TXX.w5 128 32 2.0 mean 1573 CHIP:H3K9ac:C57BL/6 kidney postnatal (0 days) +1574 ENCFF066PYN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR289SWJ/summary/ENCFF066PYN.w5 128 32 2.0 mean 1574 CHIP:H3K27ac:C57BL/6 neural tube embryo (13.5 days) +1575 ENCFF613PHO 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR293ORS/summary/ENCFF613PHO.w5 128 32 2.0 mean 1575 CHIP:H3K9ac:C57BL/6 stomach embryo (14.5 days) +1576 ENCFF600CWP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR293WTN/summary/ENCFF600CWP.w5 128 32 2.0 mean 1576 CHIP:ELF1:B10.H-2aH-4bp/Wts CH12.LX +1577 ENCFF389JEL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR295PFM/summary/ENCFF389JEL.w5 128 32 2.0 mean 1577 CHIP:H3K4me3:C57BL/6 lung embryo (16.5 days) +1578 ENCFF004EZJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR295ZLV/summary/ENCFF004EZJ.w5 128 32 2.0 mean 1578 CHIP:H3K36me3:C57BL/6 liver embryo (12.5 days) +1579 ENCFF597HKP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR297CHD/summary/ENCFF597HKP.w5 128 32 2.0 mean 1579 CHIP:H3K27me3:C57BL/6 forebrain embryo (12.5 days) +1580 ENCFF499LUF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR299DRH/summary/ENCFF499LUF.w5 128 32 2.0 mean 1580 CHIP:H3K9me3:C57BL/6 hindbrain embryo (15.5 days) +1581 ENCFF570FAF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR299NCJ/summary/ENCFF570FAF.w5 128 32 2.0 mean 1581 CHIP:H3K9me3:C57BL/6 hindbrain embryo (11.5 days) +1582 ENCFF349EPV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR302HOJ/summary/ENCFF349EPV.w5 128 32 2.0 mean 1582 CHIP:H3K27me3:C57BL/6 hindbrain embryo (12.5 days) +1583 ENCFF551UEX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR305GII/summary/ENCFF551UEX.w5 128 32 2.0 mean 1583 CHIP:H3K4me3:C57BL/6 lung embryo (15.5 days) +1584 ENCFF323FCP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR308GFM/summary/ENCFF323FCP.w5 128 32 2.0 mean 1584 CHIP:H3K4me1:C57BL/6 liver postnatal (0 days) +1585 ENCFF156SIF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR308TAV/summary/ENCFF156SIF.w5 128 32 2.0 mean 1585 CHIP:H3K27me3:C57BL/6 hindbrain postnatal (0 days) +1586 ENCFF492VCK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR311LZM/summary/ENCFF492VCK.w5 128 32 2.0 mean 1586 CHIP:H3K9me3:C57BL/6 lung embryo (14.5 days) +1587 ENCFF402XZO 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR311TLE/summary/ENCFF402XZO.w5 128 32 2.0 mean 1587 CHIP:H3K9me3:C57BL/6 midbrain embryo (11.5 days) +1588 ENCFF776ZXU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR311VKI/summary/ENCFF776ZXU.w5 128 32 2.0 mean 1588 CHIP:H3K9me3:C57BL/6 intestine embryo (15.5 days) +1589 ENCFF107UHG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR311YPF/summary/ENCFF107UHG.w5 128 32 2.0 mean 1589 CHIP:H3K27ac:C57BL/6 forebrain embryo (13.5 days) +1590 ENCFF909MUJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR312BOK/summary/ENCFF909MUJ.w5 128 32 2.0 mean 1590 CHIP:H3K9ac:C57BL/6 midbrain embryo (12.5 days) +1591 ENCFF152HAY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR314FGC/summary/ENCFF152HAY.w5 128 32 2.0 mean 1591 CHIP:H3K4me2:C57BL/6 midbrain embryo (16.5 days) +1592 ENCFF356LUU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR315ZEU/summary/ENCFF356LUU.w5 128 32 2.0 mean 1592 CHIP:H3K4me3:C57BL/6 hindbrain embryo (13.5 days) +1593 ENCFF919YGD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR316AZB/summary/ENCFF919YGD.w5 128 32 2.0 mean 1593 CHIP:H3K9me3:C57BL/6 embryonic facial prominence embryo (12.5 days) +1594 ENCFF036PPT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR316CNR/summary/ENCFF036PPT.w5 128 32 2.0 mean 1594 CHIP:H3K27ac:C57BL/6 stomach embryo (14.5 days) +1595 ENCFF563HQA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR318QFY/summary/ENCFF563HQA.w5 128 32 2.0 mean 1595 CHIP:H3K4me3:C57BL/6 limb embryo (10.5 days) +1596 ENCFF627ZLA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR320CCJ/summary/ENCFF627ZLA.w5 128 32 2.0 mean 1596 CHIP:H3K4me3:C57BL/6 embryonic facial prominence embryo (11.5 days) +1597 ENCFF527FOD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR320EEW/summary/ENCFF527FOD.w5 128 32 2.0 mean 1597 CHIP:H3K27ac:C57BL/6 forebrain embryo (14.5 days) +1598 ENCFF354BAJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR320UJD/summary/ENCFF354BAJ.w5 128 32 2.0 mean 1598 
CHIP:H3K4me3:C57BL/6 kidney embryo (15.5 days) +1599 ENCFF952PTF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR322XHD/summary/ENCFF952PTF.w5 128 32 2.0 mean 1599 CHIP:H3K4me2:C57BL/6 midbrain postnatal (0 days) +1600 ENCFF768DOK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR323NOC/summary/ENCFF768DOK.w5 128 32 2.0 mean 1600 CHIP:H3K4me2:C57BL/6 hindbrain embryo (11.5 days) +1601 ENCFF779TQD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR325JLI/summary/ENCFF779TQD.w5 128 32 2.0 mean 1601 CHIP:H3K9me3:C57BL/6 hindbrain embryo (13.5 days) +1602 ENCFF474PLN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR328WMV/summary/ENCFF474PLN.w5 128 32 2.0 mean 1602 CHIP:H3K36me3:C57BL/6 heart embryo (10.5 days) +1603 ENCFF665WCP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR332JYZ/summary/ENCFF665WCP.w5 128 32 2.0 mean 1603 CHIP:H3K27ac:C57BL/6 hindbrain postnatal (0 days) +1604 ENCFF641CTW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR332UCY/summary/ENCFF641CTW.w5 128 32 2.0 mean 1604 CHIP:H3K9ac:C57BL/6 limb embryo (15.5 days) +1605 ENCFF033IOU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR333QJO/summary/ENCFF033IOU.w5 128 32 2.0 mean 1605 CHIP:H3K9me3:C57BL/6 limb embryo (14.5 days) +1606 ENCFF877BSP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR333ZWB/summary/ENCFF877BSP.w5 128 32 2.0 mean 1606 CHIP:H3K4me2:C57BL/6 intestine postnatal (0 days) +1607 ENCFF576SVQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR335TVR/summary/ENCFF576SVQ.w5 128 32 2.0 mean 1607 CHIP:H3K4me3:C57BL/6 hindbrain embryo (15.5 days) +1608 ENCFF866OFV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR335WME/summary/ENCFF866OFV.w5 128 32 2.0 mean 1608 CHIP:H3K4me1:C57BL/6 stomach embryo (14.5 days) +1609 ENCFF035UHY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR337XUK/summary/ENCFF035UHY.w5 128 32 2.0 mean 1609 CHIP:H3K27me3:C57BL/6 liver embryo (14.5 days) +1610 ENCFF816ESX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR339LMJ/summary/ENCFF816ESX.w5 128 32 2.0 mean 
1610 CHIP:H3K4me1:C57BL/6 heart embryo (15.5 days) +1611 ENCFF149AGA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR340ROY/summary/ENCFF149AGA.w5 128 32 2.0 mean 1611 CHIP:H3K27me3:C57BL/6 midbrain postnatal (0 days) +1612 ENCFF382IWW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR340VNK/summary/ENCFF382IWW.w5 128 32 2.0 mean 1612 CHIP:H3K4me2:C57BL/6 lung postnatal (0 days) +1613 ENCFF040LFN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR341XCL/summary/ENCFF040LFN.w5 128 32 2.0 mean 1613 CHIP:H3K9me3:C57BL/6 liver postnatal (0 days) +1614 ENCFF275WHD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR344BPJ/summary/ENCFF275WHD.w5 128 32 2.0 mean 1614 CHIP:H3K4me2:C57BL/6 liver embryo (14.5 days) +1615 ENCFF923TVC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR344HHI/summary/ENCFF923TVC.w5 128 32 2.0 mean 1615 CHIP:H3K27ac:C57BL/6 hindbrain embryo (13.5 days) +1616 ENCFF701NCZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR345DDI/summary/ENCFF701NCZ.w5 128 32 2.0 mean 1616 CHIP:H3K36me3:C57BL/6 forebrain embryo (12.5 days) +1617 ENCFF495VRT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR345HNA/summary/ENCFF495VRT.w5 128 32 2.0 mean 1617 CHIP:H3K9me3:C57BL/6 neural tube embryo (13.5 days) +1618 ENCFF770FZI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR345RKE/summary/ENCFF770FZI.w5 128 32 2.0 mean 1618 CHIP:H3K9ac:C57BL/6 heart embryo (14.5 days) +1619 ENCFF224LDB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR346FJG/summary/ENCFF224LDB.w5 128 32 2.0 mean 1619 CHIP:H3K27ac:C57BL/6 stomach postnatal (0 days) +1620 ENCFF323AGE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR347BFO/summary/ENCFF323AGE.w5 128 32 2.0 mean 1620 CHIP:H3K9ac:C57BL/6 midbrain embryo (15.5 days) +1621 ENCFF894YTN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR349JDH/summary/ENCFF894YTN.w5 128 32 2.0 mean 1621 CHIP:TCF12:DBA/2 MEL cell line +1622 ENCFF429OYS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR349NIS/summary/ENCFF429OYS.w5 128 32 2.0 mean 1622 
CHIP:H3K4me1:C57BL/6 hindbrain embryo (13.5 days) +1623 ENCFF423EGY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR352AWJ/summary/ENCFF423EGY.w5 128 32 2.0 mean 1623 CHIP:H3K36me3:C57BL/6 forebrain embryo (16.5 days) +1624 ENCFF809PQF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR352NVU/summary/ENCFF809PQF.w5 128 32 2.0 mean 1624 CHIP:H3K9me3:C57BL/6 forebrain embryo (16.5 days) +1625 ENCFF820UTZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR356LBC/summary/ENCFF820UTZ.w5 128 32 2.0 mean 1625 CHIP:H3K9ac:C57BL/6 lung postnatal (0 days) +1626 ENCFF132TLS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR357JII/summary/ENCFF132TLS.w5 128 32 2.0 mean 1626 CHIP:H3K27ac:C57BL/6 kidney embryo (16.5 days) +1627 ENCFF129FZH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR357OED/summary/ENCFF129FZH.w5 128 32 2.0 mean 1627 CHIP:H3K27me3:C57BL/6 heart embryo (14.5 days) +1628 ENCFF981JRQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR358XYP/summary/ENCFF981JRQ.w5 128 32 2.0 mean 1628 CHIP:H3K9me3:C57BL/6 intestine embryo (16.5 days) +1629 ENCFF689PCR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR360ANE/summary/ENCFF689PCR.w5 128 32 2.0 mean 1629 CHIP:H3K27ac:C57BL/6 heart embryo (14.5 days) +1630 ENCFF363RRH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR362PBD/summary/ENCFF363RRH.w5 128 32 2.0 mean 1630 CHIP:H3K36me3:C57BL/6 kidney postnatal (0 days) +1631 ENCFF454YWR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR362VNF/summary/ENCFF454YWR.w5 128 32 2.0 mean 1631 CHIP:CTCF:129 E14TG2a.4 +1632 ENCFF919CSS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR365CUP/summary/ENCFF919CSS.w5 128 32 2.0 mean 1632 CHIP:H3K9me3:C57BL/6 hindbrain postnatal (0 days) +1633 ENCFF149KHB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR368RJD/summary/ENCFF149KHB.w5 128 32 2.0 mean 1633 CHIP:H3K4me3:C57BL/6 forebrain embryo (10.5 days) +1634 ENCFF841NIU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR369RBO/summary/ENCFF841NIU.w5 128 32 2.0 mean 1634 
CHIP:H3K9ac:C57BL/6 forebrain postnatal (0 days) +1635 ENCFF558KHU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR370MSK/summary/ENCFF558KHU.w5 128 32 2.0 mean 1635 CHIP:H3K4me2:C57BL/6 neural tube embryo (12.5 days) +1636 ENCFF440SVM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR373LNE/summary/ENCFF440SVM.w5 128 32 2.0 mean 1636 CHIP:H3K9ac:C57BL/6 heart embryo (16.5 days) +1637 ENCFF099OBO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR373OCX/summary/ENCFF099OBO.w5 128 32 2.0 mean 1637 CHIP:H3K4me2:C57BL/6 heart postnatal (0 days) +1638 ENCFF421NWC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR373TMR/summary/ENCFF421NWC.w5 128 32 2.0 mean 1638 CHIP:H3K9ac:C57BL/6 forebrain embryo (13.5 days) +1639 ENCFF352EVI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR375GSG/summary/ENCFF352EVI.w5 128 32 2.0 mean 1639 CHIP:H3K27me3:C57BL/6 hindbrain embryo (11.5 days) +1640 ENCFF901MQW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR375RUA/summary/ENCFF901MQW.w5 128 32 2.0 mean 1640 CHIP:H3K27me3:C57BL/6 neural tube embryo (12.5 days) +1641 ENCFF062SWO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR377RKW/summary/ENCFF062SWO.w5 128 32 2.0 mean 1641 CHIP:H3K4me2:C57BL/6 heart embryo (15.5 days) +1642 ENCFF972QDT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR380BBL/summary/ENCFF972QDT.w5 128 32 2.0 mean 1642 CHIP:H3K27me3:C57BL/6 heart embryo (13.5 days) +1643 ENCFF033PTH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR382DRK/summary/ENCFF033PTH.w5 128 32 2.0 mean 1643 CHIP:H3K27ac:C57BL/6 embryonic facial prominence embryo (15.5 days) +1644 ENCFF534WMK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR387YSD/summary/ENCFF534WMK.w5 128 32 2.0 mean 1644 CHIP:H3K4me1:C57BL/6 lung embryo (16.5 days) +1645 ENCFF133XFD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR389EYR/summary/ENCFF133XFD.w5 128 32 2.0 mean 1645 CHIP:H3K27me3:C57BL/6 intestine postnatal (0 days) +1646 ENCFF143LAV 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR391WSS/summary/ENCFF143LAV.w5 128 32 2.0 mean 1646 CHIP:H3K4me1:C57BL/6 midbrain postnatal (0 days) +1647 ENCFF619BQO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR392DGA/summary/ENCFF619BQO.w5 128 32 2.0 mean 1647 CHIP:POU5F1:129 E14TG2a.4 +1648 ENCFF691ULU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR397RHW/summary/ENCFF691ULU.w5 128 32 2.0 mean 1648 CHIP:CTCF:C57BL/6 liver embryo (14.5 days) +1649 ENCFF645JTI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR398TTY/summary/ENCFF645JTI.w5 128 32 2.0 mean 1649 CHIP:H3K4me2:C57BL/6 lung embryo (15.5 days) +1650 ENCFF324UGW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR399UVI/summary/ENCFF324UGW.w5 128 32 2.0 mean 1650 CHIP:H3K27me3:C57BL/6 kidney embryo (14.5 days) +1651 ENCFF855PPC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR400TGE/summary/ENCFF855PPC.w5 128 32 2.0 mean 1651 CHIP:H3K9ac:C57BL/6 forebrain embryo (11.5 days) +1652 ENCFF089IXS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR401GRX/summary/ENCFF089IXS.w5 128 32 2.0 mean 1652 CHIP:H3K27ac:C57BL/6 embryonic facial prominence embryo (11.5 days) +1653 ENCFF083ZPL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR402ZLE/summary/ENCFF083ZPL.w5 128 32 2.0 mean 1653 CHIP:H3K4me2:C57BL/6 forebrain embryo (15.5 days) +1654 ENCFF433RSJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR405TGI/summary/ENCFF433RSJ.w5 128 32 2.0 mean 1654 CHIP:H3K9me3:C57BL/6 kidney embryo (14.5 days) +1655 ENCFF671LRY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR406KDB/summary/ENCFF671LRY.w5 128 32 2.0 mean 1655 CHIP:H3K4me2:C57BL/6 heart embryo (12.5 days) +1656 ENCFF623HDU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR406VPQ/summary/ENCFF623HDU.w5 128 32 2.0 mean 1656 CHIP:H3K4me2:C57BL/6 embryonic facial prominence embryo (11.5 days) +1657 ENCFF525EWD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR410YIY/summary/ENCFF525EWD.w5 128 32 2.0 mean 1657 CHIP:H3K4me3:C57BL/6 intestine embryo (15.5 days) 
+1658 ENCFF104WUV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR412ODT/summary/ENCFF104WUV.w5 128 32 2.0 mean 1658 CHIP:H3K9ac:C57BL/6 intestine embryo (15.5 days) +1659 ENCFF290CLV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR415FLZ/summary/ENCFF290CLV.w5 128 32 2.0 mean 1659 CHIP:H3K4me2:C57BL/6 stomach embryo (15.5 days) +1660 ENCFF856ZXY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR416OYH/summary/ENCFF856ZXY.w5 128 32 2.0 mean 1660 CHIP:H3K4me3:C57BL/6 limb embryo (13.5 days) +1661 ENCFF056IYN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR417TXZ/summary/ENCFF056IYN.w5 128 32 2.0 mean 1661 CHIP:H3K4me1:C57BL/6 hindbrain embryo (14.5 days) +1662 ENCFF739QBQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR418SBY/summary/ENCFF739QBQ.w5 128 32 2.0 mean 1662 CHIP:CTCF:C57BL/6 lung postnatal (0 days) +1663 ENCFF654PSV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR419MSI/summary/ENCFF654PSV.w5 128 32 2.0 mean 1663 CHIP:H3K4me1:C57BL/6 liver embryo (11.5 days) +1664 ENCFF306YHS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR420MUV/summary/ENCFF306YHS.w5 128 32 2.0 mean 1664 CHIP:H3K27ac:C57BL/6 embryonic facial prominence embryo (13.5 days) +1665 ENCFF251XZW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR424END/summary/ENCFF251XZW.w5 128 32 2.0 mean 1665 CHIP:H3K27ac:C57BL/6 intestine embryo (14.5 days) +1666 ENCFF133NGW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR425FLT/summary/ENCFF133NGW.w5 128 32 2.0 mean 1666 CHIP:H3K36me3:C57BL/6 kidney embryo (14.5 days) +1667 ENCFF255RIU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR426EZM/summary/ENCFF255RIU.w5 128 32 2.0 mean 1667 CHIP:H3K4me1:C57BL/6 limb embryo (13.5 days) +1668 ENCFF638WLA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR427OZM/summary/ENCFF638WLA.w5 128 32 2.0 mean 1668 CHIP:H3K4me3:C57BL/6 neural tube embryo (11.5 days) +1669 ENCFF339HOE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR427ZJU/summary/ENCFF339HOE.w5 128 32 2.0 mean 1669 CHIP:H3K4me3:C57BL/6 
midbrain postnatal (0 days) +1670 ENCFF637BYB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR427ZQB/summary/ENCFF637BYB.w5 128 32 2.0 mean 1670 CHIP:H3K9me3:C57BL/6 hindbrain embryo (16.5 days) +1671 ENCFF616CNP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR428GHF/summary/ENCFF616CNP.w5 128 32 2.0 mean 1671 CHIP:H3K27ac:C57BL/6 midbrain embryo (15.5 days) +1672 ENCFF826KEG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR428OEK/summary/ENCFF826KEG.w5 128 32 2.0 mean 1672 CHIP:H3K27ac:C57BL/6 forebrain embryo (16.5 days) +1673 ENCFF163DQS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR430JWT/summary/ENCFF163DQS.w5 128 32 2.0 mean 1673 CHIP:H3K9ac:C57BL/6 lung embryo (16.5 days) +1674 ENCFF118SNE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR433ESG/summary/ENCFF118SNE.w5 128 32 2.0 mean 1674 CHIP:H3K4me3:C57BL/6 liver embryo (14.5 days) +1675 ENCFF436MUY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR436FYE/summary/ENCFF436MUY.w5 128 32 2.0 mean 1675 CHIP:H3K4me1:C57BL/6 kidney embryo (15.5 days) +1676 ENCFF044FTG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR437NPG/summary/ENCFF044FTG.w5 128 32 2.0 mean 1676 CHIP:H3K27me3:C57BL/6 heart embryo (15.5 days) +1677 ENCFF393XPO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR437SFX/summary/ENCFF393XPO.w5 128 32 2.0 mean 1677 CHIP:H3K36me3:C57BL/6 forebrain embryo (15.5 days) +1678 ENCFF131MXX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR442RYY/summary/ENCFF131MXX.w5 128 32 2.0 mean 1678 CHIP:H3K4me1:C57BL/6 heart embryo (12.5 days) +1679 ENCFF362GRX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR445UYH/summary/ENCFF362GRX.w5 128 32 2.0 mean 1679 CHIP:H3K36me3:C57BL/6 neural tube embryo (11.5 days) +1680 ENCFF350KKE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR447BVY/summary/ENCFF350KKE.w5 128 32 2.0 mean 1680 CHIP:H3K36me3:C57BL/6 embryonic facial prominence embryo (13.5 days) +1681 ENCFF053JUA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR447DOF/summary/ENCFF053JUA.w5 128 32 2.0 
mean 1681 CHIP:H3K4me3:C57BL/6 liver embryo (11.5 days) +1682 ENCFF813VAT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR448TTC/summary/ENCFF813VAT.w5 128 32 2.0 mean 1682 CHIP:H3K4me1:C57BL/6 neural tube embryo (11.5 days) +1683 ENCFF833YTL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR449EUZ/summary/ENCFF833YTL.w5 128 32 2.0 mean 1683 CHIP:H3K4me1:C57BL/6 midbrain embryo (15.5 days) +1684 ENCFF554FAE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR450ITF/summary/ENCFF554FAE.w5 128 32 2.0 mean 1684 CHIP:H3K4me1:C57BL/6 midbrain embryo (11.5 days) +1685 ENCFF665UTM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR450KVW/summary/ENCFF665UTM.w5 128 32 2.0 mean 1685 CHIP:EP300:C57BL/6 stomach postnatal (0 days) +1686 ENCFF173XOV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR452WYC/summary/ENCFF173XOV.w5 128 32 2.0 mean 1686 CHIP:H3K27ac:C57BL/6 lung embryo (14.5 days) +1687 ENCFF172OCI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR455REA/summary/ENCFF172OCI.w5 128 32 2.0 mean 1687 CHIP:H3K9me3:C57BL/6 heart embryo (10.5 days) +1688 ENCFF952ZFO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR455UTX/summary/ENCFF952ZFO.w5 128 32 2.0 mean 1688 CHIP:H3K36me3:C57BL/6 kidney embryo (15.5 days) +1689 ENCFF344FSI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR457HDY/summary/ENCFF344FSI.w5 128 32 2.0 mean 1689 CHIP:H3K27me3:C57BL/6 embryonic facial prominence embryo (14.5 days) +1690 ENCFF362OAA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR458FBL/summary/ENCFF362OAA.w5 128 32 2.0 mean 1690 CHIP:H3K4me2:C57BL/6 liver embryo (16.5 days) +1691 ENCFF562BME /home/drk/tillage/datasets/mouse/chip/encode/ENCSR458PAO/summary/ENCFF562BME.w5 128 32 2.0 mean 1691 CHIP:H3K36me3:C57BL/6 hindbrain postnatal (0 days) +1692 ENCFF534ANR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR462BZP/summary/ENCFF534ANR.w5 128 32 2.0 mean 1692 CHIP:H3K9ac:C57BL/6 limb embryo (13.5 days) +1693 ENCFF181ZVG 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR462KTT/summary/ENCFF181ZVG.w5 128 32 2.0 mean 1693 CHIP:H3K4me2:C57BL/6 forebrain embryo (16.5 days) +1694 ENCFF207EPW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR463GJL/summary/ENCFF207EPW.w5 128 32 2.0 mean 1694 CHIP:H3K9me3:C57BL/6 heart embryo (11.5 days) +1695 ENCFF751FCH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR464MQU/summary/ENCFF751FCH.w5 128 32 2.0 mean 1695 CHIP:H3K4me3:C57BL/6 intestine embryo (14.5 days) +1696 ENCFF695KNJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR465PLB/summary/ENCFF695KNJ.w5 128 32 2.0 mean 1696 CHIP:H3K4me1:C57BL/6 forebrain postnatal (0 days) +1697 ENCFF025FNF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR465TIZ/summary/ENCFF025FNF.w5 128 32 2.0 mean 1697 CHIP:H3K27me3:C57BL/6 midbrain embryo (16.5 days) +1698 ENCFF973ZKZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR466ZQC/summary/ENCFF973ZKZ.w5 128 32 2.0 mean 1698 CHIP:H3K9me3:C57BL/6 neural tube embryo (11.5 days) +1699 ENCFF686TFW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR467UGM/summary/ENCFF686TFW.w5 128 32 2.0 mean 1699 CHIP:H3K36me3:C57BL/6 heart embryo (16.5 days) +1700 ENCFF487TJN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR471SJG/summary/ENCFF487TJN.w5 128 32 2.0 mean 1700 CHIP:H3K4me3:C57BL/6 liver embryo (12.5 days) +1701 ENCFF669USU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR472YGQ/summary/ENCFF669USU.w5 128 32 2.0 mean 1701 CHIP:H3K4me3:C57BL/6 hindbrain postnatal (0 days) +1702 ENCFF290NHP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR479LFP/summary/ENCFF290NHP.w5 128 32 2.0 mean 1702 CHIP:H3K27ac:C57BL/6 liver embryo (15.5 days) +1703 ENCFF071HGW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR481SGM/summary/ENCFF071HGW.w5 128 32 2.0 mean 1703 CHIP:H3K27ac:C57BL/6 embryonic facial prominence embryo (14.5 days) +1704 ENCFF419ZKE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR483DEV/summary/ENCFF419ZKE.w5 128 32 2.0 mean 1704 CHIP:H3K9ac:C57BL/6 stomach embryo 
(15.5 days) +1705 ENCFF085HHF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR483KOD/summary/ENCFF085HHF.w5 128 32 2.0 mean 1705 CHIP:H3K36me3:C57BL/6 intestine postnatal (0 days) +1706 ENCFF766FZH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR485GTY/summary/ENCFF766FZH.w5 128 32 2.0 mean 1706 CHIP:H3K4me2:C57BL/6 intestine embryo (14.5 days) +1707 ENCFF724CMA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR486MHP/summary/ENCFF724CMA.w5 128 32 2.0 mean 1707 CHIP:H3K4me3:C57BL/6 midbrain embryo (15.5 days) +1708 ENCFF544BIS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR487OLC/summary/ENCFF544BIS.w5 128 32 2.0 mean 1708 CHIP:H3K4me1:C57BL/6 liver embryo (16.5 days) +1709 ENCFF565NEA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR487RAU/summary/ENCFF565NEA.w5 128 32 2.0 mean 1709 CHIP:H3K36me3:C57BL/6 midbrain embryo (15.5 days) +1710 ENCFF064NXP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR488KVB/summary/ENCFF064NXP.w5 128 32 2.0 mean 1710 CHIP:H3K4me2:C57BL/6 limb embryo (12.5 days) +1711 ENCFF577SJR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR488OCA/summary/ENCFF577SJR.w5 128 32 2.0 mean 1711 CHIP:H3K4me1:C57BL/6 heart postnatal (0 days) +1712 ENCFF338BLF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR489HYX/summary/ENCFF338BLF.w5 128 32 2.0 mean 1712 CHIP:H3K4me2:C57BL/6 neural tube embryo (11.5 days) +1713 ENCFF278NWO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR491NUM/summary/ENCFF278NWO.w5 128 32 2.0 mean 1713 CHIP:CTCF:C57BL/6 heart postnatal (0 days) +1714 ENCFF905MWT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR496TBX/summary/ENCFF905MWT.w5 128 32 2.0 mean 1714 CHIP:H3K4me1:C57BL/6 forebrain embryo (13.5 days) +1715 ENCFF517QXW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR498EVD/summary/ENCFF517QXW.w5 128 32 2.0 mean 1715 CHIP:H3K9me3:C57BL/6 kidney postnatal (0 days) +1716 ENCFF209XEZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR499NUH/summary/ENCFF209XEZ.w5 128 32 2.0 mean 1716 CHIP:H3K9me3:C57BL/6 kidney 
embryo (15.5 days) +1717 ENCFF054NVM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR501GBJ/summary/ENCFF054NVM.w5 128 32 2.0 mean 1717 CHIP:H3K4me3:C57BL/6 heart embryo (13.5 days) +1718 ENCFF794MAE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR502WUI/summary/ENCFF794MAE.w5 128 32 2.0 mean 1718 CHIP:H3K9ac:C57BL/6 midbrain embryo (11.5 days) +1719 ENCFF073JCW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR508HYL/summary/ENCFF073JCW.w5 128 32 2.0 mean 1719 CHIP:H3K36me3:C57BL/6 hindbrain embryo (12.5 days) +1720 ENCFF397XOU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR510CGB/summary/ENCFF397XOU.w5 128 32 2.0 mean 1720 CHIP:H3K36me3:C57BL/6 liver embryo (15.5 days) +1721 ENCFF258NLM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR511LWL/summary/ENCFF258NLM.w5 128 32 2.0 mean 1721 CHIP:H3K9ac:C57BL/6 neural tube embryo (14.5 days) +1722 ENCFF832GKC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR511WPK/summary/ENCFF832GKC.w5 128 32 2.0 mean 1722 CHIP:H3K4me1:C57BL/6 embryonic facial prominence embryo (11.5 days) +1723 ENCFF893IAL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR515XWH/summary/ENCFF893IAL.w5 128 32 2.0 mean 1723 CHIP:H3K27me3:C57BL/6 intestine embryo (16.5 days) +1724 ENCFF307IYH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR516KLO/summary/ENCFF307IYH.w5 128 32 2.0 mean 1724 CHIP:H3K36me3:C57BL/6 stomach postnatal (0 days) +1725 ENCFF235MDM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR516TYD/summary/ENCFF235MDM.w5 128 32 2.0 mean 1725 CHIP:H3K4me2:C57BL/6 kidney postnatal (0 days) +1726 ENCFF087WAM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR517UZA/summary/ENCFF087WAM.w5 128 32 2.0 mean 1726 CHIP:H3K4me2:C57BL/6 neural tube embryo (15.5 days) +1727 ENCFF042PHG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR519DNE/summary/ENCFF042PHG.w5 128 32 2.0 mean 1727 CHIP:H3K9ac:C57BL/6 heart postnatal (0 days) +1728 ENCFF180VGZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR522LXN/summary/ENCFF180VGZ.w5 128 32 2.0 mean 
1728 CHIP:H3K4me3:C57BL/6 stomach embryo (15.5 days) +1729 ENCFF970OZN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR523IIH/summary/ENCFF970OZN.w5 128 32 2.0 mean 1729 CHIP:H3K4me1:C57BL/6 lung postnatal (0 days) +1730 ENCFF777NSG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR525BTK/summary/ENCFF777NSG.w5 128 32 2.0 mean 1730 CHIP:H3K4me2:C57BL/6 midbrain embryo (15.5 days) +1731 ENCFF505IIX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR526JRI/summary/ENCFF505IIX.w5 128 32 2.0 mean 1731 CHIP:H3K9me3:C57BL/6 embryonic facial prominence embryo (11.5 days) +1732 ENCFF583XLF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR527DME/summary/ENCFF583XLF.w5 128 32 2.0 mean 1732 CHIP:EP300:C57BL/6 lung postnatal (0 days) +1733 ENCFF178JLS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR527EUE/summary/ENCFF178JLS.w5 128 32 2.0 mean 1733 CHIP:H3K4me3:C57BL/6 forebrain embryo (12.5 days) +1734 ENCFF606BGM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR529ERN/summary/ENCFF606BGM.w5 128 32 2.0 mean 1734 CHIP:H3K4me1:C57BL/6 limb embryo (14.5 days) +1735 ENCFF422NJA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR531RZS/summary/ENCFF422NJA.w5 128 32 2.0 mean 1735 CHIP:H3K27ac:C57BL/6 neural tube embryo (11.5 days) +1736 ENCFF159AEV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR534HMF/summary/ENCFF159AEV.w5 128 32 2.0 mean 1736 CHIP:H3K4me2:C57BL/6 heart embryo (14.5 days) +1737 ENCFF208SBA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR535NVF/summary/ENCFF208SBA.w5 128 32 2.0 mean 1737 CHIP:H3K36me3:C57BL/6 midbrain embryo (11.5 days) +1738 ENCFF376RQB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR536ILV/summary/ENCFF376RQB.w5 128 32 2.0 mean 1738 CHIP:H3K4me3:C57BL/6 kidney postnatal (0 days) +1739 ENCFF323VNT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR537AKT/summary/ENCFF323VNT.w5 128 32 2.0 mean 1739 CHIP:H3K4me2:C57BL/6 hindbrain embryo (12.5 days) +1740 ENCFF644DVO 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR538DPG/summary/ENCFF644DVO.w5 128 32 2.0 mean 1740 CHIP:H3K36me3:C57BL/6 embryonic facial prominence embryo (10.5 days) +1741 ENCFF552KFS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR538SRO/summary/ENCFF552KFS.w5 128 32 2.0 mean 1741 CHIP:H3K4me3:C57BL/6 neural tube embryo (12.5 days) +1742 ENCFF485WCD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR545BRW/summary/ENCFF485WCD.w5 128 32 2.0 mean 1742 CHIP:H3K27me3:C57BL/6 midbrain embryo (11.5 days) +1743 ENCFF069OMF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR546ANT/summary/ENCFF069OMF.w5 128 32 2.0 mean 1743 CHIP:H3K27ac:C57BL/6 stomach embryo (16.5 days) +1744 ENCFF358FNS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR547PLI/summary/ENCFF358FNS.w5 128 32 2.0 mean 1744 CHIP:H3K9ac:C57BL/6 neural tube embryo (11.5 days) +1745 ENCFF956BOI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR548BCO/summary/ENCFF956BOI.w5 128 32 2.0 mean 1745 CHIP:H3K4me1:C57BL/6 limb embryo (11.5 days) +1746 ENCFF179UEK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR548BKP/summary/ENCFF179UEK.w5 128 32 2.0 mean 1746 CHIP:H3K4me1:C57BL/6 stomach embryo (15.5 days) +1747 ENCFF987UOT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR549RVJ/summary/ENCFF987UOT.w5 128 32 2.0 mean 1747 CHIP:H3K9me3:C57BL/6 neural tube embryo (15.5 days) +1748 ENCFF914LRA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR550QED/summary/ENCFF914LRA.w5 128 32 2.0 mean 1748 CHIP:H3K27me3:C57BL/6 liver embryo (13.5 days) +1749 ENCFF447LGJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR553IWV/summary/ENCFF447LGJ.w5 128 32 2.0 mean 1749 CHIP:H3K27ac:C57BL/6 midbrain embryo (16.5 days) +1750 ENCFF682ZKK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR554TSO/summary/ENCFF682ZKK.w5 128 32 2.0 mean 1750 CHIP:H3K4me3:C57BL/6 midbrain embryo (12.5 days) +1751 ENCFF591NXE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR556DLJ/summary/ENCFF591NXE.w5 128 32 2.0 mean 1751 CHIP:H3K9me3:C57BL/6 stomach 
postnatal (0 days) +1752 ENCFF586OQX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR556ZUY/summary/ENCFF586OQX.w5 128 32 2.0 mean 1752 CHIP:H3K4me1:C57BL/6 forebrain embryo (14.5 days) +1753 ENCFF797OAF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR557SVH/summary/ENCFF797OAF.w5 128 32 2.0 mean 1753 CHIP:H3K9me3:C57BL/6 heart embryo (12.5 days) +1754 ENCFF450RTN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR558NWQ/summary/ENCFF450RTN.w5 128 32 2.0 mean 1754 CHIP:H3K36me3:C57BL/6 forebrain embryo (10.5 days) +1755 ENCFF309BMY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR560CIG/summary/ENCFF309BMY.w5 128 32 2.0 mean 1755 CHIP:H3K36me3:C57BL/6 heart embryo (15.5 days) +1756 ENCFF460ZYC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR564JKR/summary/ENCFF460ZYC.w5 128 32 2.0 mean 1756 CHIP:H3K27me3:C57BL/6 kidney postnatal (0 days) +1757 ENCFF848GOF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR564XSR/summary/ENCFF848GOF.w5 128 32 2.0 mean 1757 CHIP:H3K27me3:C57BL/6 forebrain embryo (15.5 days) +1758 ENCFF702EWM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR569DBO/summary/ENCFF702EWM.w5 128 32 2.0 mean 1758 CHIP:H3K36me3:C57BL/6 liver embryo (16.5 days) +1759 ENCFF244GAG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR569UCG/summary/ENCFF244GAG.w5 128 32 2.0 mean 1759 CHIP:H3K9ac:C57BL/6 kidney embryo (14.5 days) +1760 ENCFF520CMB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR570HJI/summary/ENCFF520CMB.w5 128 32 2.0 mean 1760 CHIP:H3K27me3:C57BL/6 liver embryo (15.5 days) +1761 ENCFF197IGU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR571HOT/summary/ENCFF197IGU.w5 128 32 2.0 mean 1761 CHIP:H3K9ac:C57BL/6 neural tube embryo (15.5 days) +1762 ENCFF390PNF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR571WQI/summary/ENCFF390PNF.w5 128 32 2.0 mean 1762 CHIP:H3K9me3:C57BL/6 midbrain embryo (16.5 days) +1763 ENCFF029WVD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR572KYR/summary/ENCFF029WVD.w5 128 32 2.0 mean 1763 
CHIP:H3K4me3:C57BL/6 intestine embryo (16.5 days) +1764 ENCFF021RTZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR574VME/summary/ENCFF021RTZ.w5 128 32 2.0 mean 1764 CHIP:H3K27ac:C57BL/6 heart embryo (15.5 days) +1765 ENCFF017FCI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR576XBN/summary/ENCFF017FCI.w5 128 32 2.0 mean 1765 CHIP:H3K9ac:C57BL/6 hindbrain embryo (12.5 days) +1766 ENCFF960DLF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR577SDJ/summary/ENCFF960DLF.w5 128 32 2.0 mean 1766 CHIP:H3K4me3:C57BL/6 liver embryo (15.5 days) +1767 ENCFF817VNI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR578RLI/summary/ENCFF817VNI.w5 128 32 2.0 mean 1767 CHIP:H3K4me2:C57BL/6 liver postnatal (0 days) +1768 ENCFF161KBG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR579RZO/summary/ENCFF161KBG.w5 128 32 2.0 mean 1768 CHIP:H3K4me2:C57BL/6 neural tube embryo (14.5 days) +1769 ENCFF828TZR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR579UVB/summary/ENCFF828TZR.w5 128 32 2.0 mean 1769 CHIP:H3K9me3:C57BL/6 lung embryo (16.5 days) +1770 ENCFF009QET /home/drk/tillage/datasets/mouse/chip/encode/ENCSR580MEU/summary/ENCFF009QET.w5 128 32 2.0 mean 1770 CHIP:H3K27me3:C57BL/6 neural tube embryo (13.5 days) +1771 ENCFF197CLN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR581EJK/summary/ENCFF197CLN.w5 128 32 2.0 mean 1771 CHIP:H3K4me3:C57BL/6 midbrain embryo (10.5 days) +1772 ENCFF450GMP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR581FAT/summary/ENCFF450GMP.w5 128 32 2.0 mean 1772 CHIP:H3K36me3:C57BL/6 stomach embryo (14.5 days) +1773 ENCFF772NBP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR582IBX/summary/ENCFF772NBP.w5 128 32 2.0 mean 1773 CHIP:H3K36me3:C57BL/6 embryonic facial prominence embryo (12.5 days) +1774 ENCFF222TEU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR582SPN/summary/ENCFF222TEU.w5 128 32 2.0 mean 1774 CHIP:H3K27ac:C57BL/6 heart embryo (10.5 days) +1775 ENCFF564SDZ 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR592GQI/summary/ENCFF564SDZ.w5 128 32 2.0 mean 1775 CHIP:H3K4me3:C57BL/6 heart embryo (11.5 days) +1776 ENCFF782RAO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR594JGI/summary/ENCFF782RAO.w5 128 32 2.0 mean 1776 CHIP:H3K27ac:C57BL/6 hindbrain embryo (10.5 days) +1777 ENCFF803SVJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR599GVS/summary/ENCFF803SVJ.w5 128 32 2.0 mean 1777 CHIP:H3K27ac:C57BL/6 intestine embryo (15.5 days) +1778 ENCFF966UNT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR599PKR/summary/ENCFF966UNT.w5 128 32 2.0 mean 1778 CHIP:H3K36me3:C57BL/6 stomach embryo (15.5 days) +1779 ENCFF296YDU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR604AMS/summary/ENCFF296YDU.w5 128 32 2.0 mean 1779 CHIP:H3K4me2:C57BL/6 intestine embryo (15.5 days) +1780 ENCFF965EJR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR604DCI/summary/ENCFF965EJR.w5 128 32 2.0 mean 1780 CHIP:H3K36me3:C57BL/6 embryonic facial prominence embryo (14.5 days) +1781 ENCFF738IHC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR604XDL/summary/ENCFF738IHC.w5 128 32 2.0 mean 1781 CHIP:MAFK:129 ES-E14 +1782 ENCFF464IYE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR606AJV/summary/ENCFF464IYE.w5 128 32 2.0 mean 1782 CHIP:H3K36me3:C57BL/6 embryonic facial prominence embryo (15.5 days) +1783 ENCFF705MBY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR607HIQ/summary/ENCFF705MBY.w5 128 32 2.0 mean 1783 CHIP:H3K9ac:C57BL/6 liver embryo (12.5 days) +1784 ENCFF089CJB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR607QZE/summary/ENCFF089CJB.w5 128 32 2.0 mean 1784 CHIP:H3K27me3:C57BL/6 kidney embryo (16.5 days) +1785 ENCFF894YHQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR608QBO/summary/ENCFF894YHQ.w5 128 32 2.0 mean 1785 CHIP:H3K4me1:C57BL/6 hindbrain embryo (10.5 days) +1786 ENCFF588AKC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR616TJM/summary/ENCFF588AKC.w5 128 32 2.0 mean 1786 CHIP:H3K27ac:C57BL/6 liver postnatal (0 
days) +1787 ENCFF925YTU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR617VBE/summary/ENCFF925YTU.w5 128 32 2.0 mean 1787 CHIP:H3K4me1:C57BL/6 hindbrain postnatal (0 days) +1788 ENCFF750BCM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR629AFL/summary/ENCFF750BCM.w5 128 32 2.0 mean 1788 CHIP:H3K9ac:C57BL/6 limb embryo (12.5 days) +1789 ENCFF824UJK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR630AOU/summary/ENCFF824UJK.w5 128 32 2.0 mean 1789 CHIP:H3K9me3:C57BL/6 stomach embryo (16.5 days) +1790 ENCFF477XJO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR631KAJ/summary/ENCFF477XJO.w5 128 32 2.0 mean 1790 CHIP:H3K36me3:C57BL/6 heart embryo (11.5 days) +1791 ENCFF977AQW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR631QAE/summary/ENCFF977AQW.w5 128 32 2.0 mean 1791 CHIP:H3K4me2:C57BL/6 limb embryo (14.5 days) +1792 ENCFF137LNV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR634GFJ/summary/ENCFF137LNV.w5 128 32 2.0 mean 1792 CHIP:H3K9me3:C57BL/6 forebrain embryo (12.5 days) +1793 ENCFF644PLR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR637CCT/summary/ENCFF644PLR.w5 128 32 2.0 mean 1793 CHIP:H3K4me3:C57BL/6 midbrain embryo (16.5 days) +1794 ENCFF829GXB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR639DND/summary/ENCFF829GXB.w5 128 32 2.0 mean 1794 CHIP:H3K27ac:C57BL/6 intestine embryo (16.5 days) +1795 ENCFF769VJF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR639IQR/summary/ENCFF769VJF.w5 128 32 2.0 mean 1795 CHIP:H3K36me3:C57BL/6 limb embryo (13.5 days) +1796 ENCFF758CLJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR639KSW/summary/ENCFF758CLJ.w5 128 32 2.0 mean 1796 CHIP:H3K9ac:C57BL/6 hindbrain embryo (15.5 days) +1797 ENCFF152GZF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR641EME/summary/ENCFF152GZF.w5 128 32 2.0 mean 1797 CHIP:H3K36me3:C57BL/6 heart embryo (14.5 days) +1798 ENCFF512TCI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR642VYW/summary/ENCFF512TCI.w5 128 32 2.0 mean 1798 CHIP:H3K27ac:C57BL/6 intestine postnatal 
(0 days) +1799 ENCFF361SUJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR642ZLN/summary/ENCFF361SUJ.w5 128 32 2.0 mean 1799 CHIP:H3K27me3:C57BL/6 liver embryo (12.5 days) +1800 ENCFF140IVD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR645ETR/summary/ENCFF140IVD.w5 128 32 2.0 mean 1800 CHIP:H3K4me1:C57BL/6 embryonic facial prominence embryo (12.5 days) +1801 ENCFF109NPC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR647OTD/summary/ENCFF109NPC.w5 128 32 2.0 mean 1801 CHIP:H3K4me3:C57BL/6 heart embryo (15.5 days) +1802 ENCFF019ABU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR650UAC/summary/ENCFF019ABU.w5 128 32 2.0 mean 1802 CHIP:H3K4me2:C57BL/6 heart embryo (13.5 days) +1803 ENCFF615IVU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR653AVN/summary/ENCFF615IVU.w5 128 32 2.0 mean 1803 CHIP:H3K4me3:C57BL/6 liver postnatal (0 days) +1804 ENCFF306YUK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR654VMK/summary/ENCFF306YUK.w5 128 32 2.0 mean 1804 CHIP:H3K4me3:C57BL/6 limb embryo (11.5 days) +1805 ENCFF555EBK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR656AMS/summary/ENCFF555EBK.w5 128 32 2.0 mean 1805 CHIP:H3K36me3:C57BL/6 liver postnatal (0 days) +1806 ENCFF935NKI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR658BBG/summary/ENCFF935NKI.w5 128 32 2.0 mean 1806 CHIP:H3K27me3:C57BL/6 forebrain embryo (16.5 days) +1807 ENCFF774ZAY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR658TDS/summary/ENCFF774ZAY.w5 128 32 2.0 mean 1807 CHIP:H3K4me2:C57BL/6 kidney embryo (14.5 days) +1808 ENCFF673JMS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR663VWL/summary/ENCFF673JMS.w5 128 32 2.0 mean 1808 CHIP:H3K4me1:C57BL/6 heart embryo (13.5 days) +1809 ENCFF718RMZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR668BBX/summary/ENCFF718RMZ.w5 128 32 2.0 mean 1809 CHIP:H3K9me3:C57BL/6 forebrain embryo (15.5 days) +1810 ENCFF696ITD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR669AQL/summary/ENCFF696ITD.w5 128 32 2.0 mean 1810 CHIP:H3K4me3:C57BL/6 
kidney embryo (14.5 days) +1811 ENCFF426WOD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR670YXP/summary/ENCFF426WOD.w5 128 32 2.0 mean 1811 CHIP:H3K36me3:C57BL/6 liver embryo (14.5 days) +1812 ENCFF416UPA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR671NSS/summary/ENCFF416UPA.w5 128 32 2.0 mean 1812 CHIP:H3K27ac:C57BL/6 midbrain embryo (13.5 days) +1813 ENCFF363IUI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR672UQX/summary/ENCFF363IUI.w5 128 32 2.0 mean 1813 CHIP:H3K9me3:C57BL/6 intestine postnatal (0 days) +1814 ENCFF961KLD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR672ZXY/summary/ENCFF961KLD.w5 128 32 2.0 mean 1814 CHIP:H3K27ac:C57BL/6 midbrain postnatal (0 days) +1815 ENCFF317OTS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR674PZU/summary/ENCFF317OTS.w5 128 32 2.0 mean 1815 CHIP:H3K4me1:C57BL/6 kidney postnatal (0 days) +1816 ENCFF804EYG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR675HDX/summary/ENCFF804EYG.w5 128 32 2.0 mean 1816 CHIP:H3K27ac:C57BL/6 heart postnatal (0 days) +1817 ENCFF500SSQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR677HXC/summary/ENCFF500SSQ.w5 128 32 2.0 mean 1817 CHIP:CTCF:C57BL/6 forebrain postnatal (0 days) +1818 ENCFF176KNA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR677SIH/summary/ENCFF176KNA.w5 128 32 2.0 mean 1818 CHIP:CTCF:C57BL/6 lung embryo (14.5 days) +1819 ENCFF298SER /home/drk/tillage/datasets/mouse/chip/encode/ENCSR678FIT/summary/ENCFF298SER.w5 128 32 2.0 mean 1819 CHIP:H3K4me1:C57BL/6 midbrain embryo (16.5 days) +1820 ENCFF309WXH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR684UWM/summary/ENCFF309WXH.w5 128 32 2.0 mean 1820 CHIP:H3K4me3:C57BL/6 stomach embryo (16.5 days) +1821 ENCFF331OQE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR688ZOR/summary/ENCFF331OQE.w5 128 32 2.0 mean 1821 CHIP:H3K4me3:C57BL/6 heart embryo (12.5 days) +1822 ENCFF463QJN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR689NXE/summary/ENCFF463QJN.w5 128 32 2.0 mean 1822 CHIP:H3K9me3:C57BL/6 
hindbrain embryo (14.5 days) +1823 ENCFF440SCC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR691NQH/summary/ENCFF440SCC.w5 128 32 2.0 mean 1823 CHIP:H3K27ac:C57BL/6 forebrain embryo (15.5 days) +1824 ENCFF690UIX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR693UWX/summary/ENCFF690UIX.w5 128 32 2.0 mean 1824 CHIP:H3K4me2:C57BL/6 liver embryo (11.5 days) +1825 ENCFF124AFN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR695FPP/summary/ENCFF124AFN.w5 128 32 2.0 mean 1825 CHIP:H3K4me1:C57BL/6 hindbrain embryo (11.5 days) +1826 ENCFF814OZR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR698WCJ/summary/ENCFF814OZR.w5 128 32 2.0 mean 1826 CHIP:H3K9me3:C57BL/6 intestine embryo (14.5 days) +1827 ENCFF870SQY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR699XHY/summary/ENCFF870SQY.w5 128 32 2.0 mean 1827 CHIP:H3K27ac:C57BL/6 heart embryo (13.5 days) +1828 ENCFF422AUK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR701GKO/summary/ENCFF422AUK.w5 128 32 2.0 mean 1828 CHIP:H3K27me3:C57BL/6 embryonic facial prominence embryo (11.5 days) +1829 ENCFF997AJH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR702JYV/summary/ENCFF997AJH.w5 128 32 2.0 mean 1829 CHIP:H3K36me3:C57BL/6 midbrain embryo (14.5 days) +1830 ENCFF672EXE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR703TBR/summary/ENCFF672EXE.w5 128 32 2.0 mean 1830 CHIP:H3K9me3:C57BL/6 hindbrain embryo (10.5 days) +1831 ENCFF293PAY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR704IWS/summary/ENCFF293PAY.w5 128 32 2.0 mean 1831 CHIP:H3K4me3:C57BL/6 neural tube embryo (13.5 days) +1832 ENCFF377WET /home/drk/tillage/datasets/mouse/chip/encode/ENCSR705HGT/summary/ENCFF377WET.w5 128 32 2.0 mean 1832 CHIP:USF1:DBA/2 MEL cell line +1833 ENCFF769YBW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR706PSQ/summary/ENCFF769YBW.w5 128 32 2.0 mean 1833 CHIP:H3K4me2:C57BL/6 midbrain embryo (13.5 days) +1834 ENCFF933OKD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR706SZI/summary/ENCFF933OKD.w5 128 32 2.0 mean 1834 
CHIP:H3K9me3:C57BL/6 embryonic facial prominence embryo (14.5 days) +1835 ENCFF399YKQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR707DZS/summary/ENCFF399YKQ.w5 128 32 2.0 mean 1835 CHIP:H3K4me2:C57BL/6 midbrain embryo (12.5 days) +1836 ENCFF779RNO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR709CLU/summary/ENCFF779RNO.w5 128 32 2.0 mean 1836 CHIP:H3K27me3:C57BL/6 limb embryo (13.5 days) +1837 ENCFF297RMH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR711SVB/summary/ENCFF297RMH.w5 128 32 2.0 mean 1837 CHIP:H3K27ac:C57BL/6 kidney embryo (15.5 days) +1838 ENCFF391DVK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR717XCU/summary/ENCFF391DVK.w5 128 32 2.0 mean 1838 CHIP:H3K9ac:C57BL/6 forebrain embryo (15.5 days) +1839 ENCFF810BQP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR718CAJ/summary/ENCFF810BQP.w5 128 32 2.0 mean 1839 CHIP:H3K9ac:C57BL/6 hindbrain embryo (16.5 days) +1840 ENCFF638WTR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR721MSV/summary/ENCFF638WTR.w5 128 32 2.0 mean 1840 CHIP:H3K9ac:C57BL/6 hindbrain postnatal (0 days) +1841 ENCFF863DGY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR731KRE/summary/ENCFF863DGY.w5 128 32 2.0 mean 1841 CHIP:H3K4me3:C57BL/6 hindbrain embryo (10.5 days) +1842 ENCFF581EFO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR731OMP/summary/ENCFF581EFO.w5 128 32 2.0 mean 1842 CHIP:H3K4me2:C57BL/6 liver embryo (13.5 days) +1843 ENCFF923XEA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR734IEL/summary/ENCFF923XEA.w5 128 32 2.0 mean 1843 CHIP:H3K9ac:C57BL/6 hindbrain embryo (11.5 days) +1844 ENCFF063ZLI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR735VEJ/summary/ENCFF063ZLI.w5 128 32 2.0 mean 1844 CHIP:H3K27me3:C57BL/6 forebrain embryo (10.5 days) +1845 ENCFF760VJL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR736GVO/summary/ENCFF760VJL.w5 128 32 2.0 mean 1845 CHIP:H3K27me3:C57BL/6 forebrain embryo (13.5 days) +1846 ENCFF075BNF 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR737JHU/summary/ENCFF075BNF.w5 128 32 2.0 mean 1846 CHIP:H3K9me3:C57BL/6 forebrain embryo (10.5 days) +1847 ENCFF008XYM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR737QWV/summary/ENCFF008XYM.w5 128 32 2.0 mean 1847 CHIP:H3K27ac:C57BL/6 limb embryo (12.5 days) +1848 ENCFF880ZZM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR739DVM/summary/ENCFF880ZZM.w5 128 32 2.0 mean 1848 CHIP:H3K4me3:C57BL/6 forebrain embryo (11.5 days) +1849 ENCFF201DBX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR740DYF/summary/ENCFF201DBX.w5 128 32 2.0 mean 1849 CHIP:H3K27me3:C57BL/6 kidney embryo (15.5 days) +1850 ENCFF113RHY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR743ZJL/summary/ENCFF113RHY.w5 128 32 2.0 mean 1850 CHIP:IRF4:B10.H-2aH-4bp/Wts CH12.LX +1851 ENCFF209EYY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR744LCN/summary/ENCFF209EYY.w5 128 32 2.0 mean 1851 CHIP:H3K9ac:C57BL/6 lung embryo (15.5 days) +1852 ENCFF652SDD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR747VPY/summary/ENCFF652SDD.w5 128 32 2.0 mean 1852 CHIP:H3K4me2:C57BL/6 heart embryo (16.5 days) +1853 ENCFF102KDV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR747ZXL/summary/ENCFF102KDV.w5 128 32 2.0 mean 1853 CHIP:H3K36me3:C57BL/6 midbrain embryo (10.5 days) +1854 ENCFF311RNJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR748BQH/summary/ENCFF311RNJ.w5 128 32 2.0 mean 1854 CHIP:H3K4me2:C57BL/6 heart embryo (11.5 days) +1855 ENCFF463JGO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR749PZB/summary/ENCFF463JGO.w5 128 32 2.0 mean 1855 CHIP:H3K9ac:C57BL/6 intestine embryo (16.5 days) +1856 ENCFF870LES /home/drk/tillage/datasets/mouse/chip/encode/ENCSR751AAB/summary/ENCFF870LES.w5 128 32 2.0 mean 1856 CHIP:H3K4me1:C57BL/6 limb embryo (12.5 days) +1857 ENCFF702AEO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR751HEG/summary/ENCFF702AEO.w5 128 32 2.0 mean 1857 CHIP:H3K9ac:C57BL/6 heart embryo (11.5 days) +1858 ENCFF878NXY 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR754AXU/summary/ENCFF878NXY.w5 128 32 2.0 mean 1858 CHIP:H3K9me3:C57BL/6 midbrain embryo (13.5 days) +1859 ENCFF055NHC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR755PMX/summary/ENCFF055NHC.w5 128 32 2.0 mean 1859 CHIP:H3K9ac:C57BL/6 embryonic facial prominence embryo (15.5 days) +1860 ENCFF374BEH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR755SGK/summary/ENCFF374BEH.w5 128 32 2.0 mean 1860 CHIP:H3K4me2:C57BL/6 midbrain embryo (14.5 days) +1861 ENCFF749LHU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR758FWH/summary/ENCFF749LHU.w5 128 32 2.0 mean 1861 CHIP:H3K27me3:C57BL/6 lung embryo (16.5 days) +1862 ENCFF173LCU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR758UAI/summary/ENCFF173LCU.w5 128 32 2.0 mean 1862 CHIP:H3K4me2:C57BL/6 embryonic facial prominence embryo (13.5 days) +1863 ENCFF084MIV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR759RVE/summary/ENCFF084MIV.w5 128 32 2.0 mean 1863 CHIP:H3K4me2:C57BL/6 limb embryo (13.5 days) +1864 ENCFF741GHS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR764UIE/summary/ENCFF741GHS.w5 128 32 2.0 mean 1864 CHIP:H3K36me3:C57BL/6 midbrain embryo (12.5 days) +1865 ENCFF675TVC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR765JWZ/summary/ENCFF675TVC.w5 128 32 2.0 mean 1865 CHIP:H3K4me3:C57BL/6 embryonic facial prominence embryo (10.5 days) +1866 ENCFF251ZXL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR765RPR/summary/ENCFF251ZXL.w5 128 32 2.0 mean 1866 CHIP:EP300:C57BL/6 liver postnatal (0 days) +1867 ENCFF014HTI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR765SJF/summary/ENCFF014HTI.w5 128 32 2.0 mean 1867 CHIP:H3K27me3:C57BL/6 limb embryo (14.5 days) +1868 ENCFF432ADZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR765UOU/summary/ENCFF432ADZ.w5 128 32 2.0 mean 1868 CHIP:H3K9me3:C57BL/6 midbrain embryo (12.5 days) +1869 ENCFF190AYI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR769ITO/summary/ENCFF190AYI.w5 128 32 2.0 mean 1869 
CHIP:H3K9me3:C57BL/6 midbrain embryo (10.5 days) +1870 ENCFF335NBL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR770OXU/summary/ENCFF335NBL.w5 128 32 2.0 mean 1870 CHIP:H3K4me1:C57BL/6 liver embryo (12.5 days) +1871 ENCFF035FHN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR771ILS/summary/ENCFF035FHN.w5 128 32 2.0 mean 1871 CHIP:H3K4me2:C57BL/6 hindbrain postnatal (0 days) +1872 ENCFF368BVK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR773EGV/summary/ENCFF368BVK.w5 128 32 2.0 mean 1872 CHIP:H3K9me3:C57BL/6 lung embryo (15.5 days) +1873 ENCFF713BVE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR776QRL/summary/ENCFF713BVE.w5 128 32 2.0 mean 1873 CHIP:H3K36me3:C57BL/6 neural tube embryo (14.5 days) +1874 ENCFF110NKA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR776RJR/summary/ENCFF110NKA.w5 128 32 2.0 mean 1874 CHIP:H3K36me3:C57BL/6 lung embryo (15.5 days) +1875 ENCFF338GTZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR777VNA/summary/ENCFF338GTZ.w5 128 32 2.0 mean 1875 CHIP:EP300:C57BL/6 heart postnatal (0 days) +1876 ENCFF232UNC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR779CZG/summary/ENCFF232UNC.w5 128 32 2.0 mean 1876 CHIP:NANOG:129 E14TG2a.4 +1877 ENCFF442KVI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR779MSI/summary/ENCFF442KVI.w5 128 32 2.0 mean 1877 CHIP:H3K9me3:C57BL/6 embryonic facial prominence embryo (10.5 days) +1878 ENCFF582THV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR780NWL/summary/ENCFF582THV.w5 128 32 2.0 mean 1878 CHIP:H3K9ac:C57BL/6 embryonic facial prominence embryo (13.5 days) +1879 ENCFF280UTR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR781ORQ/summary/ENCFF280UTR.w5 128 32 2.0 mean 1879 CHIP:H3K27me3:C57BL/6 heart embryo (16.5 days) +1880 ENCFF889KFS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR782DEA/summary/ENCFF889KFS.w5 128 32 2.0 mean 1880 CHIP:H3K4me3:C57BL/6 heart embryo (10.5 days) +1881 ENCFF513MGM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR782DGO/summary/ENCFF513MGM.w5 128 32 
2.0 mean 1881 CHIP:H3K4me1:C57BL/6 heart embryo (10.5 days) +1882 ENCFF660OFZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR782SUW/summary/ENCFF660OFZ.w5 128 32 2.0 mean 1882 CHIP:H3K4me2:C57BL/6 forebrain embryo (12.5 days) +1883 ENCFF631JYQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR784TLR/summary/ENCFF631JYQ.w5 128 32 2.0 mean 1883 CHIP:H3K27ac:C57BL/6 hindbrain embryo (12.5 days) +1884 ENCFF845OXU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR790LOX/summary/ENCFF845OXU.w5 128 32 2.0 mean 1884 CHIP:H3K27me3:C57BL/6 heart postnatal (0 days) +1885 ENCFF164OJX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR795UOC/summary/ENCFF164OJX.w5 128 32 2.0 mean 1885 CHIP:H3K9ac:C57BL/6 liver embryo (16.5 days) +1886 ENCFF011VMX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR797EYS/summary/ENCFF011VMX.w5 128 32 2.0 mean 1886 CHIP:H3K27ac:C57BL/6 hindbrain embryo (16.5 days) +1887 ENCFF094RXG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR798VHN/summary/ENCFF094RXG.w5 128 32 2.0 mean 1887 CHIP:H3K9ac:C57BL/6 stomach embryo (16.5 days) +1888 ENCFF526VAG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR802RET/summary/ENCFF526VAG.w5 128 32 2.0 mean 1888 CHIP:H3K27ac:C57BL/6 liver embryo (16.5 days) +1889 ENCFF199YQB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR806JZK/summary/ENCFF199YQB.w5 128 32 2.0 mean 1889 CHIP:MEF2A:B10.H-2aH-4bp/Wts CH12.LX +1890 ENCFF032RGE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR809NWL/summary/ENCFF032RGE.w5 128 32 2.0 mean 1890 CHIP:H3K36me3:C57BL/6 hindbrain embryo (15.5 days) +1891 ENCFF623CRW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR810EOA/summary/ENCFF623CRW.w5 128 32 2.0 mean 1891 CHIP:H3K4me1:C57BL/6 limb embryo (15.5 days) +1892 ENCFF596BHZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR813SCQ/summary/ENCFF596BHZ.w5 128 32 2.0 mean 1892 CHIP:H3K27ac:C57BL/6 embryonic facial prominence embryo (12.5 days) +1893 ENCFF767LND 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR821GHK/summary/ENCFF767LND.w5 128 32 2.0 mean 1893 CHIP:H3K9me3:C57BL/6 kidney embryo (16.5 days) +1894 ENCFF451RNW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR825OWH/summary/ENCFF451RNW.w5 128 32 2.0 mean 1894 CHIP:H3K4me1:C57BL/6 lung embryo (14.5 days) +1895 ENCFF266KHC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR825ZJV/summary/ENCFF266KHC.w5 128 32 2.0 mean 1895 CHIP:H3K27ac:C57BL/6 forebrain embryo (10.5 days) +1896 ENCFF555JQE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR826TJQ/summary/ENCFF555JQE.w5 128 32 2.0 mean 1896 CHIP:H3K4me1:C57BL/6 neural tube embryo (15.5 days) +1897 ENCFF702GVB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR829YGD/summary/ENCFF702GVB.w5 128 32 2.0 mean 1897 CHIP:H3K4me1:C57BL/6 intestine embryo (16.5 days) +1898 ENCFF074EMQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR831EKS/summary/ENCFF074EMQ.w5 128 32 2.0 mean 1898 CHIP:H3K4me2:C57BL/6 hindbrain embryo (15.5 days) +1899 ENCFF900BKA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR831YAX/summary/ENCFF900BKA.w5 128 32 2.0 mean 1899 CHIP:H3K27me3:C57BL/6 forebrain embryo (14.5 days) +1900 ENCFF314QNW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR835BZO/summary/ENCFF314QNW.w5 128 32 2.0 mean 1900 CHIP:H3K4me3:C57BL/6 forebrain embryo (13.5 days) +1901 ENCFF637PDW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR837SKW/summary/ENCFF637PDW.w5 128 32 2.0 mean 1901 CHIP:H3K27me3:C57BL/6 hindbrain embryo (13.5 days) +1902 ENCFF916AGC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR839WFP/summary/ENCFF916AGC.w5 128 32 2.0 mean 1902 CHIP:H3K4me3:C57BL/6 lung embryo (14.5 days) +1903 ENCFF906TTD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR840QFC/summary/ENCFF906TTD.w5 128 32 2.0 mean 1903 CHIP:H3K36me3:C57BL/6 heart embryo (12.5 days) +1904 ENCFF801FFG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR846PJO/summary/ENCFF801FFG.w5 128 32 2.0 mean 1904 CHIP:H3K27ac:C57BL/6 heart embryo (16.5 days) +1905 
ENCFF477UFW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR846ZCW/summary/ENCFF477UFW.w5 128 32 2.0 mean 1905 CHIP:H3K9me3:C57BL/6 liver embryo (11.5 days) +1906 ENCFF736SAK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR846ZTT/summary/ENCFF736SAK.w5 128 32 2.0 mean 1906 CHIP:H3K4me2:C57BL/6 kidney embryo (16.5 days) +1907 ENCFF550PJQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR855NKG/summary/ENCFF550PJQ.w5 128 32 2.0 mean 1907 CHIP:H3K9me3:C57BL/6 liver embryo (15.5 days) +1908 ENCFF376ZJO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR857GQI/summary/ENCFF376ZJO.w5 128 32 2.0 mean 1908 CHIP:H3K27me3:C57BL/6 midbrain embryo (15.5 days) +1909 ENCFF289ATH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR857MYS/summary/ENCFF289ATH.w5 128 32 2.0 mean 1909 CHIP:H3K9me3:129 E14TG2a.4 +1910 ENCFF283EBS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR858AUB/summary/ENCFF283EBS.w5 128 32 2.0 mean 1910 CHIP:H3K4me1:C57BL/6 lung embryo (15.5 days) +1911 ENCFF024LWP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR861MUP/summary/ENCFF024LWP.w5 128 32 2.0 mean 1911 CHIP:H3K27me3:C57BL/6 lung embryo (15.5 days) +1912 ENCFF502YGZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR863VHE/summary/ENCFF502YGZ.w5 128 32 2.0 mean 1912 CHIP:H3K27ac:C57BL/6 limb embryo (10.5 days) +1913 ENCFF377LTJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR864DSI/summary/ENCFF377LTJ.w5 128 32 2.0 mean 1913 CHIP:H3K9me3:C57BL/6 midbrain embryo (15.5 days) +1914 ENCFF462IYV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR864OWV/summary/ENCFF462IYV.w5 128 32 2.0 mean 1914 CHIP:H3K36me3:C57BL/6 hindbrain embryo (16.5 days) +1915 ENCFF345KLB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR868FWZ/summary/ENCFF345KLB.w5 128 32 2.0 mean 1915 CHIP:H3K9ac:C57BL/6 liver embryo (13.5 days) +1916 ENCFF706SAT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR871CGP/summary/ENCFF706SAT.w5 128 32 2.0 mean 1916 CHIP:H3K36me3:C57BL/6 limb embryo (14.5 days) +1917 ENCFF270YCY 
/home/drk/tillage/datasets/mouse/chip/encode/ENCSR871KVM/summary/ENCFF270YCY.w5 128 32 2.0 mean 1917 CHIP:H3K27me3:C57BL/6 intestine embryo (15.5 days) +1918 ENCFF923CRK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR871YCT/summary/ENCFF923CRK.w5 128 32 2.0 mean 1918 CHIP:H3K36me3:C57BL/6 limb embryo (11.5 days) +1919 ENCFF454YLV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR872WGX/summary/ENCFF454YLV.w5 128 32 2.0 mean 1919 CHIP:H3K36me3:C57BL/6 stomach embryo (16.5 days) +1920 ENCFF059NRZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR875KRK/summary/ENCFF059NRZ.w5 128 32 2.0 mean 1920 CHIP:H3K4me1:C57BL/6 forebrain embryo (15.5 days) +1921 ENCFF104GKL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR875ZAO/summary/ENCFF104GKL.w5 128 32 2.0 mean 1921 CHIP:H3K4me3:C57BL/6 heart postnatal (0 days) +1922 ENCFF665XFW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR883ASH/summary/ENCFF665XFW.w5 128 32 2.0 mean 1922 CHIP:H3K4me1:C57BL/6 kidney embryo (16.5 days) +1923 ENCFF924XGB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR884MYD/summary/ENCFF924XGB.w5 128 32 2.0 mean 1923 CHIP:H3K27ac:C57BL/6 lung postnatal (0 days) +1924 ENCFF911YHS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR886DWH/summary/ENCFF911YHS.w5 128 32 2.0 mean 1924 CHIP:H3K4me2:C57BL/6 embryonic facial prominence embryo (14.5 days) +1925 ENCFF917SXQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR886IHN/summary/ENCFF917SXQ.w5 128 32 2.0 mean 1925 CHIP:H3K4me1:C57BL/6 heart embryo (11.5 days) +1926 ENCFF720WRB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR889MYY/summary/ENCFF720WRB.w5 128 32 2.0 mean 1926 CHIP:H3K9ac:C57BL/6 liver embryo (15.5 days) +1927 ENCFF704GSX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR891SAW/summary/ENCFF704GSX.w5 128 32 2.0 mean 1927 CHIP:H3K27ac:C57BL/6 neural tube embryo (12.5 days) +1928 ENCFF052RHA /home/drk/tillage/datasets/mouse/chip/encode/ENCSR894JAS/summary/ENCFF052RHA.w5 128 32 2.0 mean 1928 CHIP:H3K9me3:C57BL/6 forebrain embryo (11.5 
days) +1929 ENCFF018LWM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR895BMP/summary/ENCFF018LWM.w5 128 32 2.0 mean 1929 CHIP:H3K27ac:C57BL/6 lung embryo (15.5 days) +1930 ENCFF897QOI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR897WBY/summary/ENCFF897QOI.w5 128 32 2.0 mean 1930 CHIP:H3K27ac:C57BL/6 limb embryo (11.5 days) +1931 ENCFF753BLB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR898WIS/summary/ENCFF753BLB.w5 128 32 2.0 mean 1931 CHIP:H3K9ac:C57BL/6 liver embryo (14.5 days) +1932 ENCFF402FVP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR905FFU/summary/ENCFF402FVP.w5 128 32 2.0 mean 1932 CHIP:H3K27ac:C57BL/6 limb embryo (13.5 days) +1933 ENCFF177GZH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR906QEK/summary/ENCFF177GZH.w5 128 32 2.0 mean 1933 CHIP:TCF12:B10.H-2aH-4bp/Wts CH12.LX +1934 ENCFF217MZD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR906RTN/summary/ENCFF217MZD.w5 128 32 2.0 mean 1934 CHIP:H3K27me3:C57BL/6 limb embryo (10.5 days) +1935 ENCFF924CKT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR906UJW/summary/ENCFF924CKT.w5 128 32 2.0 mean 1935 CHIP:H3K4me2:C57BL/6 forebrain postnatal (0 days) +1936 ENCFF043PGB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR907CPZ/summary/ENCFF043PGB.w5 128 32 2.0 mean 1936 CHIP:H3K4me1:C57BL/6 stomach embryo (16.5 days) +1937 ENCFF586VVE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR912IXU/summary/ENCFF586VVE.w5 128 32 2.0 mean 1937 CHIP:H3K9ac:C57BL/6 kidney embryo (16.5 days) +1938 ENCFF091TEN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR914QGB/summary/ENCFF091TEN.w5 128 32 2.0 mean 1938 CHIP:H3K4me1:C57BL/6 embryonic facial prominence embryo (10.5 days) +1939 ENCFF703KOJ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR916CBN/summary/ENCFF703KOJ.w5 128 32 2.0 mean 1939 CHIP:H3K4me3:C57BL/6 stomach postnatal (0 days) +1940 ENCFF254FBD /home/drk/tillage/datasets/mouse/chip/encode/ENCSR918ZSJ/summary/ENCFF254FBD.w5 128 32 2.0 mean 1940 CHIP:H3K4me1:C57BL/6 neural tube embryo 
(13.5 days) +1941 ENCFF815RWW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR919DDC/summary/ENCFF815RWW.w5 128 32 2.0 mean 1941 CHIP:H3K36me3:C57BL/6 intestine embryo (15.5 days) +1942 ENCFF632RYE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR921ILW/summary/ENCFF632RYE.w5 128 32 2.0 mean 1942 CHIP:H3K4me1:C57BL/6 hindbrain embryo (15.5 days) +1943 ENCFF045FHU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR924FJQ/summary/ENCFF045FHU.w5 128 32 2.0 mean 1943 CHIP:H3K4me2:C57BL/6 lung embryo (14.5 days) +1944 ENCFF703BHN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR928CYU/summary/ENCFF703BHN.w5 128 32 2.0 mean 1944 CHIP:H3K4me3:C57BL/6 hindbrain embryo (11.5 days) +1945 ENCFF386EYG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR928IYZ/summary/ENCFF386EYG.w5 128 32 2.0 mean 1945 CHIP:H3K27me3:C57BL/6 neural tube embryo (15.5 days) +1946 ENCFF908XOQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR929GXP/summary/ENCFF908XOQ.w5 128 32 2.0 mean 1946 CHIP:H3K27me3:C57BL/6 midbrain embryo (14.5 days) +1947 ENCFF604BOY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR929SEW/summary/ENCFF604BOY.w5 128 32 2.0 mean 1947 CHIP:H3K27ac:C57BL/6 stomach embryo (15.5 days) +1948 ENCFF197JHX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR930AFR/summary/ENCFF197JHX.w5 128 32 2.0 mean 1948 CHIP:H3K9me3:C57BL/6 neural tube embryo (12.5 days) +1949 ENCFF559KOX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR932BNP/summary/ENCFF559KOX.w5 128 32 2.0 mean 1949 CHIP:H3K36me3:C57BL/6 liver embryo (11.5 days) +1950 ENCFF840MYF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR938MUD/summary/ENCFF840MYF.w5 128 32 2.0 mean 1950 CHIP:H3K4me3:C57BL/6 limb embryo (12.5 days) +1951 ENCFF629BDF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR940CMI/summary/ENCFF629BDF.w5 128 32 2.0 mean 1951 CHIP:H3K4me1:C57BL/6 stomach postnatal (0 days) +1952 ENCFF951TKF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR943QUH/summary/ENCFF951TKF.w5 128 32 2.0 mean 1952 CHIP:H3K9me3:C57BL/6 
limb embryo (12.5 days) +1953 ENCFF375ONW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR944XPB/summary/ENCFF375ONW.w5 128 32 2.0 mean 1953 CHIP:H3K27me3:C57BL/6 forebrain embryo (11.5 days) +1954 ENCFF039QOO /home/drk/tillage/datasets/mouse/chip/encode/ENCSR946JEA/summary/ENCFF039QOO.w5 128 32 2.0 mean 1954 CHIP:H3K27me3:C57BL/6 stomach embryo (16.5 days) +1955 ENCFF224IHW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR946RLM/summary/ENCFF224IHW.w5 128 32 2.0 mean 1955 CHIP:H3K9ac:C57BL/6 intestine postnatal (0 days) +1956 ENCFF672TTE /home/drk/tillage/datasets/mouse/chip/encode/ENCSR947QOH/summary/ENCFF672TTE.w5 128 32 2.0 mean 1956 CHIP:H3K4me2:C57BL/6 hindbrain embryo (13.5 days) +1957 ENCFF583JQM /home/drk/tillage/datasets/mouse/chip/encode/ENCSR948RYN/summary/ENCFF583JQM.w5 128 32 2.0 mean 1957 CHIP:H3K9ac:C57BL/6 liver embryo (11.5 days) +1958 ENCFF966CDF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR948VPV/summary/ENCFF966CDF.w5 128 32 2.0 mean 1958 CHIP:H3K9me3:C57BL/6 midbrain embryo (14.5 days) +1959 ENCFF446FGV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR951UWY/summary/ENCFF446FGV.w5 128 32 2.0 mean 1959 CHIP:H3K36me3:C57BL/6 midbrain postnatal (0 days) +1960 ENCFF455WJT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR953HNY/summary/ENCFF455WJT.w5 128 32 2.0 mean 1960 CHIP:H3K4me3:C57BL/6 kidney embryo (16.5 days) +1961 ENCFF224DGI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR953KTY/summary/ENCFF224DGI.w5 128 32 2.0 mean 1961 CHIP:H3K36me3:C57BL/6 intestine embryo (14.5 days) +1962 ENCFF787DNX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR953LFI/summary/ENCFF787DNX.w5 128 32 2.0 mean 1962 CHIP:H3K27me3:C57BL/6 hindbrain embryo (14.5 days) +1963 ENCFF802SIF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR956ZCG/summary/ENCFF802SIF.w5 128 32 2.0 mean 1963 CHIP:H3K9ac:C57BL/6 heart embryo (15.5 days) +1964 ENCFF937IQZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR961MEP/summary/ENCFF937IQZ.w5 128 32 2.0 mean 1964 
CHIP:H3K9me3:C57BL/6 limb embryo (11.5 days) +1965 ENCFF420WNH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR961PNM/summary/ENCFF420WNH.w5 128 32 2.0 mean 1965 CHIP:H3K9ac:C57BL/6 kidney embryo (15.5 days) +1966 ENCFF996XRG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR962MBB/summary/ENCFF996XRG.w5 128 32 2.0 mean 1966 CHIP:H3K36me3:C57BL/6 hindbrain embryo (10.5 days) +1967 ENCFF551QTH /home/drk/tillage/datasets/mouse/chip/encode/ENCSR963OLG/summary/ENCFF551QTH.w5 128 32 2.0 mean 1967 CHIP:H3K27me3:C57BL/6 heart embryo (11.5 days) +1968 ENCFF708MBZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR964RRJ/summary/ENCFF708MBZ.w5 128 32 2.0 mean 1968 CHIP:H3K9ac:C57BL/6 embryonic facial prominence embryo (14.5 days) +1969 ENCFF046SEF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR965JWF/summary/ENCFF046SEF.w5 128 32 2.0 mean 1969 CHIP:H3K9me3:C57BL/6 heart postnatal (0 days) +1970 ENCFF541IJI /home/drk/tillage/datasets/mouse/chip/encode/ENCSR966AIB/summary/ENCFF541IJI.w5 128 32 2.0 mean 1970 CHIP:H3K27ac:C57BL/6 forebrain embryo (12.5 days) +1971 ENCFF483COK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR966RAG/summary/ENCFF483COK.w5 128 32 2.0 mean 1971 CHIP:H3K9ac:C57BL/6 liver postnatal (0 days) +1972 ENCFF293RMV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR966TCN/summary/ENCFF293RMV.w5 128 32 2.0 mean 1972 CHIP:H3K27me3:C57BL/6 midbrain embryo (10.5 days) +1973 ENCFF566GYY /home/drk/tillage/datasets/mouse/chip/encode/ENCSR968NPX/summary/ENCFF566GYY.w5 128 32 2.0 mean 1973 CHIP:H3K9ac:C57BL/6 heart embryo (12.5 days) +1974 ENCFF587CHP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR969CPK/summary/ENCFF587CHP.w5 128 32 2.0 mean 1974 CHIP:H3K27me3:C57BL/6 stomach postnatal (0 days) +1975 ENCFF703RAT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR969NXN/summary/ENCFF703RAT.w5 128 32 2.0 mean 1975 CHIP:H3K36me3:C57BL/6 lung embryo (16.5 days) +1976 ENCFF063VTT /home/drk/tillage/datasets/mouse/chip/encode/ENCSR972LUE/summary/ENCFF063VTT.w5 128 
32 2.0 mean 1976 CHIP:H3K9me3:C57BL/6 hindbrain embryo (12.5 days) +1977 ENCFF789SIC /home/drk/tillage/datasets/mouse/chip/encode/ENCSR972MRN/summary/ENCFF789SIC.w5 128 32 2.0 mean 1977 CHIP:H3K36me3:DBA/2 MEL cell line +1978 ENCFF330KGP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR972MUO/summary/ENCFF330KGP.w5 128 32 2.0 mean 1978 CHIP:H3K36me3:C57BL/6 hindbrain embryo (13.5 days) +1979 ENCFF230BUR /home/drk/tillage/datasets/mouse/chip/encode/ENCSR972WEN/summary/ENCFF230BUR.w5 128 32 2.0 mean 1979 CHIP:H3K4me1:C57BL/6 embryonic facial prominence embryo (14.5 days) +1980 ENCFF237WUZ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR973AYQ/summary/ENCFF237WUZ.w5 128 32 2.0 mean 1980 CHIP:H3K4me2:C57BL/6 neural tube embryo (13.5 days) +1981 ENCFF193AYX /home/drk/tillage/datasets/mouse/chip/encode/ENCSR973SOG/summary/ENCFF193AYX.w5 128 32 2.0 mean 1981 CHIP:USF1:B10.H-2aH-4bp/Wts CH12.LX +1982 ENCFF633GEF /home/drk/tillage/datasets/mouse/chip/encode/ENCSR973UGS/summary/ENCFF633GEF.w5 128 32 2.0 mean 1982 CHIP:H3K27me3:C57BL/6 liver postnatal (0 days) +1983 ENCFF417JCK /home/drk/tillage/datasets/mouse/chip/encode/ENCSR975QEX/summary/ENCFF417JCK.w5 128 32 2.0 mean 1983 CHIP:H3K9me3:C57BL/6 stomach embryo (15.5 days) +1984 ENCFF260BXV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR975QSF/summary/ENCFF260BXV.w5 128 32 2.0 mean 1984 CHIP:H3K4me1:C57BL/6 forebrain embryo (11.5 days) +1985 ENCFF968CFU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR976GSO/summary/ENCFF968CFU.w5 128 32 2.0 mean 1985 CHIP:H3K9me3:C57BL/6 stomach embryo (14.5 days) +1986 ENCFF127DNN /home/drk/tillage/datasets/mouse/chip/encode/ENCSR977DCO/summary/ENCFF127DNN.w5 128 32 2.0 mean 1986 CHIP:H3K4me1:C57BL/6 neural tube embryo (14.5 days) +1987 ENCFF650BRL /home/drk/tillage/datasets/mouse/chip/encode/ENCSR980YXJ/summary/ENCFF650BRL.w5 128 32 2.0 mean 1987 CHIP:NRF1:B10.H-2aH-4bp/Wts CH12.LX +1988 ENCFF549QHQ /home/drk/tillage/datasets/mouse/chip/encode/ENCSR982LJQ/summary/ENCFF549QHQ.w5 128 
32 2.0 mean 1988 CHIP:EP300:C57BL/6 liver embryo (14.5 days) +1989 ENCFF898VDV /home/drk/tillage/datasets/mouse/chip/encode/ENCSR984OHL/summary/ENCFF898VDV.w5 128 32 2.0 mean 1989 CHIP:H3K27me3:C57BL/6 hindbrain embryo (10.5 days) +1990 ENCFF068KSP /home/drk/tillage/datasets/mouse/chip/encode/ENCSR985UTP/summary/ENCFF068KSP.w5 128 32 2.0 mean 1990 CHIP:H3K4me3:C57BL/6 liver embryo (13.5 days) +1991 ENCFF581XWU /home/drk/tillage/datasets/mouse/chip/encode/ENCSR985ZTV/summary/ENCFF581XWU.w5 128 32 2.0 mean 1991 CHIP:CTCF:C57BL/6 midbrain postnatal (0 days) +1992 ENCFF199SCG /home/drk/tillage/datasets/mouse/chip/encode/ENCSR988BRP/summary/ENCFF199SCG.w5 128 32 2.0 mean 1992 CHIP:H3K27ac:C57BL/6 limb embryo (15.5 days) +1993 ENCFF773NIS /home/drk/tillage/datasets/mouse/chip/encode/ENCSR989LUY/summary/ENCFF773NIS.w5 128 32 2.0 mean 1993 CHIP:H3K27ac:C57BL/6 midbrain embryo (10.5 days) +1994 ENCFF727YWB /home/drk/tillage/datasets/mouse/chip/encode/ENCSR990JMN/summary/ENCFF727YWB.w5 128 32 2.0 mean 1994 CHIP:H3K4me3:C57BL/6 embryonic facial prominence embryo (15.5 days) +1995 ENCFF739NQW /home/drk/tillage/datasets/mouse/chip/encode/ENCSR992SHK/summary/ENCFF739NQW.w5 128 32 2.0 mean 1995 CHIP:H3K4me1:C57BL/6 forebrain embryo (10.5 days) +1996 GSM1094254 /home/drk/tillage/datasets/mouse/chip/geo/GSM1094254/summary/coverage.w5 256 64 1.0 sum 1996 CHIP:CEBPb:CEBPB_ChIP-seq / CEBPB_ChIP-seq / Primary dermal fibroblasts +1997 GSM1094255 /home/drk/tillage/datasets/mouse/chip/geo/GSM1094255/summary/coverage.w5 256 64 1.0 sum 1997 CHIP:CEBPb:CEBPB_Tg_ChIP-seq / CEBPB_Tg_ChIP-seq / Primary dermal fibroblasts +1998 GSM1094256 /home/drk/tillage/datasets/mouse/chip/geo/GSM1094256/summary/coverage.w5 256 64 1.0 sum 1998 CHIP:ATF4:ATF4_ChIP-seq / ATF4_ChIP-seq / Primary dermal fibroblasts +1999 GSM1176709 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176709/summary/coverage.w5 256 64 1.0 sum 1999 CHIP:SREBP1:SREBP1 (ZT02) / SREBP1_ZT02, liver cells / . 
+2000 GSM1176710 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176710/summary/coverage.w5 256 64 1.0 sum 2000 CHIP:SREBP1:SREBP1 (ZT06) / SREBP1_ZT06, liver cells / . +2001 GSM1176711 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176711/summary/coverage.w5 256 64 1.0 sum 2001 CHIP:SREBP1:SREBP1 (ZT10) / SREBP1_ZT10, liver cells / . +2002 GSM1176712 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176712/summary/coverage.w5 256 64 1.0 sum 2002 CHIP:SREBP1:SREBP1 (ZT14) / SREBP1_ZT14, liver cells / . +2003 GSM1176713 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176713/summary/coverage.w5 256 64 1.0 sum 2003 CHIP:SREBP1:SREBP1 (ZT18) / SREBP1_ZT18, liver cells / . +2004 GSM1176714 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176714/summary/coverage.w5 256 64 1.0 sum 2004 CHIP:SREBP1:SREBP1 (ZT22) / SREBP1_ZT22, liver cells / . +2005 GSM1176721 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176721/summary/coverage.w5 256 64 1.0 sum 2005 CHIP:RPB2:Polr2b (ZT02) / RPB2_ZT02, liver cells / . +2006 GSM1176722 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176722/summary/coverage.w5 256 64 1.0 sum 2006 CHIP:RPB2:Polr2b (ZT06) / RPB2_ZT06, liver cells / . +2007 GSM1176723 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176723/summary/coverage.w5 256 64 1.0 sum 2007 CHIP:RPB2:Polr2b (ZT10) / RPB2_ZT10, liver cells / . +2008 GSM1176724 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176724/summary/coverage.w5 256 64 1.0 sum 2008 CHIP:RPB2:Polr2b (ZT14) / RPB2_ZT14, liver cells / . +2009 GSM1176725 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176725/summary/coverage.w5 256 64 1.0 sum 2009 CHIP:RPB2:Polr2b (ZT18) / RPB2_ZT18, liver cells / . +2010 GSM1176726 /home/drk/tillage/datasets/mouse/chip/geo/GSM1176726/summary/coverage.w5 256 64 1.0 sum 2010 CHIP:RPB2:Polr2b (ZT22) / RPB2_ZT22, liver cells / . +2011 GSM1198156 /home/drk/tillage/datasets/mouse/chip/geo/GSM1198156/summary/coverage.w5 256 64 1.0 sum 2011 CHIP:H3K9ac:H3K9ac ChIPSeq Cre_cohort1 / H3K9ac ChIPSeq Cre_cohort1 / . 
+2012 GSM1198157 /home/drk/tillage/datasets/mouse/chip/geo/GSM1198157/summary/coverage.w5 256 64 1.0 sum 2012 CHIP:H3K9ac:H3K9ac ChIPSeq WT_cohort1 / H3K9ac ChIPSeq WT_cohort1 / . +2013 GSM1198158 /home/drk/tillage/datasets/mouse/chip/geo/GSM1198158/summary/coverage.w5 256 64 1.0 sum 2013 CHIP:H3K9ac:H3K9ac ChIPSeq HAHA_cohort1 / H3K9ac ChIPSeq HAHA_cohort1 / . +2014 GSM1198159 /home/drk/tillage/datasets/mouse/chip/geo/GSM1198159/summary/coverage.w5 256 64 1.0 sum 2014 CHIP:H3K9ac:H3K9ac ChIPSeq KA_cohort1 / H3K9ac ChIPSeq KA_cohort1 / . +2015 GSM1198162 /home/drk/tillage/datasets/mouse/chip/geo/GSM1198162/summary/coverage.w5 256 64 1.0 sum 2015 CHIP:H3K9ac:H3K9ac ChIPSeq YF_cohort2 / H3K9ac ChIPSeq YF_cohort2 / . +2016 GSM1198163 /home/drk/tillage/datasets/mouse/chip/geo/GSM1198163/summary/coverage.w5 256 64 1.0 sum 2016 CHIP:H3K9ac:H3K9ac ChIPSeq HEBI_cohort2 / H3K9ac ChIPSeq HEBI_cohort2 / . +2017 GSM1236494 /home/drk/tillage/datasets/mouse/chip/geo/GSM1236494/summary/coverage.w5 256 64 1.0 sum 2017 CHIP:SMRT:SMRT ChIPSeq 5PM (ZT10) / liver / . +2018 GSM1236495 /home/drk/tillage/datasets/mouse/chip/geo/GSM1236495/summary/coverage.w5 256 64 1.0 sum 2018 CHIP:SMRT:SMRT ChIPSeq 5AM (ZT22) / liver / . +2019 GSM1301669 /home/drk/tillage/datasets/mouse/chip/geo/GSM1301669/summary/coverage.w5 256 64 1.0 sum 2019 CHIP:CLOCK:ZT8 CLOCK ChIP Seq-1 / Liver / . +2020 GSM1301671 /home/drk/tillage/datasets/mouse/chip/geo/GSM1301671/summary/coverage.w5 256 64 1.0 sum 2020 CHIP:BMAL1:ZT8 BMAL1 ChIP Seq-1 / Liver / . +2021 GSM1301673 /home/drk/tillage/datasets/mouse/chip/geo/GSM1301673/summary/coverage.w5 256 64 1.0 sum 2021 CHIP:CRY1:ZT20 CRY1 ChIP Seq / Liver / . +2022 GSM1437733 /home/drk/tillage/datasets/mouse/chip/geo/GSM1437733/summary/coverage.w5 256 64 1.0 sum 2022 CHIP:E4BP4:E4BP4 Liver ZT22 ChIP-seq / liver / . 
+2023 GSM1437734 /home/drk/tillage/datasets/mouse/chip/geo/GSM1437734/summary/coverage.w5 256 64 1.0 sum 2023 CHIP:RORalpha:RORalpha Liver ZT22 ChIP-seq / liver / . +2024 GSM1446062 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446062/summary/coverage.w5 256 64 1.0 sum 2024 CHIP:GR:ChIP-seq, Liver_GR_GRdim_6am / Liver tissue / . +2025 GSM1446063 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446063/summary/coverage.w5 256 64 1.0 sum 2025 CHIP:GR:ChIP-seq, Liver_GR_GRdim_6pm / Liver tissue / . +2026 GSM1446064 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446064/summary/coverage.w5 256 64 1.0 sum 2026 CHIP:GR:ChIP-seq, Liver_GR_GRdim_pred_6am / Liver tissue / . +2027 GSM1446065 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446065/summary/coverage.w5 256 64 1.0 sum 2027 CHIP:GR:ChIP-seq, Liver_GR_WT_6am / Liver tissue / . +2028 GSM1446066 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446066/summary/coverage.w5 256 64 1.0 sum 2028 CHIP:GR:ChIP-seq, Liver_GR_WT_6pm / Liver tissue / . +2029 GSM1446067 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446067/summary/coverage.w5 256 64 1.0 sum 2029 CHIP:GR:ChIP-seq, Liver_GR_WT_pred_6am / Liver tissue / . +2030 GSM1446068 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446068/summary/coverage.w5 256 64 1.0 sum 2030 CHIP:RNAPII:ChIP-seq, Liver_RNAPII_WT_6am / Liver tissue / . +2031 GSM1446069 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446069/summary/coverage.w5 256 64 1.0 sum 2031 CHIP:RNAPII:ChIP-seq, Liver_RNAPII_WT_pred_6am / Liver tissue / . +2032 GSM1446070 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446070/summary/coverage.w5 256 64 1.0 sum 2032 CHIP:CEBPb:ChIP-seq, Liver_CEBPb_WT_6pm / Liver tissue / . +2033 GSM1446071 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446071/summary/coverage.w5 256 64 1.0 sum 2033 CHIP:GR:ChIP-exo, Liver_GR_GRdim_6am / Liver tissue / . 
+2034 GSM1446072 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446072/summary/coverage.w5 256 64 1.0 sum 2034 CHIP:GR:ChIP-exo, Liver_GR_GRdim_6pm / Liver tissue / . +2035 GSM1446073 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446073/summary/coverage.w5 256 64 1.0 sum 2035 CHIP:GR:ChIP-exo, Liver_GR_GRdim_pred_6am / Liver tissue / . +2036 GSM1446074 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446074/summary/coverage.w5 256 64 1.0 sum 2036 CHIP:GR:ChIP-exo, Liver_GR_WT_6am / Liver tissue / . +2037 GSM1446075 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446075/summary/coverage.w5 256 64 1.0 sum 2037 CHIP:GR:ChIP-exo, Liver_GR_WT_6pm / Liver tissue / . +2038 GSM1446076 /home/drk/tillage/datasets/mouse/chip/geo/GSM1446076/summary/coverage.w5 256 64 1.0 sum 2038 CHIP:GR:ChIP-exo, Liver_GR_WT_pred_6am / Liver tissue / . +2039 GSM1479709 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479709/summary/coverage.w5 256 64 1.0 sum 2039 CHIP:PolII:PolII ZT 2 WT / Liver, WT, ZT 2, PolII ChIP / . +2040 GSM1479710 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479710/summary/coverage.w5 256 64 1.0 sum 2040 CHIP:PolII:PolII ZT 6 WT / Liver, WT, ZT 6, PolII ChIP / . +2041 GSM1479711 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479711/summary/coverage.w5 256 64 1.0 sum 2041 CHIP:PolII:PolII ZT 10 WT / Liver, WT, ZT 10, PolII ChIP / . +2042 GSM1479712 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479712/summary/coverage.w5 256 64 1.0 sum 2042 CHIP:PolII:PolII ZT 14 WT / Liver, WT, ZT 14, PolII ChIP / . +2043 GSM1479713 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479713/summary/coverage.w5 256 64 1.0 sum 2043 CHIP:PolII:PolII ZT 18 WT / Liver, WT, ZT 18, PolII ChIP / . +2044 GSM1479714 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479714/summary/coverage.w5 256 64 1.0 sum 2044 CHIP:PolII:PolII ZT 22 WT / Liver, WT, ZT 22, PolII ChIP / . 
+2045 GSM1479715 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479715/summary/coverage.w5 256 64 1.0 sum 2045 CHIP:PolII:PolII ZT 26 WT / Liver, WT, ZT 26, PolII ChIP / . +2046 GSM1479716 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479716/summary/coverage.w5 256 64 1.0 sum 2046 CHIP:PolII:PolII ZT 2 Bmal1 KO / Liver, Bmal1 KO, ZT 2, PolII ChIP / . +2047 GSM1479717 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479717/summary/coverage.w5 256 64 1.0 sum 2047 CHIP:PolII:PolII ZT 6 Bmal1 KO / Liver, Bmal1 KO, ZT 6, PolII ChIP / . +2048 GSM1479718 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479718/summary/coverage.w5 256 64 1.0 sum 2048 CHIP:PolII:PolII ZT 10 Bmal1 KO / Liver, Bmal1 KO, ZT 10, PolII ChIP / . +2049 GSM1479719 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479719/summary/coverage.w5 256 64 1.0 sum 2049 CHIP:PolII:PolII ZT 14 Bmal1 KO / Liver, Bmal1 KO, ZT 14, PolII ChIP / . +2050 GSM1479720 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479720/summary/coverage.w5 256 64 1.0 sum 2050 CHIP:PolII:PolII ZT 18 Bmal1 KO / Liver, Bmal1 KO, ZT 18, PolII ChIP / . +2051 GSM1479721 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479721/summary/coverage.w5 256 64 1.0 sum 2051 CHIP:PolII:PolII ZT 22 Bmal1 KO / Liver, Bmal1 KO, ZT 22, PolII ChIP / . +2052 GSM1479722 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479722/summary/coverage.w5 256 64 1.0 sum 2052 CHIP:PolII:PolII ZT 26 Bmal1 KO / Liver, Bmal1 KO, ZT 26, PolII ChIP / . +2053 GSM1479723 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479723/summary/coverage.w5 256 64 1.0 sum 2053 CHIP:H3K27ac:H3K27ac ZT 2 WT / Liver, WT, ZT 2, H3K27ac ChIP / . +2054 GSM1479724 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479724/summary/coverage.w5 256 64 1.0 sum 2054 CHIP:H3K27ac:H3K27ac ZT 6 WT / Liver, WT, ZT 6, H3K27ac ChIP / . +2055 GSM1479725 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479725/summary/coverage.w5 256 64 1.0 sum 2055 CHIP:H3K27ac:H3K27ac ZT 10 WT / Liver, WT, ZT 10, H3K27ac ChIP / . 
+2056 GSM1479726 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479726/summary/coverage.w5 256 64 1.0 sum 2056 CHIP:H3K27ac:H3K27ac ZT 14 WT / Liver, WT, ZT 14, H3K27ac ChIP / . +2057 GSM1479727 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479727/summary/coverage.w5 256 64 1.0 sum 2057 CHIP:H3K27ac:H3K27ac ZT 18 WT / Liver, WT, ZT 18, H3K27ac ChIP / . +2058 GSM1479728 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479728/summary/coverage.w5 256 64 1.0 sum 2058 CHIP:H3K27ac:H3K27ac ZT 22 WT / Liver, WT, ZT 22, H3K27ac ChIP / . +2059 GSM1479729 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479729/summary/coverage.w5 256 64 1.0 sum 2059 CHIP:H3K27ac:H3K27ac ZT 26 WT / Liver, WT, ZT 26, H3K27ac ChIP / . +2060 GSM1479730 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479730/summary/coverage.w5 256 64 1.0 sum 2060 CHIP:H3K27ac:H3K27ac ZT 2 Bmal1 KO / Liver, Bmal1 KO, ZT 2, H3K27ac ChIP / . +2061 GSM1479731 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479731/summary/coverage.w5 256 64 1.0 sum 2061 CHIP:H3K27ac:H3K27ac ZT 6 Bmal1 KO / Liver, Bmal1 KO, ZT 6, H3K27ac ChIP / . +2062 GSM1479732 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479732/summary/coverage.w5 256 64 1.0 sum 2062 CHIP:H3K27ac:H3K27ac ZT 10 Bmal1 KO / Liver, Bmal1 KO, ZT 10, H3K27ac ChIP / . +2063 GSM1479733 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479733/summary/coverage.w5 256 64 1.0 sum 2063 CHIP:H3K27ac:H3K27ac ZT 14 Bmal1 KO / Liver, Bmal1 KO, ZT 14, H3K27ac ChIP / . +2064 GSM1479734 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479734/summary/coverage.w5 256 64 1.0 sum 2064 CHIP:H3K27ac:H3K27ac ZT 18 Bmal1 KO / Liver, Bmal1 KO, ZT 18, H3K27ac ChIP / . +2065 GSM1479735 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479735/summary/coverage.w5 256 64 1.0 sum 2065 CHIP:H3K27ac:H3K27ac ZT 22 Bmal1 KO / Liver, Bmal1 KO, ZT 22, H3K27ac ChIP / . 
+2066 GSM1479736 /home/drk/tillage/datasets/mouse/chip/geo/GSM1479736/summary/coverage.w5 256 64 1.0 sum 2066 CHIP:H3K27ac:H3K27ac ZT 26 Bmal1 KO / Liver, Bmal1 KO, ZT 26, H3K27ac ChIP / . +2067 GSM1631168 /home/drk/tillage/datasets/mouse/chip/geo/GSM1631168/summary/coverage.w5 256 64 1.0 sum 2067 CHIP:CEBPb:ChIP-seq, Liver_CEBPb_WT_6am / Liver tissue / . +2068 GSM1631170 /home/drk/tillage/datasets/mouse/chip/geo/GSM1631170/summary/coverage.w5 256 64 1.0 sum 2068 CHIP:CEBPb:ChIP-seq, Liver_CEBPb_WT_pred_6am / Liver tissue / . +2069 GSM1659681 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659681/summary/coverage.w5 256 64 1.0 sum 2069 CHIP:NR1D1:Rev-erbalpha_eWAT_ZT10_rep1 / eWAT / . +2070 GSM1659684 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659684/summary/coverage.w5 256 64 1.0 sum 2070 CHIP:NR1D1:Rev-erbalpha_brain_ZT10 / brain(ventral tegmental area) / . +2071 GSM1659686 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659686/summary/coverage.w5 256 64 1.0 sum 2071 CHIP:NR1D1:Rev-erbalpha_WT_liver_ZT10_rep1 / liver / . +2072 GSM1659688 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659688/summary/coverage.w5 256 64 1.0 sum 2072 CHIP:NR1D1:129mice_WT_liver_ZT10_rep1 / liver / . +2073 GSM1659690 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659690/summary/coverage.w5 256 64 1.0 sum 2073 CHIP:NR1D1:Rev-erbalpha_DBD_mutant_liver_ZT10_rep1 / liver / . +2074 GSM1659692 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659692/summary/coverage.w5 256 64 1.0 sum 2074 CHIP:RORA:RORalpha_liver_ZT10 / liver / . +2075 GSM1659693 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659693/summary/coverage.w5 256 64 1.0 sum 2075 CHIP:RORG:RORgamma_liver_ZT10 / liver / . +2076 GSM1659694 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659694/summary/coverage.w5 256 64 1.0 sum 2076 CHIP:RORG:RORgamma_liver_ZT22 / liver / . +2077 GSM1659695 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659695/summary/coverage.w5 256 64 1.0 sum 2077 CHIP:HDAC3:HDAC3_liver_reverbGFP_ZT10_rep1 / liver / . 
+2078 GSM1659697 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659697/summary/coverage.w5 256 64 1.0 sum 2078 CHIP:HDAC3:HDAC3_liver_Rev-erbsflCRE(DBDm)_ZT10_rep1 / liver / . +2079 GSM1659699 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659699/summary/coverage.w5 256 64 1.0 sum 2079 CHIP:HDAC3:liver_ZT10 / liver / . +2080 GSM1659700 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659700/summary/coverage.w5 256 64 1.0 sum 2080 CHIP:HDAC3:HDAC3_liver_reverbalphaKO_betaKD_ZT10 / liver / . +2081 GSM1659701 /home/drk/tillage/datasets/mouse/chip/geo/GSM1659701/summary/coverage.w5 256 64 1.0 sum 2081 CHIP:HDAC3:HDAC3_KO_liver_ZT10_rep1 / liver / . +2082 GSM1855803 /home/drk/tillage/datasets/mouse/chip/geo/GSM1855803/summary/coverage.w5 256 64 1.0 sum 2082 CHIP:HSF1:HSF1 ZT14 / Liver / . +2083 GSM2218846 /home/drk/tillage/datasets/mouse/chip/geo/GSM2218846/summary/coverage.w5 256 64 1.0 sum 2083 CHIP:Rev-erba:Rev-erba ChIP-seq at ZT10 (Wild Type) Replicate 1 / liver / . +2084 GSM2218849 /home/drk/tillage/datasets/mouse/chip/geo/GSM2218849/summary/coverage.w5 256 64 1.0 sum 2084 CHIP:Rev-erba:Rev-erba ChIP-seq at ZT10 (HNF6 KO) Replicate 1 / liver / . +2085 GSM2390508 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390508/summary/coverage.w5 256 64 1.0 sum 2085 CHIP:RPC4:ZT02, rep1, CONTROL / Liver / . +2086 GSM2390509 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390509/summary/coverage.w5 256 64 1.0 sum 2086 CHIP:RPC4:ZT06, rep1, CONTROL / Liver / . +2087 GSM2390510 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390510/summary/coverage.w5 256 64 1.0 sum 2087 CHIP:RPC4:ZT10, rep1, CONTROL / Liver / . +2088 GSM2390511 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390511/summary/coverage.w5 256 64 1.0 sum 2088 CHIP:RPC4:ZT14, rep1, CONTROL / Liver / . +2089 GSM2390512 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390512/summary/coverage.w5 256 64 1.0 sum 2089 CHIP:RPC4:ZT18, rep1, CONTROL / Liver / . 
+2090 GSM2390513 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390513/summary/coverage.w5 256 64 1.0 sum 2090 CHIP:RPC4:ZT22, rep1, CONTROL / Liver / . +2091 GSM2390532 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390532/summary/coverage.w5 256 64 1.0 sum 2091 CHIP:RPC4:ZT02, rep1, CONSTANTLY FED / Liver / . +2092 GSM2390533 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390533/summary/coverage.w5 256 64 1.0 sum 2092 CHIP:RPC4:ZT06, rep1, CONSTANTLY FED / Liver / . +2093 GSM2390534 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390534/summary/coverage.w5 256 64 1.0 sum 2093 CHIP:RPC4:ZT10, rep1, CONSTANTLY FED / Liver / . +2094 GSM2390535 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390535/summary/coverage.w5 256 64 1.0 sum 2094 CHIP:RPC4:ZT14, rep1, CONSTANTLY FED / Liver / . +2095 GSM2390536 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390536/summary/coverage.w5 256 64 1.0 sum 2095 CHIP:RPC4:ZT18, rep1, CONSTANTLY FED / Liver / . +2096 GSM2390537 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390537/summary/coverage.w5 256 64 1.0 sum 2096 CHIP:RPC4:ZT22, rep1, CONSTANTLY FED / Liver / . +2097 GSM2390538 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390538/summary/coverage.w5 256 64 1.0 sum 2097 CHIP:RPC4:ZT26, rep1, CONSTANTLY FED / Liver / . +2098 GSM2390560 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390560/summary/coverage.w5 256 64 1.0 sum 2098 CHIP:RPC4:ZT02, rep1, Arntl KO / Liver / . +2099 GSM2390561 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390561/summary/coverage.w5 256 64 1.0 sum 2099 CHIP:RPC4:ZT06, rep1, Arntl KO / Liver / . +2100 GSM2390562 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390562/summary/coverage.w5 256 64 1.0 sum 2100 CHIP:RPC4:ZT10, rep1, Arntl KO / Liver / . +2101 GSM2390563 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390563/summary/coverage.w5 256 64 1.0 sum 2101 CHIP:RPC4:ZT14, rep1, Arntl KO / Liver / . 
+2102 GSM2390564 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390564/summary/coverage.w5 256 64 1.0 sum 2102 CHIP:RPC4:ZT18, rep1, Arntl KO / Liver / . +2103 GSM2390565 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390565/summary/coverage.w5 256 64 1.0 sum 2103 CHIP:RPC4:ZT22, rep1, Arntl KO / Liver / . +2104 GSM2390584 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390584/summary/coverage.w5 256 64 1.0 sum 2104 CHIP:RPC4:ZT02, rep1, Maf1 KO / Liver / . +2105 GSM2390585 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390585/summary/coverage.w5 256 64 1.0 sum 2105 CHIP:RPC4:ZT06, rep1, Maf1 KO / Liver / . +2106 GSM2390586 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390586/summary/coverage.w5 256 64 1.0 sum 2106 CHIP:RPC4:ZT10, rep1, Maf1 KO / Liver / . +2107 GSM2390587 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390587/summary/coverage.w5 256 64 1.0 sum 2107 CHIP:RPC4:ZT14, rep1, Maf1 KO / Liver / . +2108 GSM2390588 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390588/summary/coverage.w5 256 64 1.0 sum 2108 CHIP:RPC4:ZT18, rep1, Maf1 KO / Liver / . +2109 GSM2390589 /home/drk/tillage/datasets/mouse/chip/geo/GSM2390589/summary/coverage.w5 256 64 1.0 sum 2109 CHIP:RPC4:ZT22, rep1, Maf1 KO / Liver / . +2110 GSM3212796 /home/drk/tillage/datasets/mouse/chip/geo/GSM3212796/summary/coverage.w5 256 64 1.0 sum 2110 CHIP:BMAL1:ZT4 control rep1 / gastrocnemius / . +2111 GSM3212798 /home/drk/tillage/datasets/mouse/chip/geo/GSM3212798/summary/coverage.w5 256 64 1.0 sum 2111 CHIP:RNA:RNAP2_ZT4 control rep1 / gastrocnemius / . +2112 GSM3212801 /home/drk/tillage/datasets/mouse/chip/geo/GSM3212801/summary/coverage.w5 256 64 1.0 sum 2112 CHIP:Rev-erba:Reverba_ZT8 control REP1 / gastrocnemius / . +2113 GSM647022 /home/drk/tillage/datasets/mouse/chip/geo/GSM647022/summary/coverage.w5 256 64 1.0 sum 2113 CHIP:HDAC3:ZT10-1 / Liver / . +2114 GSM647025 /home/drk/tillage/datasets/mouse/chip/geo/GSM647025/summary/coverage.w5 256 64 1.0 sum 2114 CHIP:HDAC3:ZT22-1 / Liver / . 
+2115 GSM647027 /home/drk/tillage/datasets/mouse/chip/geo/GSM647027/summary/coverage.w5 256 64 1.0 sum 2115 CHIP:NCoR:ZT10 / Liver / . +2116 GSM647028 /home/drk/tillage/datasets/mouse/chip/geo/GSM647028/summary/coverage.w5 256 64 1.0 sum 2116 CHIP:NCoR:ZT22 / Liver / . +2117 GSM647029 /home/drk/tillage/datasets/mouse/chip/geo/GSM647029/summary/coverage.w5 256 64 1.0 sum 2117 CHIP:NR1D1:ZT10 / Liver / . +2118 GSM647030 /home/drk/tillage/datasets/mouse/chip/geo/GSM647030/summary/coverage.w5 256 64 1.0 sum 2118 CHIP:NR1D1:ZT22 / Liver / . +2119 GSM647031 /home/drk/tillage/datasets/mouse/chip/geo/GSM647031/summary/coverage.w5 256 64 1.0 sum 2119 CHIP:PolII:ZT10 / Liver / . +2120 GSM647032 /home/drk/tillage/datasets/mouse/chip/geo/GSM647032/summary/coverage.w5 256 64 1.0 sum 2120 CHIP:PolII:ZT22 / Liver / . +2121 GSM647035 /home/drk/tillage/datasets/mouse/chip/geo/GSM647035/summary/coverage.w5 256 64 1.0 sum 2121 CHIP:H3K9ac:ZT10 / Liver / . +2122 GSM647036 /home/drk/tillage/datasets/mouse/chip/geo/GSM647036/summary/coverage.w5 256 64 1.0 sum 2122 CHIP:H3K9ac:ZT22 / Liver / . +2123 GSM647037 /home/drk/tillage/datasets/mouse/chip/geo/GSM647037/summary/coverage.w5 256 64 1.0 sum 2123 CHIP:H3K9ac:HDAC3 KO liver at ZT10 / Liver / . +2124 GSM840528 /home/drk/tillage/datasets/mouse/chip/geo/GSM840528/summary/coverage.w5 256 64 1.0 sum 2124 CHIP:REV-ERB:REV-ERB alpha / liver_REV-ERB alpha_ChIP / . +2125 GSM840529 /home/drk/tillage/datasets/mouse/chip/geo/GSM840529/summary/coverage.w5 256 64 1.0 sum 2125 CHIP:REV-ERB:REV-ERB beta / liver_REV-ERB beta_ChIP / . +2126 GSM864668 /home/drk/tillage/datasets/mouse/chip/geo/GSM864668/summary/coverage.w5 256 64 1.0 sum 2126 CHIP:LXR:WT Bexarotene / C57BL/6 wild type mouse liver / . +2127 GSM864669 /home/drk/tillage/datasets/mouse/chip/geo/GSM864669/summary/coverage.w5 256 64 1.0 sum 2127 CHIP:LXR:WT Control / C57BL/6 wild type mouse liver / . 
+2128 GSM864670 /home/drk/tillage/datasets/mouse/chip/geo/GSM864670/summary/coverage.w5 256 64 1.0 sum 2128 CHIP:LXR:WT T0901317 / C57BL/6 wild type mouse liver / . +2129 GSM864671 /home/drk/tillage/datasets/mouse/chip/geo/GSM864671/summary/coverage.w5 256 64 1.0 sum 2129 CHIP:PPARA:WT Control / C57BL/6 wild type mouse liver / . +2130 GSM864672 /home/drk/tillage/datasets/mouse/chip/geo/GSM864672/summary/coverage.w5 256 64 1.0 sum 2130 CHIP:PPARA:LXRdKO Control / C57BL/6 LXRdKO mouse liver / . +2131 GSM864673 /home/drk/tillage/datasets/mouse/chip/geo/GSM864673/summary/coverage.w5 256 64 1.0 sum 2131 CHIP:RXR:WT Bexarotene / C57BL/6 wild type mouse liver / . +2132 GSM864674 /home/drk/tillage/datasets/mouse/chip/geo/GSM864674/summary/coverage.w5 256 64 1.0 sum 2132 CHIP:RXR:WT Control / C57BL/6 wild type mouse liver / . +2133 GSM864675 /home/drk/tillage/datasets/mouse/chip/geo/GSM864675/summary/coverage.w5 256 64 1.0 sum 2133 CHIP:RXR:WT T0901317 / C57BL/6 wild type mouse liver / . +2134 GSM864676 /home/drk/tillage/datasets/mouse/chip/geo/GSM864676/summary/coverage.w5 256 64 1.0 sum 2134 CHIP:RXR:LXRdKO Bexarotene / C57BL/6 LXRdKO mouse liver / . +2135 GSM864677 /home/drk/tillage/datasets/mouse/chip/geo/GSM864677/summary/coverage.w5 256 64 1.0 sum 2135 CHIP:RXR:LXRdKO Control / C57BL/6 LXRdKO mouse liver / . +2136 GSM864678 /home/drk/tillage/datasets/mouse/chip/geo/GSM864678/summary/coverage.w5 256 64 1.0 sum 2136 CHIP:RXR:LXRdKO T0901317 / C57BL/6 LXRdKO mouse liver / . +2137 GSM864679 /home/drk/tillage/datasets/mouse/chip/geo/GSM864679/summary/coverage.w5 256 64 1.0 sum 2137 CHIP:PolII:WT Control replicate 1 / C57BL/6 wild type mouse liver / . +2138 GSM864681 /home/drk/tillage/datasets/mouse/chip/geo/GSM864681/summary/coverage.w5 256 64 1.0 sum 2138 CHIP:PolII:WT Bexarotene replicate 1 / C57BL/6 wild type mouse liver / . 
+2139 GSM864683 /home/drk/tillage/datasets/mouse/chip/geo/GSM864683/summary/coverage.w5 256 64 1.0 sum 2139 CHIP:PolII:WT T0901317 replicate 1 / C57BL/6 wild type mouse liver / . +2140 GSM864685 /home/drk/tillage/datasets/mouse/chip/geo/GSM864685/summary/coverage.w5 256 64 1.0 sum 2140 CHIP:PolII:LXRdKO Control replicate 1 / C57BL/6 LXRdKO mouse liver / . +2141 GSM864687 /home/drk/tillage/datasets/mouse/chip/geo/GSM864687/summary/coverage.w5 256 64 1.0 sum 2141 CHIP:PolII:LXRdKO Bexarotene replicate 1 / C57BL/6 LXRdKO mouse liver / . +2142 GSM864689 /home/drk/tillage/datasets/mouse/chip/geo/GSM864689/summary/coverage.w5 256 64 1.0 sum 2142 CHIP:PolII:LXRdKO T0901317 replicate 1 / C57BL/6 LXRdKO mouse liver / . +2143 GSM873424 /home/drk/tillage/datasets/mouse/chip/geo/GSM873424/summary/coverage.w5 256 64 1.0 sum 2143 CHIP:CHOP:Chop+/+ Tm ChIP-seq / Mouse embronic fibroblast / Mouse embryonic fibroblast +2144 GSM873425 /home/drk/tillage/datasets/mouse/chip/geo/GSM873425/summary/coverage.w5 256 64 1.0 sum 2144 CHIP:CHOP:Chop -/- Tm ChIP-seq / Mouse embronic fibroblast / Mouse embryonic fibroblast +2145 GSM873426 /home/drk/tillage/datasets/mouse/chip/geo/GSM873426/summary/coverage.w5 256 64 1.0 sum 2145 CHIP:ATF4:Atf4+/+ Tm ChIP-seq / Mouse embronic fibroblast / Mouse embryonic fibroblast +2146 GSM873427 /home/drk/tillage/datasets/mouse/chip/geo/GSM873427/summary/coverage.w5 256 64 1.0 sum 2146 CHIP:ATF4:Atf4-/ Tm ChIP-seq / Mouse embronic fibroblast / Mouse embryonic fibroblast +2147 GSM874950 /home/drk/tillage/datasets/mouse/chip/geo/GSM874950/summary/coverage.w5 256 64 1.0 sum 2147 CHIP:RPB2:Polr2b_ZT02 / RPB2 IP / . +2148 GSM874951 /home/drk/tillage/datasets/mouse/chip/geo/GSM874951/summary/coverage.w5 256 64 1.0 sum 2148 CHIP:RPB2:Polr2b_ZT06 / RPB2 IP / . +2149 GSM874952 /home/drk/tillage/datasets/mouse/chip/geo/GSM874952/summary/coverage.w5 256 64 1.0 sum 2149 CHIP:RPB2:Polr2b_ZT10 / RPB2 IP / . 
+2150 GSM874953 /home/drk/tillage/datasets/mouse/chip/geo/GSM874953/summary/coverage.w5 256 64 1.0 sum 2150 CHIP:RPB2:Polr2b_ZT14 / RPB2 IP / . +2151 GSM874954 /home/drk/tillage/datasets/mouse/chip/geo/GSM874954/summary/coverage.w5 256 64 1.0 sum 2151 CHIP:RPB2:Polr2b_ZT18 / RPB2 IP / . +2152 GSM874955 /home/drk/tillage/datasets/mouse/chip/geo/GSM874955/summary/coverage.w5 256 64 1.0 sum 2152 CHIP:RPB2:Polr2b_ZT22 / RPB2 IP / . +2153 GSM874956 /home/drk/tillage/datasets/mouse/chip/geo/GSM874956/summary/coverage.w5 256 64 1.0 sum 2153 CHIP:RPB2:Polr2b_ZT26 / RPB2 IP / . +2154 GSM874957 /home/drk/tillage/datasets/mouse/chip/geo/GSM874957/summary/coverage.w5 256 64 1.0 sum 2154 CHIP:H3K4me3:H3K4me3_ZT02 / H3K4me3 IP / . +2155 GSM874958 /home/drk/tillage/datasets/mouse/chip/geo/GSM874958/summary/coverage.w5 256 64 1.0 sum 2155 CHIP:H3K4me3:H3K4me3_ZT06 / H3K4me3 IP / . +2156 GSM874959 /home/drk/tillage/datasets/mouse/chip/geo/GSM874959/summary/coverage.w5 256 64 1.0 sum 2156 CHIP:H3K4me3:H3K4me3_ZT10 / H3K4me3 IP / . +2157 GSM874960 /home/drk/tillage/datasets/mouse/chip/geo/GSM874960/summary/coverage.w5 256 64 1.0 sum 2157 CHIP:H3K4me3:H3K4me3_ZT14 / H3K4me3 IP / . +2158 GSM874961 /home/drk/tillage/datasets/mouse/chip/geo/GSM874961/summary/coverage.w5 256 64 1.0 sum 2158 CHIP:H3K4me3:H3K4me3_ZT18 / H3K4me3 IP / . +2159 GSM874962 /home/drk/tillage/datasets/mouse/chip/geo/GSM874962/summary/coverage.w5 256 64 1.0 sum 2159 CHIP:H3K4me3:H3K4me3_ZT22 / H3K4me3 IP / . +2160 GSM874963 /home/drk/tillage/datasets/mouse/chip/geo/GSM874963/summary/coverage.w5 256 64 1.0 sum 2160 CHIP:H3K4me3:H3K4me3_ZT26 / H3K4me3 IP / . +2161 GSM874964 /home/drk/tillage/datasets/mouse/chip/geo/GSM874964/summary/coverage.w5 256 64 1.0 sum 2161 CHIP:H3K36me3:H3K36me3_ZT02 / H3K36me3 IP / . +2162 GSM874965 /home/drk/tillage/datasets/mouse/chip/geo/GSM874965/summary/coverage.w5 256 64 1.0 sum 2162 CHIP:H3K36me3:H3K36me3_ZT06 / H3K36me3 IP / . 
+2163 GSM874966 /home/drk/tillage/datasets/mouse/chip/geo/GSM874966/summary/coverage.w5 256 64 1.0 sum 2163 CHIP:H3K36me3:H3K36me3_ZT10 / H3K36me3 IP / . +2164 GSM874967 /home/drk/tillage/datasets/mouse/chip/geo/GSM874967/summary/coverage.w5 256 64 1.0 sum 2164 CHIP:H3K36me3:H3K36me3_ZT14 / H3K36me3 IP / . +2165 GSM874968 /home/drk/tillage/datasets/mouse/chip/geo/GSM874968/summary/coverage.w5 256 64 1.0 sum 2165 CHIP:H3K36me3:H3K36me3_ZT18 / H3K36me3 IP / . +2166 GSM874969 /home/drk/tillage/datasets/mouse/chip/geo/GSM874969/summary/coverage.w5 256 64 1.0 sum 2166 CHIP:H3K36me3:H3K36me3_ZT22 / H3K36me3 IP / . +2167 GSM874970 /home/drk/tillage/datasets/mouse/chip/geo/GSM874970/summary/coverage.w5 256 64 1.0 sum 2167 CHIP:H3K36me3:H3K36me3_ZT26 / H3K36me3 IP / . +2168 ENCFF130ROA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AHX/summary/coverage.w5 768 384 0.3 sum_sqrt 2168 RNA:C3H10T1/2 +2169 ENCFF641HYO /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AHY/summary/coverage.w5 768 384 0.3 sum_sqrt 2169 RNA:C2C12 +2170 ENCFF018XGZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AHZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2170 RNA:C3H10T1/2 +2171 ENCFF777WNL /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AIA/summary/coverage.w5 768 384 0.3 sum_sqrt 2171 RNA:myocyte originated from C2C12 +2172 ENCFF798FMB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AJU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2173 RNA:C57BL/6J liver tissue adult (8 weeks) +2173 ENCFF798FMB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AJU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2172 RNA:C57BL/6J liver tissue adult (8 weeks) +2174 ENCFF419YHB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AJV/summary/coverage+.w5 768 384 0.3 sum_sqrt 2175 RNA:B10.H-2aH-4bp/Wts CH12.LX +2175 ENCFF419YHB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000AJV/summary/coverage-.w5 768 384 0.3 sum_sqrt 2174 RNA:B10.H-2aH-4bp/Wts CH12.LX +2176 ENCFF085QPW+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYP/summary/coverage+.w5 768 384 0.3 sum_sqrt 2177 RNA:C57BL/6J colon tissue adult (8 weeks) +2177 ENCFF085QPW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYP/summary/coverage-.w5 768 384 0.3 sum_sqrt 2176 RNA:C57BL/6J colon tissue adult (8 weeks) +2178 ENCFF558JWN+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2179 RNA:C57BL/6J heart tissue adult (8 weeks) +2179 ENCFF558JWN- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2178 RNA:C57BL/6J heart tissue adult (8 weeks) +2180 ENCFF878WGQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2181 RNA:C57BL/6J kidney tissue adult (8 weeks) +2181 ENCFF878WGQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2180 RNA:C57BL/6J kidney tissue adult (8 weeks) +2182 ENCFF995EIJ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYS/summary/coverage+.w5 768 384 0.3 sum_sqrt 2183 RNA:C57BL/6J liver tissue adult (8 weeks) +2183 ENCFF995EIJ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYS/summary/coverage-.w5 768 384 0.3 sum_sqrt 2182 RNA:C57BL/6J liver tissue adult (8 weeks) +2184 ENCFF070OKD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2185 RNA:C57BL/6J lung tissue adult (8 weeks) +2185 ENCFF070OKD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2184 RNA:C57BL/6J lung tissue adult (8 weeks) +2186 ENCFF828PDL+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2187 RNA:C57BL/6J spleen tissue adult (8 weeks) +2187 ENCFF828PDL- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2186 RNA:C57BL/6J spleen tissue adult (8 weeks) +2188 ENCFF978UIY+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYV/summary/coverage+.w5 768 384 0.3 sum_sqrt 2189 RNA:C57BL/6J thymus tissue adult (8 weeks) +2189 ENCFF978UIY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYV/summary/coverage-.w5 768 384 0.3 sum_sqrt 2188 RNA:C57BL/6J thymus tissue adult (8 weeks) +2190 ENCFF216HBX+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYW/summary/coverage+.w5 768 384 0.3 sum_sqrt 2191 RNA:C57BL/6J testis tissue male adult (8 weeks) +2191 ENCFF216HBX- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYW/summary/coverage-.w5 768 384 0.3 sum_sqrt 2190 RNA:C57BL/6J testis tissue male adult (8 weeks) +2192 ENCFF341NKU+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYX/summary/coverage+.w5 768 384 0.3 sum_sqrt 2193 RNA:C57BL/6J adrenal gland tissue adult (8 weeks) +2193 ENCFF341NKU- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYX/summary/coverage-.w5 768 384 0.3 sum_sqrt 2192 RNA:C57BL/6J adrenal gland tissue adult (8 weeks) +2194 ENCFF721GSY+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYY/summary/coverage+.w5 768 384 0.3 sum_sqrt 2195 RNA:C57BL/6J duodenum tissue adult (8 weeks) +2195 ENCFF721GSY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYY/summary/coverage-.w5 768 384 0.3 sum_sqrt 2194 RNA:C57BL/6J duodenum tissue adult (8 weeks) +2196 ENCFF871LPW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2197 RNA:C57BL/6J gonadal fat pad tissue adult (8 weeks) +2197 ENCFF871LPW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BYZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2196 RNA:C57BL/6J gonadal fat pad tissue adult (8 weeks) +2198 ENCFF708IGB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2199 RNA:C57BL/6J large intestine tissue adult (8 weeks) +2199 ENCFF708IGB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2198 RNA:C57BL/6J large 
intestine tissue adult (8 weeks) +2200 ENCFF791XDM+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZB/summary/coverage+.w5 768 384 0.3 sum_sqrt 2201 RNA:C57BL/6J mammary gland tissue adult (8 weeks) +2201 ENCFF791XDM- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZB/summary/coverage-.w5 768 384 0.3 sum_sqrt 2200 RNA:C57BL/6J mammary gland tissue adult (8 weeks) +2202 ENCFF946YVP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2203 RNA:C57BL/6J ovary tissue female adult (8 weeks) +2203 ENCFF946YVP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2202 RNA:C57BL/6J ovary tissue female adult (8 weeks) +2204 ENCFF119WXY+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2205 RNA:C57BL/6J small intestine tissue adult (8 weeks) +2205 ENCFF119WXY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2204 RNA:C57BL/6J small intestine tissue adult (8 weeks) +2206 ENCFF764FGD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2207 RNA:C57BL/6J stomach tissue adult (8 weeks) +2207 ENCFF764FGD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2206 RNA:C57BL/6J stomach tissue adult (8 weeks) +2208 ENCFF811XDE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2209 RNA:C57BL/6J subcutaneous adipose tissue tissue adult (8 weeks) +2209 ENCFF811XDE- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2208 RNA:C57BL/6J subcutaneous adipose tissue tissue adult (8 weeks) +2210 ENCFF538MHW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2211 RNA:C57BL/6J central nervous system tissue embryo (11.5 days) +2211 ENCFF538MHW- 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2210 RNA:C57BL/6J central nervous system tissue embryo (11.5 days) +2212 ENCFF789TGW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZH/summary/coverage+.w5 768 384 0.3 sum_sqrt 2213 RNA:C57BL/6J liver tissue embryo (14 days) +2213 ENCFF789TGW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZH/summary/coverage-.w5 768 384 0.3 sum_sqrt 2212 RNA:C57BL/6J liver tissue embryo (14 days) +2214 ENCFF050YCX+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZI/summary/coverage+.w5 768 384 0.3 sum_sqrt 2215 RNA:C57BL/6J liver tissue embryo (14.5 days) +2215 ENCFF050YCX- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZI/summary/coverage-.w5 768 384 0.3 sum_sqrt 2214 RNA:C57BL/6J liver tissue embryo (14.5 days) +2216 ENCFF418HXP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2217 RNA:C57BL/6J brain tissue embryo (14.5 days) +2217 ENCFF418HXP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2216 RNA:C57BL/6J brain tissue embryo (14.5 days) +2218 ENCFF030OOM+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZK/summary/coverage+.w5 768 384 0.3 sum_sqrt 2219 RNA:C57BL/6J central nervous system tissue embryo (14 days) +2219 ENCFF030OOM- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZK/summary/coverage-.w5 768 384 0.3 sum_sqrt 2218 RNA:C57BL/6J central nervous system tissue embryo (14 days) +2220 ENCFF731HCO+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZL/summary/coverage+.w5 768 384 0.3 sum_sqrt 2221 RNA:C57BL/6J central nervous system tissue embryo (18 days) +2221 ENCFF731HCO- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZL/summary/coverage-.w5 768 384 0.3 sum_sqrt 2220 RNA:C57BL/6J central nervous system tissue embryo (18 days) +2222 ENCFF657TAP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZM/summary/coverage+.w5 768 384 0.3 
sum_sqrt 2223 RNA:C57BL/6J cerebellum tissue adult (8 weeks) +2223 ENCFF657TAP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2222 RNA:C57BL/6J cerebellum tissue adult (8 weeks) +2224 ENCFF578CGA+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZN/summary/coverage+.w5 768 384 0.3 sum_sqrt 2225 RNA:C57BL/6J limb tissue embryo (14.5 days) +2225 ENCFF578CGA- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZN/summary/coverage-.w5 768 384 0.3 sum_sqrt 2224 RNA:C57BL/6J limb tissue embryo (14.5 days) +2226 ENCFF087VWF+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2227 RNA:C57BL/6J liver tissue embryo (18 days) +2227 ENCFF087VWF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2226 RNA:C57BL/6J liver tissue embryo (18 days) +2228 ENCFF014SEQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZP/summary/coverage+.w5 768 384 0.3 sum_sqrt 2229 RNA:C57BL/6J placenta tissue adult (8 weeks) +2229 ENCFF014SEQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZP/summary/coverage-.w5 768 384 0.3 sum_sqrt 2228 RNA:C57BL/6J placenta tissue adult (8 weeks) +2230 ENCFF528QPO+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2231 RNA:C57BL/6J urinary bladder tissue adult (8 weeks) +2231 ENCFF528QPO- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2230 RNA:C57BL/6J urinary bladder tissue adult (8 weeks) +2232 ENCFF436AYK+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2233 RNA:C57BL/6J cortical plate tissue adult (8 weeks) +2233 ENCFF436AYK- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2232 RNA:C57BL/6J cortical plate tissue adult (8 weeks) +2234 ENCFF297ZOV+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZS/summary/coverage+.w5 768 384 0.3 sum_sqrt 2235 RNA:C57BL/6J frontal cortex tissue adult (8 weeks) +2235 ENCFF297ZOV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000BZS/summary/coverage-.w5 768 384 0.3 sum_sqrt 2234 RNA:C57BL/6J frontal cortex tissue adult (8 weeks) +2236 ENCFF916LFR /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGT/summary/coverage.w5 768 384 0.3 sum_sqrt 2236 RNA:B6NCrl bone marrow tissue male adult (8 weeks) +2237 ENCFF427XNM /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGU/summary/coverage.w5 768 384 0.3 sum_sqrt 2237 RNA:Bruce4 ES-Bruce4 +2238 ENCFF496FSJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGV/summary/coverage.w5 768 384 0.3 sum_sqrt 2238 RNA:C57BL/6J embryonic fibroblast male embryo (13.5 weeks) +2239 ENCFF894JED /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGW/summary/coverage.w5 768 384 0.3 sum_sqrt 2239 RNA:B6NCrl spleen tissue male adult (8 weeks) +2240 ENCFF571YEJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGX/summary/coverage.w5 768 384 0.3 sum_sqrt 2240 RNA:B6NCrl cerebellum tissue male adult (8 weeks) +2241 ENCFF994GPI /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGY/summary/coverage.w5 768 384 0.3 sum_sqrt 2241 RNA:B6NCrl cortical plate tissue male adult (8 weeks) +2242 ENCFF718VKG /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CGZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2242 RNA:B6NCrl heart tissue male adult (8 weeks) +2243 ENCFF883OKA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHA/summary/coverage.w5 768 384 0.3 sum_sqrt 2243 RNA:B6NCrl kidney tissue male adult (8 weeks) +2244 ENCFF827GSU /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHB/summary/coverage.w5 768 384 0.3 sum_sqrt 2244 RNA:B6NCrl liver tissue male adult (8 weeks) +2245 ENCFF764ATZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHC/summary/coverage.w5 768 384 0.3 sum_sqrt 2245 RNA:B6NCrl lung tissue male adult (8 weeks) +2246 
ENCFF697CAD /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHD/summary/coverage.w5 768 384 0.3 sum_sqrt 2246 RNA:B6NCrl placenta tissue female adult (8 weeks) +2247 ENCFF110GAK /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHE/summary/coverage.w5 768 384 0.3 sum_sqrt 2247 RNA:B6NCrl liver tissue embryo (14.5 days) +2248 ENCFF291REK /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHF/summary/coverage.w5 768 384 0.3 sum_sqrt 2248 RNA:B6NCrl heart tissue embryo (14.5 days) +2249 ENCFF704KGU /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHG/summary/coverage.w5 768 384 0.3 sum_sqrt 2249 RNA:B6NCrl brain tissue embryo (14.5 days) +2250 ENCFF453ESI /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHH/summary/coverage.w5 768 384 0.3 sum_sqrt 2250 RNA:B6NCrl testis tissue male adult (8 weeks) +2251 ENCFF050AYJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHI/summary/coverage.w5 768 384 0.3 sum_sqrt 2251 RNA:B6NCrl small intestine tissue male adult (8 weeks) +2252 ENCFF751ZMZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHJ/summary/coverage.w5 768 384 0.3 sum_sqrt 2252 RNA:B6NCrl limb tissue embryo (14.5 days) +2253 ENCFF355GZI /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHK/summary/coverage.w5 768 384 0.3 sum_sqrt 2253 RNA:B6NCrl thymus tissue male adult (8 weeks) +2254 ENCFF126WZZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHL/summary/coverage.w5 768 384 0.3 sum_sqrt 2254 RNA:B6NCrl olfactory bulb tissue male adult (8 weeks) +2255 ENCFF185RSB /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHM/summary/coverage.w5 768 384 0.3 sum_sqrt 2255 RNA:MEL MEL +2256 ENCFF275WMD /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHN/summary/coverage.w5 768 384 0.3 sum_sqrt 2256 RNA:B6NCrl brown adipose tissue tissue male adult (24 weeks) +2257 ENCFF575SGC /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHO/summary/coverage.w5 768 384 0.3 sum_sqrt 2257 RNA:Bruce4 bone marrow macrophage male adult (8 weeks) +2258 
ENCFF135YCH /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHR/summary/coverage.w5 768 384 0.3 sum_sqrt 2258 RNA:B10.H-2aH-4bp/Wts CH12.LX +2259 ENCFF991WZB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHS/summary/coverage+.w5 768 384 0.3 sum_sqrt 2260 RNA:erythroblast embryo (14.5 days) +2260 ENCFF991WZB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHS/summary/coverage-.w5 768 384 0.3 sum_sqrt 2259 RNA:erythroblast embryo (14.5 days) +2261 ENCFF527GLP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2262 RNA:BALB/cJ leukemia stem cell adult (10-12 weeks) +2262 ENCFF527GLP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2261 RNA:BALB/cJ leukemia stem cell adult (10-12 weeks) +2263 ENCFF775WTP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2264 RNA:BALB/cJ hematopoietic multipotent progenitor cell +2264 ENCFF775WTP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2263 RNA:BALB/cJ hematopoietic multipotent progenitor cell +2265 ENCFF060OVZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHV/summary/coverage+.w5 768 384 0.3 sum_sqrt 2266 RNA:G1E +2266 ENCFF060OVZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHV/summary/coverage-.w5 768 384 0.3 sum_sqrt 2265 RNA:G1E +2267 ENCFF033AFD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHW/summary/coverage+.w5 768 384 0.3 sum_sqrt 2268 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 14 hours +2268 ENCFF033AFD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHW/summary/coverage-.w5 768 384 0.3 sum_sqrt 2267 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 14 hours +2269 ENCFF166GVX+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHX/summary/coverage+.w5 768 384 0.3 sum_sqrt 2270 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 7 hours +2270 ENCFF166GVX- 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHX/summary/coverage-.w5 768 384 0.3 sum_sqrt 2269 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 7 hours +2271 ENCFF104EKC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHY/summary/coverage+.w5 768 384 0.3 sum_sqrt 2272 RNA:G1E-ER4 +2272 ENCFF104EKC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHY/summary/coverage-.w5 768 384 0.3 sum_sqrt 2271 RNA:G1E-ER4 +2273 ENCFF821EFN+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2274 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 3 hours +2274 ENCFF821EFN- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CHZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2273 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 3 hours +2275 ENCFF450NDQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2276 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2276 ENCFF450NDQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2275 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2277 ENCFF323RIK+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIB/summary/coverage+.w5 768 384 0.3 sum_sqrt 2278 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 30 hours +2278 ENCFF323RIK- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIB/summary/coverage-.w5 768 384 0.3 sum_sqrt 2277 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 30 hours +2279 ENCFF583SXP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2280 RNA:megakaryocyte embryo (14.5 days) +2280 ENCFF583SXP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2279 RNA:megakaryocyte embryo (14.5 days) +2281 ENCFF679WOA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CID/summary/coverage.w5 768 384 0.3 sum_sqrt 2281 RNA:MEL MEL +2282 ENCFF205YLQ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIE/summary/coverage.w5 768 384 0.3 sum_sqrt 2282 RNA:MEL MEL treated with 2% dimethyl sulfoxide for 5 days +2283 ENCFF315IFZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2284 RNA:C57BL/6J megakaryocyte-erythroid progenitor cell female adult (5 weeks) +2284 ENCFF315IFZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2283 RNA:C57BL/6J megakaryocyte-erythroid progenitor cell female adult (5 weeks) +2285 ENCFF064XQN /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIG/summary/coverage.w5 768 384 0.3 sum_sqrt 2285 RNA:G1E +2286 ENCFF326KZM /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CIH/summary/coverage.w5 768 384 0.3 sum_sqrt 2286 RNA:G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2287 ENCFF968GGL+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2288 RNA:129/Ola ES-E14 +2288 ENCFF968GGL- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2287 RNA:129/Ola ES-E14 +2289 ENCFF110JSW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2290 RNA:B10.H-2aH-4bp/Wts CH12.LX +2290 ENCFF110JSW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2289 RNA:B10.H-2aH-4bp/Wts CH12.LX +2291 ENCFF559VXC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2292 RNA:MEL MEL +2292 ENCFF559VXC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2291 RNA:MEL MEL +2293 ENCFF333VTF+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2294 RNA:MEL MEL treated with 2% dimethyl sulfoxide for 5 days +2294 ENCFF333VTF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR000CWF/summary/coverage-.w5 768 384 
0.3 sum_sqrt 2293 RNA:MEL MEL treated with 2% dimethyl sulfoxide for 5 days +2295 ENCFF633YBL /home/drk/tillage/datasets/mouse/rna/encode/ENCSR004XCU/summary/coverage.w5 768 384 0.3 sum_sqrt 2295 RNA:B6NCrl neural tube tissue embryo (15.5 days) +2296 ENCFF226DFS+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR007NGM/summary/coverage+.w5 768 384 0.3 sum_sqrt 2297 RNA:B6CASTF1/J adrenal gland tissue male (2 months) +2297 ENCFF226DFS- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR007NGM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2296 RNA:B6CASTF1/J adrenal gland tissue male (2 months) +2298 ENCFF706QTS /home/drk/tillage/datasets/mouse/rna/encode/ENCSR017JEG/summary/coverage.w5 768 384 0.3 sum_sqrt 2298 RNA:B6NCrl hindbrain tissue postnatal (0 days) +2299 ENCFF600EOH /home/drk/tillage/datasets/mouse/rna/encode/ENCSR020DGG/summary/coverage.w5 768 384 0.3 sum_sqrt 2299 RNA:B6NCrl heart tissue embryo (16.5 days) +2300 ENCFF565MXW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR025JRA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2301 RNA:C57BL/6J megakaryocyte male adult (5-6 weeks) +2301 ENCFF565MXW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR025JRA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2300 RNA:C57BL/6J megakaryocyte male adult (5-6 weeks) +2302 ENCFF060UIY+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR027FCH/summary/coverage+.w5 768 384 0.3 sum_sqrt 2303 RNA:C57BL/6J megakaryocyte progenitor cell male adult (5-6 weeks) +2303 ENCFF060UIY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR027FCH/summary/coverage-.w5 768 384 0.3 sum_sqrt 2302 RNA:C57BL/6J megakaryocyte progenitor cell male adult (5-6 weeks) +2304 ENCFF214MXT /home/drk/tillage/datasets/mouse/rna/encode/ENCSR027VRF/summary/coverage.w5 768 384 0.3 sum_sqrt 2304 RNA:Patski Patski +2305 ENCFF972RDL+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR035DLJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2306 RNA:B6NTac;B6NCrl/Lap heart tissue postnatal (0 days) +2306 ENCFF972RDL- 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR035DLJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2305 RNA:B6NTac;B6NCrl/Lap heart tissue postnatal (0 days) +2307 ENCFF370AJT /home/drk/tillage/datasets/mouse/rna/encode/ENCSR039ADS/summary/coverage.w5 768 384 0.3 sum_sqrt 2307 RNA:B6NCrl lung tissue embryo (14.5 days) +2308 ENCFF715GSS /home/drk/tillage/datasets/mouse/rna/encode/ENCSR049UJU/summary/coverage.w5 768 384 0.3 sum_sqrt 2308 RNA:B6NCrl heart tissue embryo (10.5 days) +2309 ENCFF533ZBZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR062VTB/summary/coverage.w5 768 384 0.3 sum_sqrt 2309 RNA:B6NCrl kidney tissue embryo (15.5 days) +2310 ENCFF100JHY+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR069PIG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2311 RNA:C57BL/6J monocyte adult (5-6 weeks) +2311 ENCFF100JHY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR069PIG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2310 RNA:C57BL/6J monocyte adult (5-6 weeks) +2312 ENCFF611NTZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR074WOD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2313 RNA:C57BL/6J neutrophil adult (5-6 weeks) +2313 ENCFF611NTZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR074WOD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2312 RNA:C57BL/6J neutrophil adult (5-6 weeks) +2314 ENCFF084ZJY /home/drk/tillage/datasets/mouse/rna/encode/ENCSR080EVZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2314 RNA:B6NCrl forebrain tissue embryo (16.5 days) +2315 ENCFF206RDG+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR083OKX/summary/coverage+.w5 768 384 0.3 sum_sqrt 2316 RNA:B6CASTF1/J heart tissue female (4 days) +2316 ENCFF206RDG- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR083OKX/summary/coverage-.w5 768 384 0.3 sum_sqrt 2315 RNA:B6CASTF1/J heart tissue female (4 days) +2317 ENCFF553RHO+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR085AJX/summary/coverage+.w5 768 384 0.3 sum_sqrt 2318 RNA:C57BL/6J hematopoietic stem cell male adult (5-6 weeks) +2318 ENCFF553RHO- 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR085AJX/summary/coverage-.w5 768 384 0.3 sum_sqrt 2317 RNA:C57BL/6J hematopoietic stem cell male adult (5-6 weeks) +2319 ENCFF623LLW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR086MHH/summary/coverage+.w5 768 384 0.3 sum_sqrt 2320 RNA:5xFAD/CAST gastrocnemius tissue male postnatal (8-10 months) +2320 ENCFF623LLW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR086MHH/summary/coverage-.w5 768 384 0.3 sum_sqrt 2319 RNA:5xFAD/CAST gastrocnemius tissue male postnatal (8-10 months) +2321 ENCFF903FAA+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR093SMP/summary/coverage+.w5 768 384 0.3 sum_sqrt 2322 RNA:5xFAD/CAST gastrocnemius tissue female postnatal (8-10 months) +2322 ENCFF903FAA- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR093SMP/summary/coverage-.w5 768 384 0.3 sum_sqrt 2321 RNA:5xFAD/CAST gastrocnemius tissue female postnatal (8-10 months) +2323 ENCFF847QWE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR095TCB/summary/coverage+.w5 768 384 0.3 sum_sqrt 2324 RNA:B6CASTF1/J heart tissue male (4 days) +2324 ENCFF847QWE- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR095TCB/summary/coverage-.w5 768 384 0.3 sum_sqrt 2323 RNA:B6CASTF1/J heart tissue male (4 days) +2325 ENCFF929ZTM+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR095VGF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2326 RNA:B6CASTF1/J gastrocnemius tissue male (18-20 months) +2326 ENCFF929ZTM- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR095VGF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2325 RNA:B6CASTF1/J gastrocnemius tissue male (18-20 months) +2327 ENCFF019ESS /home/drk/tillage/datasets/mouse/rna/encode/ENCSR096STK/summary/coverage.w5 768 384 0.3 sum_sqrt 2327 RNA:B6NCrl liver tissue postnatal (0 days) +2328 ENCFF764QQE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR102ZZC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2329 RNA:B6CASTF1/J gastrocnemius tissue male (4 days) +2329 ENCFF764QQE- 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR102ZZC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2328 RNA:B6CASTF1/J gastrocnemius tissue male (4 days) +2330 ENCFF825BVO /home/drk/tillage/datasets/mouse/rna/encode/ENCSR115TWD/summary/coverage.w5 768 384 0.3 sum_sqrt 2330 RNA:B6NCrl neural tube tissue embryo (13.5 days) +2331 ENCFF138AEF+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR121HYR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2332 RNA:C57BL/6J megakaryocyte-erythroid progenitor cell male adult (5-6 weeks) +2332 ENCFF138AEF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR121HYR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2331 RNA:C57BL/6J megakaryocyte-erythroid progenitor cell male adult (5-6 weeks) +2333 ENCFF052JNL+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR128FBT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2334 RNA:B6CASTF1/J layer of hippocampus tissue female (14 days) +2334 ENCFF052JNL- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR128FBT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2333 RNA:B6CASTF1/J layer of hippocampus tissue female (14 days) +2335 ENCFF959YOC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR129DEA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2336 RNA:5xFAD/CAST adrenal gland tissue male postnatal (8-10 months) +2336 ENCFF959YOC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR129DEA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2335 RNA:5xFAD/CAST adrenal gland tissue male postnatal (8-10 months) +2337 ENCFF370IQE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR133LMN/summary/coverage+.w5 768 384 0.3 sum_sqrt 2338 RNA:B6CASTF1/J gastrocnemius tissue male (10 days) +2338 ENCFF370IQE- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR133LMN/summary/coverage-.w5 768 384 0.3 sum_sqrt 2337 RNA:B6CASTF1/J gastrocnemius tissue male (10 days) +2339 ENCFF798DPR+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR133SAI/summary/coverage+.w5 768 384 0.3 sum_sqrt 2340 RNA:C57BL/6J common myeloid progenitor male adult (5-6 weeks) +2340 
ENCFF798DPR- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR133SAI/summary/coverage-.w5 768 384 0.3 sum_sqrt 2339 RNA:C57BL/6J common myeloid progenitor male adult (5-6 weeks) +2341 ENCFF464JOW /home/drk/tillage/datasets/mouse/rna/encode/ENCSR137GMB/summary/coverage.w5 768 384 0.3 sum_sqrt 2341 RNA:B6NCrl left cerebral cortex tissue adult (6 months) +2342 ENCFF740FBB /home/drk/tillage/datasets/mouse/rna/encode/ENCSR150CUE/summary/coverage.w5 768 384 0.3 sum_sqrt 2342 RNA:B6NCrl heart tissue embryo (12.5 days) +2343 ENCFF242IJW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR158WTT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2344 RNA:B6CASTF1/J adrenal gland tissue female (4 days) +2344 ENCFF242IJW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR158WTT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2343 RNA:B6CASTF1/J adrenal gland tissue female (4 days) +2345 ENCFF527VWZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR160IIN/summary/coverage.w5 768 384 0.3 sum_sqrt 2345 RNA:B6NCrl forebrain tissue embryo (11.5 days) +2346 ENCFF620ZFH+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR164BAZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2347 RNA:C57BL/6J heart tissue female adult (10 weeks) +2347 ENCFF620ZFH- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR164BAZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2346 RNA:C57BL/6J heart tissue female adult (10 weeks) +2348 ENCFF979NRI+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR170SVO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2349 RNA:C57BL/6J small intestine tissue female adult (10 weeks) +2349 ENCFF979NRI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR170SVO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2348 RNA:C57BL/6J small intestine tissue female adult (10 weeks) +2350 ENCFF712EIS /home/drk/tillage/datasets/mouse/rna/encode/ENCSR173PJN/summary/coverage.w5 768 384 0.3 sum_sqrt 2350 RNA:B6NCrl kidney tissue postnatal (0 days) +2351 ENCFF293OVC /home/drk/tillage/datasets/mouse/rna/encode/ENCSR178GUS/summary/coverage.w5 
768 384 0.3 sum_sqrt 2351 RNA:B6NCrl stomach tissue postnatal (0 days) +2352 ENCFF889UIO+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR181KQJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2353 RNA:C57BL/6J G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2353 ENCFF889UIO- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR181KQJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2352 RNA:C57BL/6J G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2354 ENCFF484SND /home/drk/tillage/datasets/mouse/rna/encode/ENCSR185LWM/summary/coverage.w5 768 384 0.3 sum_sqrt 2354 RNA:B6NCrl forebrain tissue embryo (14.5 days) +2355 ENCFF353RSI+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR195UUB/summary/coverage+.w5 768 384 0.3 sum_sqrt 2356 RNA:B6CASTF1/J adrenal gland tissue female (10 days) +2356 ENCFF353RSI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR195UUB/summary/coverage-.w5 768 384 0.3 sum_sqrt 2355 RNA:B6CASTF1/J adrenal gland tissue female (10 days) +2357 ENCFF552LSX+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR201FJT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2358 RNA:B6CASTF1/J heart tissue male (2 months) +2358 ENCFF552LSX- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR201FJT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2357 RNA:B6CASTF1/J heart tissue male (2 months) +2359 ENCFF224JVW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR216KLZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2360 RNA:C57BL/6J liver tissue female adult (10 weeks) +2360 ENCFF224JVW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR216KLZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2359 RNA:C57BL/6J liver tissue female adult (10 weeks) +2361 ENCFF525YIT /home/drk/tillage/datasets/mouse/rna/encode/ENCSR216NEG/summary/coverage.w5 768 384 0.3 sum_sqrt 2361 RNA:B6NCrl limb tissue embryo (14.5 days) +2362 ENCFF550SIW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR219ZXZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2363 RNA:B6CASTF1/J left cerebral cortex tissue female (2 months) +2363 
ENCFF550SIW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR219ZXZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2362 RNA:B6CASTF1/J left cerebral cortex tissue female (2 months) +2364 ENCFF117WFD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR236ZIE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2365 RNA:C57BL/6J hematopoietic stem cell male adult (5-6 weeks) +2365 ENCFF117WFD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR236ZIE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2364 RNA:C57BL/6J hematopoietic stem cell male adult (5-6 weeks) +2366 ENCFF261PKU+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR238FNF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2367 RNA:B6CASTF1/J gastrocnemius tissue female (10 days) +2367 ENCFF261PKU- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR238FNF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2366 RNA:B6CASTF1/J gastrocnemius tissue female (10 days) +2368 ENCFF119KMU+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR247IMK/summary/coverage+.w5 768 384 0.3 sum_sqrt 2369 RNA:C57BL/6J erythroblast male adult (5-6 weeks) +2369 ENCFF119KMU- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR247IMK/summary/coverage-.w5 768 384 0.3 sum_sqrt 2368 RNA:C57BL/6J erythroblast male adult (5-6 weeks) +2370 ENCFF191ETV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR248KDJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2371 RNA:B6CASTF1/J left cerebral cortex tissue female (25 days) +2371 ENCFF191ETV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR248KDJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2370 RNA:B6CASTF1/J left cerebral cortex tissue female (25 days) +2372 ENCFF648BAP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR248XKS/summary/coverage+.w5 768 384 0.3 sum_sqrt 2373 RNA:C57BL/6J pancreas tissue female adult (10 weeks) +2373 ENCFF648BAP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR248XKS/summary/coverage-.w5 768 384 0.3 sum_sqrt 2372 RNA:C57BL/6J pancreas tissue female adult (10 weeks) +2374 ENCFF259ZQF+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR255SDF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2375 RNA:B6NTac;B6NCrl/Lap midbrain tissue postnatal (0 days) +2375 ENCFF259ZQF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR255SDF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2374 RNA:B6NTac;B6NCrl/Lap midbrain tissue postnatal (0 days) +2376 ENCFF249OVC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR266ESZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2377 RNA:C57BL/6J testis tissue male adult (10 weeks) +2377 ENCFF249OVC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR266ESZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2376 RNA:C57BL/6J testis tissue male adult (10 weeks) +2378 ENCFF072AST+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR270SAI/summary/coverage+.w5 768 384 0.3 sum_sqrt 2379 RNA:B6CASTF1/J adrenal gland tissue female (36 days) +2379 ENCFF072AST- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR270SAI/summary/coverage-.w5 768 384 0.3 sum_sqrt 2378 RNA:B6CASTF1/J adrenal gland tissue female (36 days) +2380 ENCFF375HPI+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR277DPB/summary/coverage+.w5 768 384 0.3 sum_sqrt 2381 RNA:C57BL/6J granulocyte monocyte progenitor cell male adult (5-6 weeks) +2381 ENCFF375HPI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR277DPB/summary/coverage-.w5 768 384 0.3 sum_sqrt 2380 RNA:C57BL/6J granulocyte monocyte progenitor cell male adult (5-6 weeks) +2382 ENCFF215KWO+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR280DCZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2383 RNA:B6CASTF1/J layer of hippocampus tissue female (36 days) +2383 ENCFF215KWO- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR280DCZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2382 RNA:B6CASTF1/J layer of hippocampus tissue female (36 days) +2384 ENCFF117QWC /home/drk/tillage/datasets/mouse/rna/encode/ENCSR284AMY/summary/coverage.w5 768 384 0.3 sum_sqrt 2384 RNA:B6NCrl liver tissue embryo (11.5 days) +2385 ENCFF475VSW 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR284YKY/summary/coverage.w5 768 384 0.3 sum_sqrt 2385 RNA:B6NCrl heart tissue embryo (13.5 days) +2386 ENCFF155UMX /home/drk/tillage/datasets/mouse/rna/encode/ENCSR285WZV/summary/coverage.w5 768 384 0.3 sum_sqrt 2386 RNA:B6NCrl hindbrain tissue embryo (16.5 days) +2387 ENCFF590WKI+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR288BJQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2388 RNA:B6CASTF1/J adrenal gland tissue female (2 months) +2388 ENCFF590WKI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR288BJQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2387 RNA:B6CASTF1/J adrenal gland tissue female (2 months) +2389 ENCFF402HQP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR288TLO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2390 RNA:C57BL/6J adipose tissue tissue male adult (10 weeks) +2390 ENCFF402HQP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR288TLO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2389 RNA:C57BL/6J adipose tissue tissue male adult (10 weeks) +2391 ENCFF172NDF /home/drk/tillage/datasets/mouse/rna/encode/ENCSR290RRR/summary/coverage.w5 768 384 0.3 sum_sqrt 2391 RNA:B6NCrl stomach tissue embryo (14.5 days) +2392 ENCFF082SFZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR292LSH/summary/coverage+.w5 768 384 0.3 sum_sqrt 2393 RNA:B6CASTF1/J layer of hippocampus tissue male (25 days) +2393 ENCFF082SFZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR292LSH/summary/coverage-.w5 768 384 0.3 sum_sqrt 2392 RNA:B6CASTF1/J layer of hippocampus tissue male (25 days) +2394 ENCFF998JBB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR299GYD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2395 RNA:5xFAD/CAST left cerebral cortex tissue male postnatal (8-10 months) +2395 ENCFF998JBB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR299GYD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2394 RNA:5xFAD/CAST left cerebral cortex tissue male postnatal (8-10 months) +2396 ENCFF843LEJ+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR300EHA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2397 RNA:B6CASTF1/J adrenal gland tissue male (25 days) +2397 ENCFF843LEJ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR300EHA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2396 RNA:B6CASTF1/J adrenal gland tissue male (25 days) +2398 ENCFF329ATO+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR301QXH/summary/coverage+.w5 768 384 0.3 sum_sqrt 2399 RNA:B6CASTF1/J adrenal gland tissue male (36 days) +2399 ENCFF329ATO- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR301QXH/summary/coverage-.w5 768 384 0.3 sum_sqrt 2398 RNA:B6CASTF1/J adrenal gland tissue male (36 days) +2400 ENCFF377BYA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR304RDL/summary/coverage.w5 768 384 0.3 sum_sqrt 2400 RNA:B6NCrl forebrain tissue embryo (10.5 days) +2401 ENCFF099WCR /home/drk/tillage/datasets/mouse/rna/encode/ENCSR307BCA/summary/coverage.w5 768 384 0.3 sum_sqrt 2401 RNA:B6NCrl midbrain tissue embryo (11.5 days) +2402 ENCFF056ZOQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR321WYK/summary/coverage+.w5 768 384 0.3 sum_sqrt 2403 RNA:B6CASTF1/J left cerebral cortex tissue female (10 days) +2403 ENCFF056ZOQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR321WYK/summary/coverage-.w5 768 384 0.3 sum_sqrt 2402 RNA:B6CASTF1/J left cerebral cortex tissue female (10 days) +2404 ENCFF937MXH+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR330DDD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2405 RNA:B6CASTF1/J layer of hippocampus tissue male (2 months) +2405 ENCFF937MXH- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR330DDD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2404 RNA:B6CASTF1/J layer of hippocampus tissue male (2 months) +2406 ENCFF357DMT /home/drk/tillage/datasets/mouse/rna/encode/ENCSR331XCE/summary/coverage.w5 768 384 0.3 sum_sqrt 2406 RNA:B6NCrl intestine tissue postnatal (0 days) +2407 ENCFF845QAI /home/drk/tillage/datasets/mouse/rna/encode/ENCSR337FYI/summary/coverage.w5 768 
384 0.3 sum_sqrt 2407 RNA:B6NCrl neural tube tissue embryo (11.5 days) +2408 ENCFF663BGW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR338WMA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2409 RNA:B6CASTF1/J layer of hippocampus tissue female (25 days) +2409 ENCFF663BGW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR338WMA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2408 RNA:B6CASTF1/J layer of hippocampus tissue female (25 days) +2410 ENCFF476BCJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR340NCF/summary/coverage.w5 768 384 0.3 sum_sqrt 2410 RNA:C57BL/6J megakaryocyte male adult (5-6 weeks) +2411 ENCFF409AEZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR343YLB/summary/coverage.w5 768 384 0.3 sum_sqrt 2411 RNA:B6NCrl midbrain tissue embryo (14.5 days) +2412 ENCFF862IJF /home/drk/tillage/datasets/mouse/rna/encode/ENCSR347SQR/summary/coverage.w5 768 384 0.3 sum_sqrt 2412 RNA:B6NCrl limb tissue embryo (13.5 days) +2413 ENCFF431GTN /home/drk/tillage/datasets/mouse/rna/encode/ENCSR362AIZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2413 RNA:B6NCrl forebrain tissue postnatal (0 days) +2414 ENCFF598EPR+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR363LZO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2415 RNA:B6CASTF1/J heart tissue male (36 days) +2415 ENCFF598EPR- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR363LZO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2414 RNA:B6CASTF1/J heart tissue male (36 days) +2416 ENCFF321JKX+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR367VMG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2417 RNA:B6CASTF1/J gastrocnemius tissue female (4 days) +2417 ENCFF321JKX- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR367VMG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2416 RNA:B6CASTF1/J gastrocnemius tissue female (4 days) +2418 ENCFF369LDA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR367ZPZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2418 RNA:B6NCrl midbrain tissue embryo (16.5 days) +2419 ENCFF646UNN 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR370SFB/summary/coverage.w5 768 384 0.3 sum_sqrt 2419 RNA:B6NCrl intestine tissue embryo (15.5 days) +2420 ENCFF958YYN+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR390XPU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2421 RNA:B6CASTF1/J layer of hippocampus tissue female (18-20 months) +2421 ENCFF958YYN- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR390XPU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2420 RNA:B6CASTF1/J layer of hippocampus tissue female (18-20 months) +2422 ENCFF724FDF+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR394YLM/summary/coverage+.w5 768 384 0.3 sum_sqrt 2423 RNA:C57BL/6J kidney tissue female adult (10 weeks) +2423 ENCFF724FDF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR394YLM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2422 RNA:C57BL/6J kidney tissue female adult (10 weeks) +2424 ENCFF380MBW /home/drk/tillage/datasets/mouse/rna/encode/ENCSR401BSG/summary/coverage.w5 768 384 0.3 sum_sqrt 2424 RNA:B6NCrl hindbrain tissue embryo (15.5 days) +2425 ENCFF758HEN+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR414YUZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2426 RNA:B6CASTF1/J adrenal gland tissue female (14 days) +2426 ENCFF758HEN- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR414YUZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2425 RNA:B6CASTF1/J adrenal gland tissue female (14 days) +2427 ENCFF928OMS /home/drk/tillage/datasets/mouse/rna/encode/ENCSR420QTO/summary/coverage.w5 768 384 0.3 sum_sqrt 2427 RNA:B6NCrl hindbrain tissue embryo (12.5 days) +2428 ENCFF109UBY+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR421HSE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2429 RNA:B6CASTF1/J layer of hippocampus tissue male (14 days) +2429 ENCFF109UBY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR421HSE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2428 RNA:B6CASTF1/J layer of hippocampus tissue male (14 days) +2430 ENCFF705URO+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR432KHK/summary/coverage+.w5 768 384 0.3 sum_sqrt 2431 RNA:B6CASTF1/J gastrocnemius tissue male (25 days) +2431 ENCFF705URO- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR432KHK/summary/coverage-.w5 768 384 0.3 sum_sqrt 2430 RNA:B6CASTF1/J gastrocnemius tissue male (25 days) +2432 ENCFF232HKB /home/drk/tillage/datasets/mouse/rna/encode/ENCSR438XCG/summary/coverage.w5 768 384 0.3 sum_sqrt 2432 RNA:B6NCrl thymus tissue postnatal (0 days) +2433 ENCFF688MNZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR442XRH/summary/coverage+.w5 768 384 0.3 sum_sqrt 2434 RNA:B6CASTF1/J adrenal gland tissue female (18-20 months) +2434 ENCFF688MNZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR442XRH/summary/coverage-.w5 768 384 0.3 sum_sqrt 2433 RNA:B6CASTF1/J adrenal gland tissue female (18-20 months) +2435 ENCFF031DAO /home/drk/tillage/datasets/mouse/rna/encode/ENCSR448MXQ/summary/coverage.w5 768 384 0.3 sum_sqrt 2435 RNA:B6NCrl liver tissue embryo (13.5 days) +2436 ENCFF224MAK /home/drk/tillage/datasets/mouse/rna/encode/ENCSR457RRW/summary/coverage.w5 768 384 0.3 sum_sqrt 2436 RNA:B6NCrl lung tissue embryo (15.5 days) +2437 ENCFF642BYK+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR465PBJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2438 RNA:B6CASTF1/J heart tissue female adult (18-20 months) +2438 ENCFF642BYK- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR465PBJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2437 RNA:B6CASTF1/J heart tissue female adult (18-20 months) +2439 ENCFF799ZHZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR466KZY/summary/coverage.w5 768 384 0.3 sum_sqrt 2439 RNA:B6NCrl stomach tissue embryo (16.5 days) +2440 ENCFF423PCA+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR475TDY/summary/coverage+.w5 768 384 0.3 sum_sqrt 2441 RNA:C57BL/6J G1E +2441 ENCFF423PCA- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR475TDY/summary/coverage-.w5 768 384 0.3 sum_sqrt 2440 RNA:C57BL/6J G1E +2442 ENCFF195TGW+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR484UKA/summary/coverage+.w5 768 384 0.3 sum_sqrt 2443 RNA:B6CASTF1/J heart tissue male (10 days) +2443 ENCFF195TGW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR484UKA/summary/coverage-.w5 768 384 0.3 sum_sqrt 2442 RNA:B6CASTF1/J heart tissue male (10 days) +2444 ENCFF522FQT+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR486QNN/summary/coverage+.w5 768 384 0.3 sum_sqrt 2445 RNA:B6CASTF1/J gastrocnemius tissue female (36 days) +2445 ENCFF522FQT- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR486QNN/summary/coverage-.w5 768 384 0.3 sum_sqrt 2444 RNA:B6CASTF1/J gastrocnemius tissue female (36 days) +2446 ENCFF573OLV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR495QVE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2447 RNA:B6CASTF1/J adrenal gland tissue female (25 days) +2447 ENCFF573OLV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR495QVE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2446 RNA:B6CASTF1/J adrenal gland tissue female (25 days) +2448 ENCFF941BJU+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR496EJP/summary/coverage+.w5 768 384 0.3 sum_sqrt 2449 RNA:B6CASTF1/J heart tissue female (10 days) +2449 ENCFF941BJU- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR496EJP/summary/coverage-.w5 768 384 0.3 sum_sqrt 2448 RNA:B6CASTF1/J heart tissue female (10 days) +2450 ENCFF325VHZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR496PRU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2451 RNA:B6CASTF1/J left cerebral cortex tissue male (2 months) +2451 ENCFF325VHZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR496PRU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2450 RNA:B6CASTF1/J left cerebral cortex tissue male (2 months) +2452 ENCFF528WPB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR502ORP/summary/coverage+.w5 768 384 0.3 sum_sqrt 2453 RNA:B6CASTF1/J gastrocnemius tissue female (18-20 months) +2453 ENCFF528WPB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR502ORP/summary/coverage-.w5 768 384 0.3 
sum_sqrt 2452 RNA:B6CASTF1/J gastrocnemius tissue female (18-20 months) +2454 ENCFF846JTZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR504GEG/summary/coverage.w5 768 384 0.3 sum_sqrt 2454 RNA:B6NCrl kidney tissue embryo (14.5 days) +2455 ENCFF369QHZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR508GWZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2455 RNA:B6NCrl neural tube tissue embryo (12.5 days) +2456 ENCFF875KEQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR510HZQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2457 RNA:B6CASTF1/J gastrocnemius tissue female (25 days) +2457 ENCFF875KEQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR510HZQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2456 RNA:B6CASTF1/J gastrocnemius tissue female (25 days) +2458 ENCFF546VVR+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR511LAZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2459 RNA:5xFAD/CAST adrenal gland tissue female postnatal (8-10 months) +2459 ENCFF546VVR- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR511LAZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2458 RNA:5xFAD/CAST adrenal gland tissue female postnatal (8-10 months) +2460 ENCFF846BUP+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR516DAX/summary/coverage+.w5 768 384 0.3 sum_sqrt 2461 RNA:B6CASTF1/J left cerebral cortex tissue male (10 days) +2461 ENCFF846BUP- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR516DAX/summary/coverage-.w5 768 384 0.3 sum_sqrt 2460 RNA:B6CASTF1/J left cerebral cortex tissue male (10 days) +2462 ENCFF441JYG+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR516UNF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2463 RNA:C57BL/6J ovary tissue female adult (10 weeks) +2463 ENCFF441JYG- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR516UNF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2462 RNA:C57BL/6J ovary tissue female adult (10 weeks) +2464 ENCFF433JKY+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR518GDK/summary/coverage+.w5 768 384 0.3 sum_sqrt 2465 RNA:C57BL/6J sigmoid colon tissue female 
adult (10 weeks) +2465 ENCFF433JKY- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR518GDK/summary/coverage-.w5 768 384 0.3 sum_sqrt 2464 RNA:C57BL/6J sigmoid colon tissue female adult (10 weeks) +2466 ENCFF561IRA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR526SEX/summary/coverage.w5 768 384 0.3 sum_sqrt 2466 RNA:B6NCrl heart tissue postnatal (0 days) +2467 ENCFF705SAG /home/drk/tillage/datasets/mouse/rna/encode/ENCSR537GNQ/summary/coverage.w5 768 384 0.3 sum_sqrt 2467 RNA:B6NCrl kidney tissue embryo (16.5 days) +2468 ENCFF306LNI+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR538CVD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2469 RNA:C57BL/6J G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2469 ENCFF306LNI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR538CVD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2468 RNA:C57BL/6J G1E-ER4 treated with 10 nM 17B-estradiol for 24 hours +2470 ENCFF222WST /home/drk/tillage/datasets/mouse/rna/encode/ENCSR538WYL/summary/coverage.w5 768 384 0.3 sum_sqrt 2470 RNA:B6NCrl embryonic facial prominence tissue embryo (13.5 days) +2471 ENCFF723OSL /home/drk/tillage/datasets/mouse/rna/encode/ENCSR541XZK/summary/coverage.w5 768 384 0.3 sum_sqrt 2471 RNA:B6NCrl limb tissue embryo (11.5 days) +2472 ENCFF048KKW+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR548BTE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2473 RNA:B6CASTF1/J layer of hippocampus tissue male (36 days) +2473 ENCFF048KKW- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR548BTE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2472 RNA:B6CASTF1/J layer of hippocampus tissue male (36 days) +2474 ENCFF086KLE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR549HQM/summary/coverage+.w5 768 384 0.3 sum_sqrt 2475 RNA:5xFAD/CAST layer of hippocampus tissue male postnatal (8-10 months) +2475 ENCFF086KLE- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR549HQM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2474 RNA:5xFAD/CAST layer of hippocampus tissue male postnatal (8-10 months) 
+2476 ENCFF063VOV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR549QME/summary/coverage+.w5 768 384 0.3 sum_sqrt 2477 RNA:C57BL/6J megakaryocyte-erythroid progenitor cell male adult (5-6 weeks) +2477 ENCFF063VOV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR549QME/summary/coverage-.w5 768 384 0.3 sum_sqrt 2476 RNA:C57BL/6J megakaryocyte-erythroid progenitor cell male adult (5-6 weeks) +2478 ENCFF336RLJ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR554PHF/summary/coverage+.w5 768 384 0.3 sum_sqrt 2479 RNA:C57BL/6J brain tissue male adult (10 weeks) +2479 ENCFF336RLJ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR554PHF/summary/coverage-.w5 768 384 0.3 sum_sqrt 2478 RNA:C57BL/6J brain tissue male adult (10 weeks) +2480 ENCFF762UWA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR557RMA/summary/coverage.w5 768 384 0.3 sum_sqrt 2480 RNA:B6NCrl midbrain tissue embryo (15.5 days) +2481 ENCFF331JZW /home/drk/tillage/datasets/mouse/rna/encode/ENCSR558PXY/summary/coverage.w5 768 384 0.3 sum_sqrt 2481 RNA:C57BL/6J erythroid progenitor cell male adult (5-6 weeks) +2482 ENCFF669KGM /home/drk/tillage/datasets/mouse/rna/encode/ENCSR559TRB/summary/coverage.w5 768 384 0.3 sum_sqrt 2482 RNA:B6NCrl hindbrain tissue embryo (14.5 days) +2483 ENCFF229KHK+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR571OZD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2484 RNA:B6CASTF1/J adrenal gland tissue male (14 days) +2484 ENCFF229KHK- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR571OZD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2483 RNA:B6CASTF1/J adrenal gland tissue male (14 days) +2485 ENCFF645WMC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR573TZD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2486 RNA:B6CASTF1/J heart tissue female (14 days) +2486 ENCFF645WMC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR573TZD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2485 RNA:B6CASTF1/J heart tissue female (14 days) +2487 ENCFF311NRK+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR576HNP/summary/coverage+.w5 768 384 0.3 sum_sqrt 2488 RNA:B6CASTF1/J layer of hippocampus tissue male (18-20 months) +2488 ENCFF311NRK- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR576HNP/summary/coverage-.w5 768 384 0.3 sum_sqrt 2487 RNA:B6CASTF1/J layer of hippocampus tissue male (18-20 months) +2489 ENCFF271VFJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR579FCW/summary/coverage.w5 768 384 0.3 sum_sqrt 2489 RNA:B6NCrl spleen tissue postnatal (0 days) +2490 ENCFF779UHC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR584OSM/summary/coverage+.w5 768 384 0.3 sum_sqrt 2491 RNA:5xFAD/CAST heart tissue male postnatal (8-10 months) +2491 ENCFF779UHC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR584OSM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2490 RNA:5xFAD/CAST heart tissue male postnatal (8-10 months) +2492 ENCFF921TZJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR597UZW/summary/coverage.w5 768 384 0.3 sum_sqrt 2492 RNA:B6NCrl heart tissue embryo (15.5 days) +2493 ENCFF242LFQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR603KGM/summary/coverage+.w5 768 384 0.3 sum_sqrt 2494 RNA:B6CASTF1/J gastrocnemius tissue male (14 days) +2494 ENCFF242LFQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR603KGM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2493 RNA:B6CASTF1/J gastrocnemius tissue male (14 days) +2495 ENCFF390LUQ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR611PTP/summary/coverage.w5 768 384 0.3 sum_sqrt 2495 RNA:B6NCrl liver tissue embryo (15.5 days) +2496 ENCFF914NSR+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR621FZG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2497 RNA:B6CASTF1/J heart tissue female (25 days) +2497 ENCFF914NSR- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR621FZG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2496 RNA:B6CASTF1/J heart tissue female (25 days) +2498 ENCFF080WKU /home/drk/tillage/datasets/mouse/rna/encode/ENCSR636CWO/summary/coverage.w5 768 384 0.3 sum_sqrt 2498 
RNA:B6NCrl embryonic facial prominence tissue embryo (15.5 days) +2499 ENCFF772FQM+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR640PLU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2500 RNA:B6CASTF1/J gastrocnemius tissue male (36 days) +2500 ENCFF772FQM- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR640PLU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2499 RNA:B6CASTF1/J gastrocnemius tissue male (36 days) +2501 ENCFF024JDL /home/drk/tillage/datasets/mouse/rna/encode/ENCSR647QBV/summary/coverage.w5 768 384 0.3 sum_sqrt 2501 RNA:B6NCrl forebrain tissue embryo (12.5 days) +2502 ENCFF184EFQ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR648YEP/summary/coverage.w5 768 384 0.3 sum_sqrt 2502 RNA:B6NCrl liver tissue embryo (12.5 days) +2503 ENCFF066XKF+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR652VMD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2504 RNA:B6CASTF1/J adrenal gland tissue male (10 days) +2504 ENCFF066XKF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR652VMD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2503 RNA:B6CASTF1/J adrenal gland tissue male (10 days) +2505 ENCFF401CCV /home/drk/tillage/datasets/mouse/rna/encode/ENCSR661TLW/summary/coverage.w5 768 384 0.3 sum_sqrt 2505 RNA:C57BL/6J erythroblast male adult (5-6 weeks) +2506 ENCFF825CAO /home/drk/tillage/datasets/mouse/rna/encode/ENCSR667TOX/summary/coverage.w5 768 384 0.3 sum_sqrt 2506 RNA:B6NCrl neural tube tissue postnatal (0 days) +2507 ENCFF815URA /home/drk/tillage/datasets/mouse/rna/encode/ENCSR691OPQ/summary/coverage.w5 768 384 0.3 sum_sqrt 2507 RNA:B6NCrl heart tissue embryo (11.5 days) +2508 ENCFF107XNF+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR702MVT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2509 RNA:B6CASTF1/J left cerebral cortex tissue male (14 days) +2509 ENCFF107XNF- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR702MVT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2508 RNA:B6CASTF1/J left cerebral cortex tissue male (14 days) +2510 ENCFF793LOI+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR712PLG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2511 RNA:B6CASTF1/J left cerebral cortex tissue female adult (18-20 months) +2511 ENCFF793LOI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR712PLG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2510 RNA:B6CASTF1/J left cerebral cortex tissue female adult (18-20 months) +2512 ENCFF319ESD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR713OCQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2513 RNA:C57BL/6J adrenal gland tissue female adult (10 weeks) +2513 ENCFF319ESD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR713OCQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2512 RNA:C57BL/6J adrenal gland tissue female adult (10 weeks) +2514 ENCFF207BTV /home/drk/tillage/datasets/mouse/rna/encode/ENCSR719NAJ/summary/coverage.w5 768 384 0.3 sum_sqrt 2514 RNA:B6NCrl midbrain tissue postnatal (0 days) +2515 ENCFF900NMC+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR723SZV/summary/coverage+.w5 768 384 0.3 sum_sqrt 2516 RNA:B6NTac;B6NCrl/Lap forebrain tissue postnatal (0 days) +2516 ENCFF900NMC- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR723SZV/summary/coverage-.w5 768 384 0.3 sum_sqrt 2515 RNA:B6NTac;B6NCrl/Lap forebrain tissue postnatal (0 days) +2517 ENCFF070OYM+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR723XFW/summary/coverage+.w5 768 384 0.3 sum_sqrt 2518 RNA:5xFAD/CAST left cerebral cortex tissue female postnatal (8-10 months) +2518 ENCFF070OYM- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR723XFW/summary/coverage-.w5 768 384 0.3 sum_sqrt 2517 RNA:5xFAD/CAST left cerebral cortex tissue female postnatal (8-10 months) +2519 ENCFF679LHL /home/drk/tillage/datasets/mouse/rna/encode/ENCSR727FHP/summary/coverage.w5 768 384 0.3 sum_sqrt 2519 RNA:B6NCrl heart tissue embryo (14.5 days) +2520 ENCFF983PEH /home/drk/tillage/datasets/mouse/rna/encode/ENCSR739PEB/summary/coverage.w5 768 384 0.3 sum_sqrt 2520 RNA:B6NCrl adrenal gland tissue postnatal (0 days) +2521 ENCFF347XMG+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR748DUR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2522 RNA:B6CASTF1/J adrenal gland tissue male (18-20 months) +2522 ENCFF347XMG- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR748DUR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2521 RNA:B6CASTF1/J adrenal gland tissue male (18-20 months) +2523 ENCFF599QYI+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR749BAG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2524 RNA:B6NTac;B6NCrl/Lap hindbrain tissue postnatal (0 days) +2524 ENCFF599QYI- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR749BAG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2523 RNA:B6NTac;B6NCrl/Lap hindbrain tissue postnatal (0 days) +2525 ENCFF751EJF /home/drk/tillage/datasets/mouse/rna/encode/ENCSR750YSX/summary/coverage.w5 768 384 0.3 sum_sqrt 2525 RNA:B6NCrl limb tissue embryo (12.5 days) +2526 ENCFF604LEL /home/drk/tillage/datasets/mouse/rna/encode/ENCSR752RGN/summary/coverage.w5 768 384 0.3 sum_sqrt 2526 RNA:B6NCrl forebrain tissue embryo (15.5 days) +2527 ENCFF640IMV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR757VTG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2528 RNA:B6CASTF1/J left cerebral cortex tissue female (4 days) +2528 ENCFF640IMV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR757VTG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2527 RNA:B6CASTF1/J left cerebral cortex tissue female (4 days) +2529 ENCFF844UWJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR760TOE/summary/coverage.w5 768 384 0.3 sum_sqrt 2529 RNA:B6NCrl hindbrain tissue embryo (11.5 days) +2530 ENCFF367GHQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR764GMN/summary/coverage+.w5 768 384 0.3 sum_sqrt 2531 RNA:B6CASTF1/J left cerebral cortex tissue male (25 days) +2531 ENCFF367GHQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR764GMN/summary/coverage-.w5 768 384 0.3 sum_sqrt 2530 RNA:B6CASTF1/J left cerebral cortex tissue male (25 days) +2532 ENCFF686AKO 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR764OPZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2532 RNA:B6NCrl midbrain tissue embryo (10.5 days) +2533 ENCFF107GSD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR767PMR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2534 RNA:B6CASTF1/J heart tissue male (14 days) +2534 ENCFF107GSD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR767PMR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2533 RNA:B6CASTF1/J heart tissue male (14 days) +2535 ENCFF209VXZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR767VHR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2536 RNA:C57BL/6J common myeloid progenitor male adult (5-6 weeks) +2536 ENCFF209VXZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR767VHR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2535 RNA:C57BL/6J common myeloid progenitor male adult (5-6 weeks) +2537 ENCFF177JFM /home/drk/tillage/datasets/mouse/rna/encode/ENCSR772FQU/summary/coverage.w5 768 384 0.3 sum_sqrt 2537 RNA:B6NCrl urinary bladder tissue postnatal (0 days) +2538 ENCFF966QYL+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR774DTO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2539 RNA:B6CASTF1/J left cerebral cortex tissue female (14 days) +2539 ENCFF966QYL- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR774DTO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2538 RNA:B6CASTF1/J left cerebral cortex tissue female (14 days) +2540 ENCFF074DII+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR791IFR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2541 RNA:B6CASTF1/J heart tissue female (36 days) +2541 ENCFF074DII- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR791IFR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2540 RNA:B6CASTF1/J heart tissue female (36 days) +2542 ENCFF229ZQV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR791UVS/summary/coverage+.w5 768 384 0.3 sum_sqrt 2543 RNA:B6CASTF1/J gastrocnemius tissue female (2 months) +2543 ENCFF229ZQV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR791UVS/summary/coverage-.w5 768 384 
0.3 sum_sqrt 2542 RNA:B6CASTF1/J gastrocnemius tissue female (2 months) +2544 ENCFF304NKC /home/drk/tillage/datasets/mouse/rna/encode/ENCSR792RJV/summary/coverage.w5 768 384 0.3 sum_sqrt 2544 RNA:B6NCrl midbrain tissue embryo (13.5 days) +2545 ENCFF648EZR+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR795WFC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2546 RNA:B6CASTF1/J heart tissue male adult (18-20 months) +2546 ENCFF648EZR- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR795WFC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2545 RNA:B6CASTF1/J heart tissue male adult (18-20 months) +2547 ENCFF075FNE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR809JHL/summary/coverage+.w5 768 384 0.3 sum_sqrt 2548 RNA:B6CASTF1/J gastrocnemius tissue male (2 months) +2548 ENCFF075FNE- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR809JHL/summary/coverage-.w5 768 384 0.3 sum_sqrt 2547 RNA:B6CASTF1/J gastrocnemius tissue male (2 months) +2549 ENCFF566YKN /home/drk/tillage/datasets/mouse/rna/encode/ENCSR809VYL/summary/coverage.w5 768 384 0.3 sum_sqrt 2549 RNA:B6NCrl embryonic facial prominence tissue embryo (10.5 days) +2550 ENCFF335UMY /home/drk/tillage/datasets/mouse/rna/encode/ENCSR823VEE/summary/coverage.w5 768 384 0.3 sum_sqrt 2550 RNA:B6NCrl embryonic facial prominence tissue embryo (14.5 days) +2551 ENCFF588ORC /home/drk/tillage/datasets/mouse/rna/encode/ENCSR826HIQ/summary/coverage.w5 768 384 0.3 sum_sqrt 2551 RNA:B6NCrl liver tissue embryo (16.5 days) +2552 ENCFF778AVN+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR826IXR/summary/coverage+.w5 768 384 0.3 sum_sqrt 2553 RNA:C57BL/6J G1E +2553 ENCFF778AVN- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR826IXR/summary/coverage-.w5 768 384 0.3 sum_sqrt 2552 RNA:C57BL/6J G1E +2554 ENCFF814UQP /home/drk/tillage/datasets/mouse/rna/encode/ENCSR830IVQ/summary/coverage.w5 768 384 0.3 sum_sqrt 2554 RNA:B6NCrl limb tissue embryo (15.5 days) +2555 ENCFF476VHS+ 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR833HPM/summary/coverage+.w5 768 384 0.3 sum_sqrt 2556 RNA:C57BL/6J granulocyte monocyte progenitor cell male adult (5-6 weeks) +2556 ENCFF476VHS- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR833HPM/summary/coverage-.w5 768 384 0.3 sum_sqrt 2555 RNA:C57BL/6J granulocyte monocyte progenitor cell male adult (5-6 weeks) +2557 ENCFF873KES+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR838GNN/summary/coverage+.w5 768 384 0.3 sum_sqrt 2558 RNA:B6CASTF1/J heart tissue male (25 days) +2558 ENCFF873KES- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR838GNN/summary/coverage-.w5 768 384 0.3 sum_sqrt 2557 RNA:B6CASTF1/J heart tissue male (25 days) +2559 ENCFF875AYS+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR841KGE/summary/coverage+.w5 768 384 0.3 sum_sqrt 2560 RNA:B6CASTF1/J left cerebral cortex tissue male (4 days) +2560 ENCFF875AYS- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR841KGE/summary/coverage-.w5 768 384 0.3 sum_sqrt 2559 RNA:B6CASTF1/J left cerebral cortex tissue male (4 days) +2561 ENCFF362QOV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR847AMQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 2562 RNA:C57BL/6J erythroid progenitor cell male adult (5-6 weeks) +2562 ENCFF362QOV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR847AMQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 2561 RNA:C57BL/6J erythroid progenitor cell male adult (5-6 weeks) +2563 ENCFF150IZB /home/drk/tillage/datasets/mouse/rna/encode/ENCSR848GST/summary/coverage.w5 768 384 0.3 sum_sqrt 2563 RNA:B6NCrl intestine tissue embryo (16.5 days) +2564 ENCFF391KZF /home/drk/tillage/datasets/mouse/rna/encode/ENCSR848HOX/summary/coverage.w5 768 384 0.3 sum_sqrt 2564 RNA:B6NCrl embryonic facial prominence tissue embryo (11.5 days) +2565 ENCFF372LRI /home/drk/tillage/datasets/mouse/rna/encode/ENCSR848LXY/summary/coverage.w5 768 384 0.3 sum_sqrt 2565 RNA:C57BL/6J megakaryocyte progenitor cell male adult (5-6 weeks) +2566 ENCFF210SHO 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR851HEC/summary/coverage.w5 768 384 0.3 sum_sqrt 2566 RNA:B6NCrl embryonic facial prominence tissue embryo (12.5 days) +2567 ENCFF843AFE+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR864IYS/summary/coverage+.w5 768 384 0.3 sum_sqrt 2568 RNA:B6CASTF1/J left cerebral cortex tissue male (36 days) +2568 ENCFF843AFE- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR864IYS/summary/coverage-.w5 768 384 0.3 sum_sqrt 2567 RNA:B6CASTF1/J left cerebral cortex tissue male (36 days) +2569 ENCFF927QNP /home/drk/tillage/datasets/mouse/rna/encode/ENCSR867YNV/summary/coverage.w5 768 384 0.3 sum_sqrt 2569 RNA:B6NCrl liver tissue embryo (14.5 days) +2570 ENCFF877PYA+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR870AQU/summary/coverage+.w5 768 384 0.3 sum_sqrt 2571 RNA:C57BL/6J lung tissue female adult (10 weeks) +2571 ENCFF877PYA- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR870AQU/summary/coverage-.w5 768 384 0.3 sum_sqrt 2570 RNA:C57BL/6J lung tissue female adult (10 weeks) +2572 ENCFF675JFB+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR886CRW/summary/coverage+.w5 768 384 0.3 sum_sqrt 2573 RNA:B6CASTF1/J gastrocnemius tissue female (14 days) +2573 ENCFF675JFB- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR886CRW/summary/coverage-.w5 768 384 0.3 sum_sqrt 2572 RNA:B6CASTF1/J gastrocnemius tissue female (14 days) +2574 ENCFF838PEV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR886NLC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2575 RNA:5xFAD/CAST layer of hippocampus tissue female postnatal (8-10 months) +2575 ENCFF838PEV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR886NLC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2574 RNA:5xFAD/CAST layer of hippocampus tissue female postnatal (8-10 months) +2576 ENCFF034IPR+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR893IBO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2577 RNA:B6CASTF1/J layer of hippocampus tissue female (10 days) +2577 ENCFF034IPR- 
/home/drk/tillage/datasets/mouse/rna/encode/ENCSR893IBO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2576 RNA:B6CASTF1/J layer of hippocampus tissue female (10 days) +2578 ENCFF914ZQG /home/drk/tillage/datasets/mouse/rna/encode/ENCSR898LNL/summary/coverage.w5 768 384 0.3 sum_sqrt 2578 RNA:B6NCrl layer of hippocampus tissue adult (6 months) +2579 ENCFF527IMQ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR906YQZ/summary/coverage.w5 768 384 0.3 sum_sqrt 2579 RNA:B6NCrl stomach tissue embryo (15.5 days) +2580 ENCFF711HUC /home/drk/tillage/datasets/mouse/rna/encode/ENCSR908JWT/summary/coverage.w5 768 384 0.3 sum_sqrt 2580 RNA:B6NCrl midbrain tissue embryo (12.5 days) +2581 ENCFF892JIV+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR912TCG/summary/coverage+.w5 768 384 0.3 sum_sqrt 2582 RNA:B6CASTF1/J layer of hippocampus tissue female (2 months) +2582 ENCFF892JIV- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR912TCG/summary/coverage-.w5 768 384 0.3 sum_sqrt 2581 RNA:B6CASTF1/J layer of hippocampus tissue female (2 months) +2583 ENCFF175ORJ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR920ABO/summary/coverage+.w5 768 384 0.3 sum_sqrt 2584 RNA:B6CASTF1/J adrenal gland tissue male (4 days) +2584 ENCFF175ORJ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR920ABO/summary/coverage-.w5 768 384 0.3 sum_sqrt 2583 RNA:B6CASTF1/J adrenal gland tissue male (4 days) +2585 ENCFF830QLY /home/drk/tillage/datasets/mouse/rna/encode/ENCSR921PRX/summary/coverage.w5 768 384 0.3 sum_sqrt 2585 RNA:B6NCrl hindbrain tissue embryo (13.5 days) +2586 ENCFF042EBZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR928OXI/summary/coverage.w5 768 384 0.3 sum_sqrt 2586 RNA:B6NCrl neural tube tissue embryo (14.5 days) +2587 ENCFF893AZR /home/drk/tillage/datasets/mouse/rna/encode/ENCSR932TRU/summary/coverage.w5 768 384 0.3 sum_sqrt 2587 RNA:B6NCrl intestine tissue embryo (14.5 days) +2588 ENCFF078POD /home/drk/tillage/datasets/mouse/rna/encode/ENCSR943LKA/summary/coverage.w5 768 384 0.3 sum_sqrt 
2588 RNA:B6NCrl hindbrain tissue embryo (10.5 days) +2589 ENCFF538OJZ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR946HWC/summary/coverage.w5 768 384 0.3 sum_sqrt 2589 RNA:B6NCrl skeletal muscle tissue tissue postnatal (0 days) +2590 ENCFF695VOZ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR946YUY/summary/coverage+.w5 768 384 0.3 sum_sqrt 2591 RNA:B6CASTF1/J left cerebral cortex tissue female (36 days) +2591 ENCFF695VOZ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR946YUY/summary/coverage-.w5 768 384 0.3 sum_sqrt 2590 RNA:B6CASTF1/J left cerebral cortex tissue female (36 days) +2592 ENCFF737DAD+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR966JPL/summary/coverage+.w5 768 384 0.3 sum_sqrt 2593 RNA:C57BL/6J spleen tissue male adult (10 weeks) +2593 ENCFF737DAD- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR966JPL/summary/coverage-.w5 768 384 0.3 sum_sqrt 2592 RNA:C57BL/6J spleen tissue male adult (10 weeks) +2594 ENCFF391QNQ+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR967IKT/summary/coverage+.w5 768 384 0.3 sum_sqrt 2595 RNA:5xFAD/CAST heart tissue female postnatal (8-10 months) +2595 ENCFF391QNQ- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR967IKT/summary/coverage-.w5 768 384 0.3 sum_sqrt 2594 RNA:5xFAD/CAST heart tissue female postnatal (8-10 months) +2596 ENCFF403BPJ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR968QHO/summary/coverage.w5 768 384 0.3 sum_sqrt 2596 RNA:B6NCrl limb tissue embryo (10.5 days) +2597 ENCFF303UCM /home/drk/tillage/datasets/mouse/rna/encode/ENCSR970EWM/summary/coverage.w5 768 384 0.3 sum_sqrt 2597 RNA:B6NCrl forebrain tissue embryo (13.5 days) +2598 ENCFF116WGN+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR976DKW/summary/coverage+.w5 768 384 0.3 sum_sqrt 2599 RNA:B6CASTF1/J layer of hippocampus tissue male (10 days) +2599 ENCFF116WGN- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR976DKW/summary/coverage-.w5 768 384 0.3 sum_sqrt 2598 RNA:B6CASTF1/J layer of hippocampus tissue male (10 days) +2600 
ENCFF570WAR /home/drk/tillage/datasets/mouse/rna/encode/ENCSR982MRY/summary/coverage.w5 768 384 0.3 sum_sqrt 2600 RNA:B6NCrl lung tissue postnatal (0 days) +2601 ENCFF376XLA+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR982PLD/summary/coverage+.w5 768 384 0.3 sum_sqrt 2602 RNA:F121-9 +2602 ENCFF376XLA- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR982PLD/summary/coverage-.w5 768 384 0.3 sum_sqrt 2601 RNA:F121-9 +2603 ENCFF258LLG+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR984JDC/summary/coverage+.w5 768 384 0.3 sum_sqrt 2604 RNA:B6CASTF1/J heart tissue female (2 months) +2604 ENCFF258LLG- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR984JDC/summary/coverage-.w5 768 384 0.3 sum_sqrt 2603 RNA:B6CASTF1/J heart tissue female (2 months) +2605 ENCFF678XDF /home/drk/tillage/datasets/mouse/rna/encode/ENCSR992WBR/summary/coverage.w5 768 384 0.3 sum_sqrt 2605 RNA:B6NCrl lung tissue embryo (16.5 days) +2606 ENCFF422YPA+ /home/drk/tillage/datasets/mouse/rna/encode/ENCSR996TVY/summary/coverage+.w5 768 384 0.3 sum_sqrt 2607 RNA:B6CASTF1/J left cerebral cortex tissue male adult (18-20 months) +2607 ENCFF422YPA- /home/drk/tillage/datasets/mouse/rna/encode/ENCSR996TVY/summary/coverage-.w5 768 384 0.3 sum_sqrt 2606 RNA:B6CASTF1/J left cerebral cortex tissue male adult (18-20 months) diff --git a/examples/targets_rna.txt b/examples/targets_rna.txt new file mode 100644 index 0000000..29c7f27 --- /dev/null +++ b/examples/targets_rna.txt @@ -0,0 +1,1544 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +6068 ENCFF281BWX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6069 RNA:aortic smooth muscle cell male adult (21 years) and male adult (54 years) +6069 ENCFF281BWX- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6068 RNA:aortic smooth muscle cell male adult (21 years) and male adult (54 years) +6070 ENCFF168OLY+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AAB/summary/coverage+.w5 768 384 0.3 sum_sqrt 6071 RNA:bladder microvascular endothelial cell male adult (46 years) and male adult (60 years) +6071 ENCFF168OLY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6070 RNA:bladder microvascular endothelial cell male adult (46 years) and male adult (60 years) +6072 ENCFF442VHH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6073 RNA:smooth muscle cell of bladder female adult (53 years) and male adult (62 years) +6073 ENCFF442VHH- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6072 RNA:smooth muscle cell of bladder female adult (53 years) and male adult (62 years) +6074 ENCFF153YEN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6075 RNA:bronchial epithelial cell female adult (40 years) and male adult (68 years) +6075 ENCFF153YEN- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6074 RNA:bronchial epithelial cell female adult (40 years) and male adult (68 years) +6076 ENCFF383NTW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6077 RNA:bronchial smooth muscle cell male adult (52 years) and male adult (59 years) +6077 ENCFF383NTW- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6076 RNA:bronchial smooth muscle cell male adult (52 years) and male adult (59 years) +6078 ENCFF226UWU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6079 RNA:endothelial cell of coronary artery female adult (41 years) and male adult (77 years) +6079 ENCFF226UWU- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6078 RNA:endothelial cell of coronary 
artery female adult (41 years) and male adult (77 years) +6080 ENCFF537AIY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6081 RNA:smooth muscle cell of the coronary artery female adult (53 years) and male adult (55 years) +6081 ENCFF537AIY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6080 RNA:smooth muscle cell of the coronary artery female adult (53 years) and male adult (55 years) +6082 ENCFF263MKB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6083 RNA:regular cardiac myocyte female adult (51 years) and male adult (48 years) +6083 ENCFF263MKB- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6082 RNA:regular cardiac myocyte female adult (51 years) and male adult (48 years) +6084 ENCFF366CCB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6085 RNA:dermis blood vessel endothelial cell female child (16 years) and male child (13 years) +6085 ENCFF366CCB- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6084 RNA:dermis blood vessel endothelial cell female child (16 years) and male child (13 years) +6086 ENCFF805UDX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6087 RNA:dermis lymphatic vessel endothelial cell female adult (45 years) and male child (6 years) +6087 ENCFF805UDX- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6086 RNA:dermis lymphatic vessel endothelial cell female adult (45 years) and male child (6 years) +6088 ENCFF079OMS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6089 RNA:dermis microvascular lymphatic vessel endothelial cell female adult (38 years) and female adult (64 years) +6089 
ENCFF079OMS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6088 RNA:dermis microvascular lymphatic vessel endothelial cell female adult (38 years) and female adult (64 years) +6090 ENCFF114NWW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6091 RNA:nasal cavity respiratory epithelium epithelial cell of viscerocranial mucosa female adult (70 years) and male adult (46 years) +6091 ENCFF114NWW- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6090 RNA:nasal cavity respiratory epithelium epithelial cell of viscerocranial mucosa female adult (70 years) and male adult (46 years) +6092 ENCFF520FAO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6093 RNA:pulmonary artery endothelial cell male adult (23 years) and male adult (52 years) +6093 ENCFF520FAO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6092 RNA:pulmonary artery endothelial cell male adult (23 years) and male adult (52 years) +6094 ENCFF679XSO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6095 RNA:smooth muscle cell of the pulmonary artery male adult (26 years) and male adult (28 years) +6095 ENCFF679XSO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6094 RNA:smooth muscle cell of the pulmonary artery male adult (26 years) and male adult (28 years) +6096 ENCFF556PAF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6097 RNA:fibroblast of lung female adult (83 years) and male adult (23 years) +6097 ENCFF556PAF- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6096 RNA:fibroblast of lung female adult (83 years) and male adult (23 years) +6098 ENCFF618FQK+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AAP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6099 RNA:lung microvascular endothelial cell female adult (55 years) and male adult (63 years) +6099 ENCFF618FQK- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6098 RNA:lung microvascular endothelial cell female adult (55 years) and male adult (63 years) +6100 ENCFF818OXF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6101 RNA:renal cortical epithelial cell female adult (69 years) and male adult (84 years) +6101 ENCFF818OXF- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6100 RNA:renal cortical epithelial cell female adult (69 years) and male adult (84 years) +6102 ENCFF520NFI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6103 RNA:tracheal epithelial cell male adult (21 years) and male adult (68 years) +6103 ENCFF520NFI- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6102 RNA:tracheal epithelial cell male adult (21 years) and male adult (68 years) +6104 ENCFF749ZJH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6105 RNA:smooth muscle cell of trachea male adult (28 years) and male adult (56 years) +6105 ENCFF749ZJH- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6104 RNA:smooth muscle cell of trachea male adult (28 years) and male adult (56 years) +6106 ENCFF572FWT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6107 RNA:epithelial cell of umbilical artery female newborn and male newborn +6107 ENCFF572FWT- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6106 RNA:epithelial cell of umbilical artery female newborn and 
male newborn +6108 ENCFF096HEZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6109 RNA:smooth muscle cell of the umbilical artery female newborn and male newborn +6109 ENCFF096HEZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6108 RNA:smooth muscle cell of the umbilical artery female newborn and male newborn +6110 ENCFF360EXL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6111 RNA:uterine smooth muscle cell female adult (48 years) and female adult (50 years) +6111 ENCFF360EXL- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AAV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6110 RNA:uterine smooth muscle cell female adult (48 years) and female adult (50 years) +6112 ENCFF470BSF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6113 RNA:GM12878 +6113 ENCFF470BSF- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6112 RNA:GM12878 +6114 ENCFF617ZDV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AED/summary/coverage+.w5 768 384 0.3 sum_sqrt 6115 RNA:GM12878 +6115 ENCFF617ZDV- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AED/summary/coverage-.w5 768 384 0.3 sum_sqrt 6114 RNA:GM12878 +6116 ENCFF993POP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6117 RNA:GM12878 +6117 ENCFF993POP- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6116 RNA:GM12878 +6118 ENCFF678ITA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6119 RNA:GM12878 +6119 ENCFF678ITA- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6118 RNA:GM12878 +6120 ENCFF812WIL /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEG/summary/coverage.w5 768 384 
0.3 sum_sqrt 6120 RNA:GM12878 +6121 ENCFF142WGV /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEH/summary/coverage.w5 768 384 0.3 sum_sqrt 6121 RNA:GM12878 +6122 ENCFF980ZHM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6123 RNA:K562 +6123 ENCFF980ZHM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6122 RNA:K562 +6124 ENCFF132DVY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6125 RNA:K562 +6125 ENCFF132DVY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6124 RNA:K562 +6126 ENCFF919EXM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6127 RNA:K562 +6127 ENCFF919EXM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6126 RNA:K562 +6128 ENCFF552CYS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6129 RNA:K562 +6129 ENCFF552CYS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6128 RNA:K562 +6130 ENCFF991BRF /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEP/summary/coverage.w5 768 384 0.3 sum_sqrt 6130 RNA:K562 +6131 ENCFF769VLM /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6131 RNA:K562 +6132 ENCFF945UHI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6133 RNA:liver tissue female child (6 years) and with nonobstructive coronary artery disease; liver tissue male adult (32 years) +6133 ENCFF945UHI- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6132 RNA:liver tissue female child (6 years) and with nonobstructive coronary artery disease; liver tissue male adult (32 years) +6134 ENCFF775XPO+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AEV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6135 RNA:urinary bladder tissue female embryo (20 weeks) and female embryo (24 weeks) +6135 ENCFF775XPO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6134 RNA:urinary bladder tissue female embryo (20 weeks) and female embryo (24 weeks) +6136 ENCFF638TDD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6137 RNA:cerebellum tissue female embryo (19 weeks) and female embryo (37 weeks) +6137 ENCFF638TDD- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6136 RNA:cerebellum tissue female embryo (19 weeks) and female embryo (37 weeks) +6138 ENCFF355IUO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6139 RNA:diencephalon tissue female embryo (20 weeks) and male embryo (22 weeks) +6139 ENCFF355IUO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6138 RNA:diencephalon tissue female embryo (20 weeks) and male embryo (22 weeks) +6140 ENCFF217HQN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6141 RNA:frontal cortex tissue female embryo (20 weeks) and male embryo (22 weeks) +6141 ENCFF217HQN- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6140 RNA:frontal cortex tissue female embryo (20 weeks) and male embryo (22 weeks) +6142 ENCFF454SXU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6143 RNA:heart tissue female embryo (19 weeks) and female embryo (28 weeks) +6143 ENCFF454SXU- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AEZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6142 RNA:heart tissue female embryo (19 weeks) and female embryo (28 weeks) +6144 ENCFF367PUX+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AFA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6145 RNA:metanephros tissue female embryo (20 weeks) and female embryo (24 weeks) +6145 ENCFF367PUX- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6144 RNA:metanephros tissue female embryo (20 weeks) and female embryo (24 weeks) +6146 ENCFF630VID+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFB/summary/coverage+.w5 768 384 0.3 sum_sqrt 6147 RNA:liver tissue female embryo (20 weeks) and male embryo (22 weeks) +6147 ENCFF630VID- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6146 RNA:liver tissue female embryo (20 weeks) and male embryo (22 weeks) +6148 ENCFF892OBT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6149 RNA:lung tissue female embryo (20 weeks) and female embryo (24 weeks) +6149 ENCFF892OBT- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6148 RNA:lung tissue female embryo (20 weeks) and female embryo (24 weeks) +6150 ENCFF537TSF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6151 RNA:occipital lobe tissue female embryo (20 weeks) and male embryo (22 weeks) +6151 ENCFF537TSF- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6150 RNA:occipital lobe tissue female embryo (20 weeks) and male embryo (22 weeks) +6152 ENCFF470OAY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6153 RNA:parietal lobe tissue female embryo (24 weeks) and male embryo (22 weeks) +6153 ENCFF470OAY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6152 RNA:parietal lobe tissue female embryo (24 weeks) and male embryo (22 weeks) +6154 ENCFF285OHV+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AFF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6155 RNA:skeletal muscle tissue tissue female embryo (19 weeks) and male embryo (22 weeks) +6155 ENCFF285OHV- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6154 RNA:skeletal muscle tissue tissue female embryo (19 weeks) and male embryo (22 weeks) +6156 ENCFF198TLM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6157 RNA:skin of body tissue female embryo (24 weeks) and male embryo (22 weeks) +6157 ENCFF198TLM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6156 RNA:skin of body tissue female embryo (24 weeks) and male embryo (22 weeks) +6158 ENCFF768YRQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6159 RNA:spinal cord tissue female embryo (24 weeks) and male embryo (22 weeks) +6159 ENCFF768YRQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6158 RNA:spinal cord tissue female embryo (24 weeks) and male embryo (22 weeks) +6160 ENCFF718MVN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6161 RNA:stomach tissue female embryo (40 weeks) and male embryo (36 weeks) +6161 ENCFF718MVN- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6160 RNA:stomach tissue female embryo (40 weeks) and male embryo (36 weeks) +6162 ENCFF709SZT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6163 RNA:temporal lobe tissue female embryo (20 weeks) and female embryo (24 weeks) +6163 ENCFF709SZT- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6162 RNA:temporal lobe tissue female embryo (20 weeks) and female embryo (24 weeks) +6164 ENCFF844XCX+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AFK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6165 RNA:thyroid gland tissue female embryo (37 weeks) and female embryo (40 weeks) +6165 ENCFF844XCX- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6164 RNA:thyroid gland tissue female embryo (37 weeks) and female embryo (40 weeks) +6166 ENCFF175KXC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6167 RNA:tongue tissue female embryo (20 weeks) and female embryo (24 weeks) +6167 ENCFF175KXC- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6166 RNA:tongue tissue female embryo (20 weeks) and female embryo (24 weeks) +6168 ENCFF604YPC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6169 RNA:umbilical cord tissue male embryo (20 weeks) and male embryo (31 weeks) +6169 ENCFF604YPC- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6168 RNA:umbilical cord tissue male embryo (20 weeks) and male embryo (31 weeks) +6170 ENCFF592ZIP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6171 RNA:uterus tissue female embryo (24 weeks) and female embryo (28 weeks) +6171 ENCFF592ZIP- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6170 RNA:uterus tissue female embryo (24 weeks) and female embryo (28 weeks) +6172 ENCFF676EVQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6173 RNA:camera-type eye tissue female embryo (20 weeks) and female embryo (24 weeks) +6173 ENCFF676EVQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AFO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6172 RNA:camera-type eye tissue female embryo (20 weeks) and female embryo (24 weeks) +6174 ENCFF018EZY+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000AHH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6175 RNA:heart tissue male adult (34 years) +6175 ENCFF018EZY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000AHH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6174 RNA:heart tissue male adult (34 years) +6176 ENCFF549SVZ /home/drk/tillage/datasets/human/rna/encode/ENCSR000BXX/summary/coverage.w5 768 384 0.3 sum_sqrt 6176 RNA:Jurkat, Clone E6-1 +6177 ENCFF263CML /home/drk/tillage/datasets/human/rna/encode/ENCSR000BXY/summary/coverage.w5 768 384 0.3 sum_sqrt 6177 RNA:A549 treated with 0.1 nM dexamethasone for 1 hour +6178 ENCFF882JUS /home/drk/tillage/datasets/human/rna/encode/ENCSR000BXZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6178 RNA:Ishikawa treated with 100 nM bisphenol A for 4 hours +6179 ENCFF188RIO /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYA/summary/coverage.w5 768 384 0.3 sum_sqrt 6179 RNA:Ishikawa treated with 0.02% dimethyl sulfoxide for 4 hours +6180 ENCFF165CZI /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYB/summary/coverage.w5 768 384 0.3 sum_sqrt 6180 RNA:Ishikawa treated with 10 nM estradiol for 4 hours +6181 ENCFF755HLM /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYC/summary/coverage.w5 768 384 0.3 sum_sqrt 6181 RNA:Ishikawa treated with 100 nM genistein for 4 hours +6182 ENCFF353TRR /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYD/summary/coverage.w5 768 384 0.3 sum_sqrt 6182 RNA:T47D treated with 100 nM bisphenol A for 4 hours +6183 ENCFF287DGM /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYE/summary/coverage.w5 768 384 0.3 sum_sqrt 6183 RNA:T47D treated with 0.02% dimethyl sulfoxide for 4 hours +6184 ENCFF660UBK /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYF/summary/coverage.w5 768 384 0.3 sum_sqrt 6184 RNA:T47D treated with 10 nM estradiol for 4 hours +6185 ENCFF353MIF /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYG/summary/coverage.w5 768 384 0.3 sum_sqrt 6185 RNA:T47D treated with 100 nM 
genistein for 4 hours +6186 ENCFF808QXI /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYH/summary/coverage.w5 768 384 0.3 sum_sqrt 6186 RNA:A549 treated with 1 nM dexamethasone for 1 hour +6187 ENCFF181YIA /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYI/summary/coverage.w5 768 384 0.3 sum_sqrt 6187 RNA:A549 treated with 0.5 nM dexamethasone for 1 hour +6188 ENCFF409JEW /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYJ/summary/coverage.w5 768 384 0.3 sum_sqrt 6188 RNA:A549 treated with 5 nM dexamethasone for 1 hour +6189 ENCFF949LLY /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYK/summary/coverage.w5 768 384 0.3 sum_sqrt 6189 RNA:BE2C +6190 ENCFF038WPL /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYL/summary/coverage.w5 768 384 0.3 sum_sqrt 6190 RNA:SK-N-SH +6191 ENCFF142KMX /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYM/summary/coverage.w5 768 384 0.3 sum_sqrt 6191 RNA:Panc1 +6192 ENCFF378HTE /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYN/summary/coverage.w5 768 384 0.3 sum_sqrt 6192 RNA:PFSK-1 +6193 ENCFF450BGT /home/drk/tillage/datasets/human/rna/encode/ENCSR000BYO/summary/coverage.w5 768 384 0.3 sum_sqrt 6193 RNA:U-87 MG +6194 ENCFF222BDI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000BZU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6195 RNA:H1 +6195 ENCFF222BDI- /home/drk/tillage/datasets/human/rna/encode/ENCSR000BZU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6194 RNA:H1 +6196 ENCFF516KYO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6197 RNA:K562 cytosolic fraction +6197 ENCFF516KYO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6196 RNA:K562 cytosolic fraction +6198 ENCFF575YME+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CON/summary/coverage+.w5 768 384 0.3 sum_sqrt 6199 RNA:A549 +6199 ENCFF575YME- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CON/summary/coverage-.w5 768 384 0.3 sum_sqrt 
6198 RNA:A549 +6200 ENCFF735EPI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6201 RNA:AG04450 +6201 ENCFF735EPI- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6200 RNA:AG04450 +6202 ENCFF829TKX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6203 RNA:BJ +6203 ENCFF829TKX- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6202 RNA:BJ +6204 ENCFF914IXY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6205 RNA:GM12878 +6205 ENCFF914IXY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6204 RNA:GM12878 +6206 ENCFF696CDU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6207 RNA:GM12878 cytosolic fraction +6207 ENCFF696CDU- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6206 RNA:GM12878 cytosolic fraction +6208 ENCFF501KFP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6209 RNA:H1 +6209 ENCFF501KFP- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6208 RNA:H1 +6210 ENCFF729MSA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6211 RNA:H1 cytosolic fraction +6211 ENCFF729MSA- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6210 RNA:H1 cytosolic fraction +6212 ENCFF587OLO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6213 RNA:H1 nuclear fraction +6213 ENCFF587OLO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6212 RNA:H1 nuclear fraction +6214 ENCFF888MQS+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000COX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6215 RNA:mammary epithelial cell female +6215 ENCFF888MQS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6214 RNA:mammary epithelial cell female +6216 ENCFF049VPK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6217 RNA:skeletal muscle myoblast +6217 ENCFF049VPK- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6216 RNA:skeletal muscle myoblast +6218 ENCFF005LKD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000COZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6219 RNA:endothelial cell of umbilical vein male newborn +6219 ENCFF005LKD- /home/drk/tillage/datasets/human/rna/encode/ENCSR000COZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6218 RNA:endothelial cell of umbilical vein male newborn +6220 ENCFF032IMA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6221 RNA:endothelial cell of umbilical vein cytosolic fraction male newborn +6221 ENCFF032IMA- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6220 RNA:endothelial cell of umbilical vein cytosolic fraction male newborn +6222 ENCFF804BGA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPB/summary/coverage+.w5 768 384 0.3 sum_sqrt 6223 RNA:endothelial cell of umbilical vein nuclear fraction male newborn +6223 ENCFF804BGA- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6222 RNA:endothelial cell of umbilical vein nuclear fraction male newborn +6224 ENCFF431NUC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6225 RNA:HepG2 nuclear fraction +6225 ENCFF431NUC- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6224 RNA:HepG2 
nuclear fraction +6226 ENCFF065PJM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6227 RNA:HepG2 +6227 ENCFF065PJM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6226 RNA:HepG2 +6228 ENCFF518DFP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6229 RNA:HepG2 cytosolic fraction +6229 ENCFF518DFP- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6228 RNA:HepG2 cytosolic fraction +6230 ENCFF119ONM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6231 RNA:K562 +6231 ENCFF119ONM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6230 RNA:K562 +6232 ENCFF217JSG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6233 RNA:keratinocyte nuclear fraction female +6233 ENCFF217JSG- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6232 RNA:keratinocyte nuclear fraction female +6234 ENCFF226FBQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6235 RNA:keratinocyte cytosolic fraction female +6235 ENCFF226FBQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6234 RNA:keratinocyte cytosolic fraction female +6236 ENCFF703WGS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6237 RNA:keratinocyte female +6237 ENCFF703WGS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6236 RNA:keratinocyte female +6238 ENCFF664JII+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6239 RNA:fibroblast of lung male adult (45 years) +6239 ENCFF664JII- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000CPM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6238 RNA:fibroblast of lung male adult (45 years) +6240 ENCFF217KTR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6241 RNA:SK-N-SH treated with 6 uM all-trans-retinoic acid for 48 hours +6241 ENCFF217KTR- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6240 RNA:SK-N-SH treated with 6 uM all-trans-retinoic acid for 48 hours +6242 ENCFF983FMY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6243 RNA:GM12878 nuclear fraction +6243 ENCFF983FMY- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6242 RNA:GM12878 nuclear fraction +6244 ENCFF425GMM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6245 RNA:HeLa-S3 cytosolic fraction +6245 ENCFF425GMM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6244 RNA:HeLa-S3 cytosolic fraction +6246 ENCFF515VVS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6247 RNA:HeLa-S3 nuclear fraction +6247 ENCFF515VVS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6246 RNA:HeLa-S3 nuclear fraction +6248 ENCFF708PAF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6249 RNA:HeLa-S3 +6249 ENCFF708PAF- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6248 RNA:HeLa-S3 +6250 ENCFF669RVI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6251 RNA:K562 nuclear fraction +6251 ENCFF669RVI- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6250 RNA:K562 nuclear fraction 
+6252 ENCFF795XIC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6253 RNA:MCF-7 +6253 ENCFF795XIC- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6252 RNA:MCF-7 +6254 ENCFF124AUH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6255 RNA:K562 chromatin fraction +6255 ENCFF124AUH- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6254 RNA:K562 chromatin fraction +6256 ENCFF886LMB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6257 RNA:K562 nucleolus fraction +6257 ENCFF886LMB- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CPZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6256 RNA:K562 nucleolus fraction +6258 ENCFF246QLC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CQA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6259 RNA:K562 nucleoplasmic fraction +6259 ENCFF246QLC- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CQA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6258 RNA:K562 nucleoplasmic fraction +6260 ENCFF087WJT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6261 RNA:IMR-90 +6261 ENCFF087WJT- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6260 RNA:IMR-90 +6262 ENCFF717ESE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6263 RNA:A549 cytosolic fraction +6263 ENCFF717ESE- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6262 RNA:A549 cytosolic fraction +6264 ENCFF771QOV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6265 RNA:A549 nuclear fraction +6265 ENCFF771QOV- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTM/summary/coverage-.w5 768 384 0.3 
sum_sqrt 6264 RNA:A549 nuclear fraction +6266 ENCFF096YGQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6267 RNA:IMR-90 cytosolic fraction +6267 ENCFF096YGQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6266 RNA:IMR-90 cytosolic fraction +6268 ENCFF229AJQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6269 RNA:MCF-7 nuclear fraction +6269 ENCFF229AJQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6268 RNA:MCF-7 nuclear fraction +6270 ENCFF236YOT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6271 RNA:IMR-90 nuclear fraction +6271 ENCFF236YOT- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6270 RNA:IMR-90 nuclear fraction +6272 ENCFF705NPF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6273 RNA:IMR-90 +6273 ENCFF705NPF- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6272 RNA:IMR-90 +6274 ENCFF031HEW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6275 RNA:SK-N-SH cytosolic fraction +6275 ENCFF031HEW- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6274 RNA:SK-N-SH cytosolic fraction +6276 ENCFF256CAM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6277 RNA:SK-N-SH nuclear fraction +6277 ENCFF256CAM- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6276 RNA:SK-N-SH nuclear fraction +6278 ENCFF601EJS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6279 RNA:SK-N-SH +6279 ENCFF601EJS- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000CTT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6278 RNA:SK-N-SH +6280 ENCFF297JKU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6281 RNA:MCF-7 cytosolic fraction +6281 ENCFF297JKU- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6280 RNA:MCF-7 cytosolic fraction +6282 ENCFF252DFZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6283 RNA:B cell female adult (27 years) and female adult (43 years) +6283 ENCFF252DFZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6282 RNA:B cell female adult (27 years) and female adult (43 years) +6284 ENCFF779FMR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6285 RNA:placental pericyte female newborn and male newborn +6285 ENCFF779FMR- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6284 RNA:placental pericyte female newborn and male newborn +6286 ENCFF653TAB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6287 RNA:mesenchymal stem cell of adipose female adult (37 years) and female adult (42 years) +6287 ENCFF653TAB- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CTZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6286 RNA:mesenchymal stem cell of adipose female adult (37 years) and female adult (42 years) +6288 ENCFF841GMC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6289 RNA:hematopoietic multipotent progenitor cell +6289 ENCFF841GMC- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6288 RNA:hematopoietic multipotent progenitor cell +6290 ENCFF798NEI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUB/summary/coverage+.w5 768 
384 0.3 sum_sqrt 6291 RNA:hair follicle dermal papilla cell female adult (47 years) and female adult (70 years) +6291 ENCFF798NEI- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6290 RNA:hair follicle dermal papilla cell female adult (47 years) and female adult (70 years) +6292 ENCFF023YXV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6293 RNA:CD14-positive monocyte female +6293 ENCFF023YXV- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6292 RNA:CD14-positive monocyte female +6294 ENCFF359MTP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6295 RNA:mesenchymal stem cell of the bone marrow female adult (60 years) and male adult (57 years) +6295 ENCFF359MTP- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6294 RNA:mesenchymal stem cell of the bone marrow female adult (60 years) and male adult (57 years) +6296 ENCFF314QIG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6297 RNA:articular chondrocyte of knee joint female adult (56 years) and male adult (64 years) +6297 ENCFF314QIG- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6296 RNA:articular chondrocyte of knee joint female adult (56 years) and male adult (64 years) +6298 ENCFF953CRN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6299 RNA:osteoblast female adult (56 years) and male adult (62 years) +6299 ENCFF953CRN- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6298 RNA:osteoblast female adult (56 years) and male adult (62 years) +6300 ENCFF143TXA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6301 RNA:vein 
endothelial cell male adult (48 years) and male adult (52 years) +6301 ENCFF143TXA- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6300 RNA:vein endothelial cell male adult (48 years) and male adult (52 years) +6302 ENCFF343FWG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6303 RNA:fibroblast of dermis female adult (44 years) and female adult (55 years) +6303 ENCFF343FWG- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6302 RNA:fibroblast of dermis female adult (44 years) and female adult (55 years) +6304 ENCFF804YRV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6305 RNA:skeletal muscle satellite cell female adult (64 years) and male adult (21 years) +6305 ENCFF804YRV- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6304 RNA:skeletal muscle satellite cell female adult (64 years) and male adult (21 years) +6306 ENCFF256ZZS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6307 RNA:fibroblast of the aortic adventitia female adult (24 years) and male adult (47 years) +6307 ENCFF256ZZS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6306 RNA:fibroblast of the aortic adventitia female adult (24 years) and male adult (47 years) +6308 ENCFF805HMR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6309 RNA:thoracic aorta endothelial cell female adult (22 years) and male adult (55 years) +6309 ENCFF805HMR- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6308 RNA:thoracic aorta endothelial cell female adult (22 years) and male adult (55 years) +6310 ENCFF649CUP+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000CUL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6311 RNA:fibroblast of villous mesenchyme female newborn and male newborn +6311 ENCFF649CUP- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6310 RNA:fibroblast of villous mesenchyme female newborn and male newborn +6312 ENCFF233TOO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6313 RNA:subcutaneous preadipocyte female adult (62 years) and male adult (65 years) +6313 ENCFF233TOO- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6312 RNA:subcutaneous preadipocyte female adult (62 years) and male adult (65 years) +6314 ENCFF096YBH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6315 RNA:mammary epithelial cell female adult (23 years) +6315 ENCFF096YBH- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6314 RNA:mammary epithelial cell female adult (23 years) +6316 ENCFF492ICA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6317 RNA:mesenchymal stem cell of Wharton's jelly female newborn and male newborn +6317 ENCFF492ICA- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6316 RNA:mesenchymal stem cell of Wharton's jelly female newborn and male newborn +6318 ENCFF591NKN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6319 RNA:placental epithelial cell female newborn and male newborn +6319 ENCFF591NKN- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6318 RNA:placental epithelial cell female newborn and male newborn +6320 ENCFF723WDG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 
6321 RNA:melanocyte of skin male child (1 year) and male child (3 years) +6321 ENCFF723WDG- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6320 RNA:melanocyte of skin male child (1 year) and male child (3 years) +6322 ENCFF230ZSG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6323 RNA:melanocyte of skin female adult (52 years) and male adult (55 years) +6323 ENCFF230ZSG- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6322 RNA:melanocyte of skin female adult (52 years) and male adult (55 years) +6324 ENCFF151XTV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6325 RNA:mononuclear cell female adult (52 years) +6325 ENCFF151XTV- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CUT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6324 RNA:mononuclear cell female adult (52 years) +6326 ENCFF853JVS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CVT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6327 RNA:GM12878 nucleolus fraction +6327 ENCFF853JVS- /home/drk/tillage/datasets/human/rna/encode/ENCSR000CVT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6326 RNA:GM12878 nucleolus fraction +6328 ENCFF982YRZ /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWG/summary/coverage.w5 768 384 0.3 sum_sqrt 6328 RNA:K562 treated with Interferon gamma for 6 hours +6329 ENCFF594PZM /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWH/summary/coverage.w5 768 384 0.3 sum_sqrt 6329 RNA:K562 treated with Interferon gamma for 30 minutes +6330 ENCFF722OTS /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWI/summary/coverage.w5 768 384 0.3 sum_sqrt 6330 RNA:K562 treated with interferon alpha for 6 hours +6331 ENCFF311UQI /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWJ/summary/coverage.w5 768 384 0.3 sum_sqrt 6331 RNA:K562 treated with interferon alpha for 30 minutes +6332 
ENCFF987SCM /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWK/summary/coverage.w5 768 384 0.3 sum_sqrt 6332 RNA:GM12891 +6333 ENCFF659BXA /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWL/summary/coverage.w5 768 384 0.3 sum_sqrt 6333 RNA:GM12892 +6334 ENCFF069PST /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWM/summary/coverage.w5 768 384 0.3 sum_sqrt 6334 RNA:HCT116 +6335 ENCFF106FBY /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWN/summary/coverage.w5 768 384 0.3 sum_sqrt 6335 RNA:skeletal muscle myoblast +6336 ENCFF543YFY /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWO/summary/coverage.w5 768 384 0.3 sum_sqrt 6336 RNA:myocyte originated from LHCN-M2 +6337 ENCFF905ISC /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWP/summary/coverage.w5 768 384 0.3 sum_sqrt 6337 RNA:LHCN-M2 +6338 ENCFF808PHI /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6338 RNA:MCF-7 +6339 ENCFF036JNR /home/drk/tillage/datasets/human/rna/encode/ENCSR000CWR/summary/coverage.w5 768 384 0.3 sum_sqrt 6339 RNA:fibroblast of lung +6340 ENCFF684NXV /home/drk/tillage/datasets/human/rna/encode/ENCSR000EYN/summary/coverage.w5 768 384 0.3 sum_sqrt 6340 RNA:GM12878 +6341 ENCFF860AKE /home/drk/tillage/datasets/human/rna/encode/ENCSR000EYO/summary/coverage.w5 768 384 0.3 sum_sqrt 6341 RNA:K562 +6342 ENCFF687EZU /home/drk/tillage/datasets/human/rna/encode/ENCSR000EYP/summary/coverage.w5 768 384 0.3 sum_sqrt 6342 RNA:H1 +6343 ENCFF587FSZ /home/drk/tillage/datasets/human/rna/encode/ENCSR000EYQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6343 RNA:HeLa-S3 +6344 ENCFF995HXY /home/drk/tillage/datasets/human/rna/encode/ENCSR000EYR/summary/coverage.w5 768 384 0.3 sum_sqrt 6344 RNA:HepG2 +6345 ENCFF618GYZ /home/drk/tillage/datasets/human/rna/encode/ENCSR000EYS/summary/coverage.w5 768 384 0.3 sum_sqrt 6345 RNA:endothelial cell of umbilical vein newborn +6346 ENCFF609GRY 
/home/drk/tillage/datasets/human/rna/encode/ENCSR000EYT/summary/coverage.w5 768 384 0.3 sum_sqrt 6346 RNA:keratinocyte female +6347 ENCFF155OWV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR001HHK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6348 RNA:OCI-LY7 +6348 ENCFF155OWV- /home/drk/tillage/datasets/human/rna/encode/ENCSR001HHK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6347 RNA:OCI-LY7 +6349 ENCFF227YIR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR001UXR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6350 RNA:pancreas tissue female adult (30 years) +6350 ENCFF227YIR- /home/drk/tillage/datasets/human/rna/encode/ENCSR001UXR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6349 RNA:pancreas tissue female adult (30 years) +6351 ENCFF775NPV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR002CTR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6352 RNA:endodermal cell originated from HUES64 +6352 ENCFF775NPV- /home/drk/tillage/datasets/human/rna/encode/ENCSR002CTR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6351 RNA:endodermal cell originated from HUES64 +6353 ENCFF357VZF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR003BTD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6354 RNA:adrenal gland tissue female adult (47 years) +6354 ENCFF357VZF- /home/drk/tillage/datasets/human/rna/encode/ENCSR003BTD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6353 RNA:adrenal gland tissue female adult (47 years) +6355 ENCFF496WON /home/drk/tillage/datasets/human/rna/encode/ENCSR006EBD/summary/coverage.w5 768 384 0.3 sum_sqrt 6355 RNA:K562 treated with 100 nM GSK J4 for 4 hours +6356 ENCFF028ZKC /home/drk/tillage/datasets/human/rna/encode/ENCSR007OKF/summary/coverage.w5 768 384 0.3 sum_sqrt 6356 RNA:CD8-positive, alpha-beta memory T cell +6357 ENCFF719AYC /home/drk/tillage/datasets/human/rna/encode/ENCSR015EMF/summary/coverage.w5 768 384 0.3 sum_sqrt 6357 RNA:left renal cortex interstitium tissue male embryo (105 days) +6358 ENCFF952CKD+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR015PUN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6359 RNA:Right ventricle myocardium inferior tissue male adult (60 years) +6359 ENCFF952CKD- /home/drk/tillage/datasets/human/rna/encode/ENCSR015PUN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6358 RNA:Right ventricle myocardium inferior tissue male adult (60 years) +6360 ENCFF233HJC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR019ICB/summary/coverage+.w5 768 384 0.3 sum_sqrt 6361 RNA:middle frontal area 46 tissue female adult (90 or above years) +6361 ENCFF233HJC- /home/drk/tillage/datasets/human/rna/encode/ENCSR019ICB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6360 RNA:middle frontal area 46 tissue female adult (90 or above years) +6362 ENCFF626VIR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR019MXZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6363 RNA:HepG2 insoluble cytoplasmic fraction +6363 ENCFF626VIR- /home/drk/tillage/datasets/human/rna/encode/ENCSR019MXZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6362 RNA:HepG2 insoluble cytoplasmic fraction +6364 ENCFF401YXF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR020YQE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6365 RNA:mammary epithelial cell female +6365 ENCFF401YXF- /home/drk/tillage/datasets/human/rna/encode/ENCSR020YQE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6364 RNA:mammary epithelial cell female +6366 ENCFF354WKE /home/drk/tillage/datasets/human/rna/encode/ENCSR022MON/summary/coverage.w5 768 384 0.3 sum_sqrt 6366 RNA:fibroblast of skin of scalp male embryo (97 days) +6367 ENCFF044XSJ /home/drk/tillage/datasets/human/rna/encode/ENCSR023VVO/summary/coverage.w5 768 384 0.3 sum_sqrt 6367 RNA:bipolar neuron originated from GM23338 treated with 0.5 ug/mL doxycycline hyclate for 4 days +6368 ENCFF796PND+ /home/drk/tillage/datasets/human/rna/encode/ENCSR023ZXN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6369 RNA:thyroid gland tissue male adult (54 years) +6369 ENCFF796PND- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR023ZXN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6368 RNA:thyroid gland tissue male adult (54 years) +6370 ENCFF408WKS /home/drk/tillage/datasets/human/rna/encode/ENCSR027EJD/summary/coverage.w5 768 384 0.3 sum_sqrt 6370 RNA:muscle of back tissue female embryo (115 days) +6371 ENCFF123MLK /home/drk/tillage/datasets/human/rna/encode/ENCSR029FTY/summary/coverage.w5 768 384 0.3 sum_sqrt 6371 RNA:left renal pelvis tissue male embryo (105 days) +6372 ENCFF182JIC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR029KNZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6373 RNA:testis tissue male adult (37 years) +6373 ENCFF182JIC- /home/drk/tillage/datasets/human/rna/encode/ENCSR029KNZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6372 RNA:testis tissue male adult (37 years) +6374 ENCFF579IBH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR033XWU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6375 RNA:CD4-positive, alpha-beta T cell male adult (20 years) +6375 ENCFF579IBH- /home/drk/tillage/datasets/human/rna/encode/ENCSR033XWU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6374 RNA:CD4-positive, alpha-beta T cell male adult (20 years) +6376 ENCFF487XUM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR035SKV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6377 RNA:gastroesophageal sphincter tissue female adult (51 years) +6377 ENCFF487XUM- /home/drk/tillage/datasets/human/rna/encode/ENCSR035SKV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6376 RNA:gastroesophageal sphincter tissue female adult (51 years) +6378 ENCFF475JUK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR036SUN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6379 RNA:with mild cognitive impairment; middle frontal area 46 tissue female adult (90 or above years) +6379 ENCFF475JUK- /home/drk/tillage/datasets/human/rna/encode/ENCSR036SUN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6378 RNA:with mild cognitive impairment; middle frontal area 46 tissue female adult (90 or above years) +6380 
ENCFF438BUG /home/drk/tillage/datasets/human/rna/encode/ENCSR038QZA/summary/coverage.w5 768 384 0.3 sum_sqrt 6380 RNA:with multiple sclerosis; naive thymus-derived CD4-positive, alpha-beta T cell +6381 ENCFF933QBC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR038WEK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6382 RNA:K562 membrane fraction +6382 ENCFF933QBC- /home/drk/tillage/datasets/human/rna/encode/ENCSR038WEK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6381 RNA:K562 membrane fraction +6383 ENCFF008CRB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR039ICU/summary/coverage+.w5 768 384 0.8734908657664815 sum_sqrt 6384 RNA:small intestine tissue female adult (30 years) +6384 ENCFF008CRB- /home/drk/tillage/datasets/human/rna/encode/ENCSR039ICU/summary/coverage-.w5 768 384 0.8734908657664815 sum_sqrt 6383 RNA:small intestine tissue female adult (30 years) +6385 ENCFF384MPW /home/drk/tillage/datasets/human/rna/encode/ENCSR039JPA/summary/coverage.w5 768 384 0.3 sum_sqrt 6385 RNA:with multiple sclerosis; immature natural killer cell +6386 ENCFF444LLL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR040YBR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6387 RNA:K562 nuclear fraction +6387 ENCFF444LLL- /home/drk/tillage/datasets/human/rna/encode/ENCSR040YBR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6386 RNA:K562 nuclear fraction +6388 ENCFF411VWO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR042GYH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6389 RNA:ovary tissue female adult (51 years) +6389 ENCFF411VWO- /home/drk/tillage/datasets/human/rna/encode/ENCSR042GYH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6388 RNA:ovary tissue female adult (51 years) +6390 ENCFF392VPC /home/drk/tillage/datasets/human/rna/encode/ENCSR042SVA/summary/coverage.w5 768 384 0.3 sum_sqrt 6390 RNA:with multiple sclerosis; naive thymus-derived CD4-positive, alpha-beta T cell +6391 ENCFF674RUW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR043KVA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6392 
RNA:neurosphere embryo (15 weeks) originated from ganglionic eminence +6392 ENCFF674RUW- /home/drk/tillage/datasets/human/rna/encode/ENCSR043KVA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6391 RNA:neurosphere embryo (15 weeks) originated from ganglionic eminence +6393 ENCFF636VGI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR043RSE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6394 RNA:H1 +6394 ENCFF636VGI- /home/drk/tillage/datasets/human/rna/encode/ENCSR043RSE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6393 RNA:H1 +6395 ENCFF789CEJ /home/drk/tillage/datasets/human/rna/encode/ENCSR044JAQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6395 RNA:right lung tissue male embryo (105 days) +6396 ENCFF905NZB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR045GTF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6397 RNA:lung tissue female adult (47 years) +6397 ENCFF905NZB- /home/drk/tillage/datasets/human/rna/encode/ENCSR045GTF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6396 RNA:lung tissue female adult (47 years) +6398 ENCFF096UCI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR046XHI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6399 RNA:ovary tissue female adult (47 years) +6399 ENCFF096UCI- /home/drk/tillage/datasets/human/rna/encode/ENCSR046XHI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6398 RNA:ovary tissue female adult (47 years) +6400 ENCFF639BWZ /home/drk/tillage/datasets/human/rna/encode/ENCSR047LLJ/summary/coverage.w5 768 384 0.3 sum_sqrt 6400 RNA:heart tissue male embryo (120 days) +6401 ENCFF638TAY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR051GPK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6402 RNA:neurosphere embryo (15 weeks) originated from ganglionic eminence +6402 ENCFF638TAY- /home/drk/tillage/datasets/human/rna/encode/ENCSR051GPK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6401 RNA:neurosphere embryo (15 weeks) originated from ganglionic eminence +6403 ENCFF804UGI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR051QXW/summary/coverage+.w5 768 384 0.3 sum_sqrt 
6404 RNA:chorionic villus tissue female embryo (40 weeks) +6404 ENCFF804UGI- /home/drk/tillage/datasets/human/rna/encode/ENCSR051QXW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6403 RNA:chorionic villus tissue female embryo (40 weeks) +6405 ENCFF652ISB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR052FJA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6406 RNA:smooth muscle cell originated from H9 +6406 ENCFF652ISB- /home/drk/tillage/datasets/human/rna/encode/ENCSR052FJA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6405 RNA:smooth muscle cell originated from H9 +6407 ENCFF196HWN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR052LON/summary/coverage+.w5 768 384 0.3 sum_sqrt 6408 RNA:middle frontal area 46 tissue male adult (71 years) +6408 ENCFF196HWN- /home/drk/tillage/datasets/human/rna/encode/ENCSR052LON/summary/coverage-.w5 768 384 0.3 sum_sqrt 6407 RNA:middle frontal area 46 tissue male adult (71 years) +6409 ENCFF205IED /home/drk/tillage/datasets/human/rna/encode/ENCSR056HPM/summary/coverage.w5 768 384 0.3 sum_sqrt 6409 RNA:K562 treated with 5 uM MB-3 for 12 hours +6410 ENCFF856SMT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR058OSL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6411 RNA:HepG2 nuclear fraction +6411 ENCFF856SMT- /home/drk/tillage/datasets/human/rna/encode/ENCSR058OSL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6410 RNA:HepG2 nuclear fraction +6412 ENCFF035GJD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR061HMO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6413 RNA:middle frontal area 46 tissue female adult (90 or above years) +6413 ENCFF035GJD- /home/drk/tillage/datasets/human/rna/encode/ENCSR061HMO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6412 RNA:middle frontal area 46 tissue female adult (90 or above years) +6414 ENCFF381TII+ /home/drk/tillage/datasets/human/rna/encode/ENCSR061RDC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6415 RNA:with Alzheimer's disease, Cognitive impairment; middle frontal area 46 tissue female adult (87 years) +6415 
ENCFF381TII- /home/drk/tillage/datasets/human/rna/encode/ENCSR061RDC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6414 RNA:with Alzheimer's disease, Cognitive impairment; middle frontal area 46 tissue female adult (87 years) +6416 ENCFF994NAN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR061SFU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6417 RNA:HepG2 nuclear fraction +6417 ENCFF994NAN- /home/drk/tillage/datasets/human/rna/encode/ENCSR061SFU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6416 RNA:HepG2 nuclear fraction +6418 ENCFF127NDE /home/drk/tillage/datasets/human/rna/encode/ENCSR062FHL/summary/coverage.w5 768 384 0.3 sum_sqrt 6418 RNA:K562 treated with 1% DMSO for 4 hours +6419 ENCFF639UPM /home/drk/tillage/datasets/human/rna/encode/ENCSR066FZL/summary/coverage.w5 768 384 0.3 sum_sqrt 6419 RNA:large intestine tissue male embryo (108 days) +6420 ENCFF225USB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR067UNX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6421 RNA:HT1080 nuclear fraction +6421 ENCFF225USB- /home/drk/tillage/datasets/human/rna/encode/ENCSR067UNX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6420 RNA:HT1080 nuclear fraction +6422 ENCFF840VTO /home/drk/tillage/datasets/human/rna/encode/ENCSR069CMT/summary/coverage.w5 768 384 0.3 sum_sqrt 6422 RNA:thymus tissue male embryo (127 days) +6423 ENCFF656OUX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR071DYD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6424 RNA:pancreas tissue female child (16 years) +6424 ENCFF656OUX- /home/drk/tillage/datasets/human/rna/encode/ENCSR071DYD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6423 RNA:pancreas tissue female child (16 years) +6425 ENCFF432ENB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR071ZLM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6426 RNA:uterus tissue female adult (51 years) +6426 ENCFF432ENB- /home/drk/tillage/datasets/human/rna/encode/ENCSR071ZLM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6425 RNA:uterus tissue female adult (51 years) +6427 ENCFF906FBF+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR071ZMO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6428 RNA:kidney tissue male adult (41 years) +6428 ENCFF906FBF- /home/drk/tillage/datasets/human/rna/encode/ENCSR071ZMO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6427 RNA:kidney tissue male adult (41 years) +6429 ENCFF480GYV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR073XFZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6430 RNA:OCI-LY7 +6430 ENCFF480GYV- /home/drk/tillage/datasets/human/rna/encode/ENCSR073XFZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6429 RNA:OCI-LY7 +6431 ENCFF432CYV /home/drk/tillage/datasets/human/rna/encode/ENCSR074APH/summary/coverage.w5 768 384 0.3 sum_sqrt 6431 RNA:right lung tissue female embryo (98 days) +6432 ENCFF434LID+ /home/drk/tillage/datasets/human/rna/encode/ENCSR074FTH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6433 RNA:spleen tissue male adult (26 years) +6433 ENCFF434LID- /home/drk/tillage/datasets/human/rna/encode/ENCSR074FTH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6432 RNA:spleen tissue male adult (26 years) +6434 ENCFF073NKN /home/drk/tillage/datasets/human/rna/encode/ENCSR077AZT/summary/coverage.w5 768 384 0.3 sum_sqrt 6434 RNA:GM12878 +6435 ENCFF847OUD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR080HPT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6436 RNA:omental fat pad tissue male adult (54 years) +6436 ENCFF847OUD- /home/drk/tillage/datasets/human/rna/encode/ENCSR080HPT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6435 RNA:omental fat pad tissue male adult (54 years) +6437 ENCFF521SQI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR085HNI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6438 RNA:liver tissue male adult (18 years) +6438 ENCFF521SQI- /home/drk/tillage/datasets/human/rna/encode/ENCSR085HNI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6437 RNA:liver tissue male adult (18 years) +6439 ENCFF884CFC /home/drk/tillage/datasets/human/rna/encode/ENCSR086DZF/summary/coverage.w5 768 384 0.3 sum_sqrt 6439 RNA:muscle of leg 
tissue male embryo (96 days) +6440 ENCFF894JRG /home/drk/tillage/datasets/human/rna/encode/ENCSR092CNB/summary/coverage.w5 768 384 0.3 sum_sqrt 6440 RNA:IgD-negative memory B cell +6441 ENCFF865FLR /home/drk/tillage/datasets/human/rna/encode/ENCSR092KKW/summary/coverage.w5 768 384 0.3 sum_sqrt 6441 RNA:with multiple sclerosis; naive thymus-derived CD4-positive, alpha-beta T cell +6442 ENCFF387UUZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR094GVZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6443 RNA:sigmoid colon tissue female adult (53 years) +6443 ENCFF387UUZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR094GVZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6442 RNA:sigmoid colon tissue female adult (53 years) +6444 ENCFF430LQC /home/drk/tillage/datasets/human/rna/encode/ENCSR094RGI/summary/coverage.w5 768 384 0.3 sum_sqrt 6444 RNA:muscle of back tissue male embryo (96 days) +6445 ENCFF863UAA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR094VRQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6446 RNA:breast epithelium tissue male adult (37 years) +6446 ENCFF863UAA- /home/drk/tillage/datasets/human/rna/encode/ENCSR094VRQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6445 RNA:breast epithelium tissue male adult (37 years) +6447 ENCFF942QIE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR096LTX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6448 RNA:spleen tissue female adult (61 years) +6448 ENCFF942QIE- /home/drk/tillage/datasets/human/rna/encode/ENCSR096LTX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6447 RNA:spleen tissue female adult (61 years) +6449 ENCFF656PZD /home/drk/tillage/datasets/human/rna/encode/ENCSR096USV/summary/coverage.w5 768 384 0.3 sum_sqrt 6449 RNA:muscle of leg tissue male embryo (127 days) +6450 ENCFF071TXL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR098BUF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6451 RNA:esophagus muscularis mucosa tissue female adult (53 years) +6451 ENCFF071TXL- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR098BUF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6450 RNA:esophagus muscularis mucosa tissue female adult (53 years) +6452 ENCFF710BHJ /home/drk/tillage/datasets/human/rna/encode/ENCSR100JNS/summary/coverage.w5 768 384 0.3 sum_sqrt 6452 RNA:K562 treated with 1% DMSO for 24 hours +6453 ENCFF689GBG /home/drk/tillage/datasets/human/rna/encode/ENCSR100VUY/summary/coverage.w5 768 384 0.3 sum_sqrt 6453 RNA:K562 treated with 10 nM Chaetocin for 24 hours +6454 ENCFF988EBT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR102TQN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6455 RNA:esophagus tissue male adult (34 years) +6455 ENCFF988EBT- /home/drk/tillage/datasets/human/rna/encode/ENCSR102TQN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6454 RNA:esophagus tissue male adult (34 years) +6456 ENCFF056NWD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR104ZDH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6457 RNA:placental basal plate tissue embryo (16 weeks) +6457 ENCFF056NWD- /home/drk/tillage/datasets/human/rna/encode/ENCSR104ZDH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6456 RNA:placental basal plate tissue embryo (16 weeks) +6458 ENCFF876APW /home/drk/tillage/datasets/human/rna/encode/ENCSR105NQB/summary/coverage.w5 768 384 0.3 sum_sqrt 6458 RNA:K562 treated with 7.5 nM Vorinostat for 12 hours +6459 ENCFF507FAA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR106SZN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6460 RNA:spleen tissue male adult (54 years) +6460 ENCFF507FAA- /home/drk/tillage/datasets/human/rna/encode/ENCSR106SZN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6459 RNA:spleen tissue male adult (54 years) +6461 ENCFF136YVC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR108MAU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6462 RNA:suprapubic skin tissue male adult (54 years) +6462 ENCFF136YVC- /home/drk/tillage/datasets/human/rna/encode/ENCSR108MAU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6461 RNA:suprapubic skin tissue male 
adult (54 years) +6463 ENCFF478XYM /home/drk/tillage/datasets/human/rna/encode/ENCSR108XUM/summary/coverage.w5 768 384 0.3 sum_sqrt 6463 RNA:naive thymus-derived CD4-positive, alpha-beta T cell +6464 ENCFF944XYT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR109IQO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6465 RNA:K562 +6465 ENCFF944XYT- /home/drk/tillage/datasets/human/rna/encode/ENCSR109IQO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6464 RNA:K562 +6466 ENCFF417PPZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR110BDY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6467 RNA:cardiac atrium fibroblast male child (2 years) +6467 ENCFF417PPZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR110BDY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6466 RNA:cardiac atrium fibroblast male child (2 years) +6468 ENCFF122XJD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR111PSY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6469 RNA:activated T-cell male adult (42 years) treated with 50 U/mL Interleukin-2 for 4 hours, anti-CD3 and anti-CD28 coated beads for 4 hours +6469 ENCFF122XJD- /home/drk/tillage/datasets/human/rna/encode/ENCSR111PSY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6468 RNA:activated T-cell male adult (42 years) treated with 50 U/mL Interleukin-2 for 4 hours, anti-CD3 and anti-CD28 coated beads for 4 hours +6470 ENCFF823LKJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR113HQM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6471 RNA:uterus tissue female adult (53 years) +6471 ENCFF823LKJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR113HQM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6470 RNA:uterus tissue female adult (53 years) +6472 ENCFF385SAC /home/drk/tillage/datasets/human/rna/encode/ENCSR114LNC/summary/coverage.w5 768 384 0.3 sum_sqrt 6472 RNA:K562 treated with 7.5 nM Vorinostat for 4 hours +6473 ENCFF132EEP /home/drk/tillage/datasets/human/rna/encode/ENCSR115PIZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6473 RNA:K562 treated with 10 nM Chaetocin for 48 hours 
+6474 ENCFF767MLU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR118TVR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6475 RNA:epithelial cell of proximal tubule +6475 ENCFF767MLU- /home/drk/tillage/datasets/human/rna/encode/ENCSR118TVR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6474 RNA:epithelial cell of proximal tubule +6476 ENCFF195SMK /home/drk/tillage/datasets/human/rna/encode/ENCSR120NEA/summary/coverage.w5 768 384 0.3 sum_sqrt 6476 RNA:adrenal gland tissue female embryo (85 days) +6477 ENCFF996KKA /home/drk/tillage/datasets/human/rna/encode/ENCSR123ZCX/summary/coverage.w5 768 384 0.3 sum_sqrt 6477 RNA:stomach tissue male embryo (127 days) +6478 ENCFF642WCG /home/drk/tillage/datasets/human/rna/encode/ENCSR124KOZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6478 RNA:K562 treated with 25 uM Galeterone for 24 hours +6479 ENCFF669XAP /home/drk/tillage/datasets/human/rna/encode/ENCSR125NGM/summary/coverage.w5 768 384 0.3 sum_sqrt 6479 RNA:left renal cortex interstitium tissue male embryo (105 days) +6480 ENCFF962EZE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR128CYL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6481 RNA:Panc1 +6481 ENCFF962EZE- /home/drk/tillage/datasets/human/rna/encode/ENCSR128CYL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6480 RNA:Panc1 +6482 ENCFF310GNU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR129KCJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6483 RNA:lung tissue female adult (30 years) +6483 ENCFF310GNU- /home/drk/tillage/datasets/human/rna/encode/ENCSR129KCJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6482 RNA:lung tissue female adult (30 years) +6484 ENCFF849ELK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR129VBC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6485 RNA:astrocyte +6485 ENCFF849ELK- /home/drk/tillage/datasets/human/rna/encode/ENCSR129VBC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6484 RNA:astrocyte +6486 ENCFF568EVH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR130TZW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6487 
RNA:posterior vena cava tissue female adult (47 years) +6487 ENCFF568EVH- /home/drk/tillage/datasets/human/rna/encode/ENCSR130TZW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6486 RNA:posterior vena cava tissue female adult (47 years) +6488 ENCFF198IBF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR132VGJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6489 RNA:Right ventricle myocardium superior tissue male adult (60 years) +6489 ENCFF198IBF- /home/drk/tillage/datasets/human/rna/encode/ENCSR132VGJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6488 RNA:Right ventricle myocardium superior tissue male adult (60 years) +6490 ENCFF565QRM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR135IAL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6491 RNA:right lobe of liver tissue female adult (41 years) +6491 ENCFF565QRM- /home/drk/tillage/datasets/human/rna/encode/ENCSR135IAL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6490 RNA:right lobe of liver tissue female adult (41 years) +6492 ENCFF117HUT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR136WGP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6493 RNA:SK-N-DZ treated with dimethyl sulfoxide for 72 hours +6493 ENCFF117HUT- /home/drk/tillage/datasets/human/rna/encode/ENCSR136WGP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6492 RNA:SK-N-DZ treated with dimethyl sulfoxide for 72 hours +6494 ENCFF105TXB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR140DCD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6495 RNA:ovary tissue female adult (46 years) +6495 ENCFF105TXB- /home/drk/tillage/datasets/human/rna/encode/ENCSR140DCD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6494 RNA:ovary tissue female adult (46 years) +6496 ENCFF688KBA /home/drk/tillage/datasets/human/rna/encode/ENCSR144UVO/summary/coverage.w5 768 384 0.3 sum_sqrt 6496 RNA:muscle of leg tissue male embryo (105 days) +6497 ENCFF477AQU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR146LBD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6498 RNA:vagina tissue female adult (53 years) +6498 
ENCFF477AQU- /home/drk/tillage/datasets/human/rna/encode/ENCSR146LBD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6497 RNA:vagina tissue female adult (53 years) +6499 ENCFF561SVV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR146ZKR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6500 RNA:adrenal gland tissue female adult (30 years) +6500 ENCFF561SVV- /home/drk/tillage/datasets/human/rna/encode/ENCSR146ZKR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6499 RNA:adrenal gland tissue female adult (30 years) +6501 ENCFF769QDV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR146ZLV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6502 RNA:ovary tissue female adult (59 years) +6502 ENCFF769QDV- /home/drk/tillage/datasets/human/rna/encode/ENCSR146ZLV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6501 RNA:ovary tissue female adult (59 years) +6503 ENCFF908IDY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR146ZSP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6504 RNA:HFFc6 +6504 ENCFF908IDY- /home/drk/tillage/datasets/human/rna/encode/ENCSR146ZSP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6503 RNA:HFFc6 +6505 ENCFF261GFJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR148SUU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6506 RNA:luminal epithelial cell of mammary gland female adult (22 years) +6506 ENCFF261GFJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR148SUU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6505 RNA:luminal epithelial cell of mammary gland female adult (22 years) +6507 ENCFF509OMC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR149AHS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6508 RNA:posterior vena cava tissue female adult (59 years) +6508 ENCFF509OMC- /home/drk/tillage/datasets/human/rna/encode/ENCSR149AHS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6507 RNA:posterior vena cava tissue female adult (59 years) +6509 ENCFF246RCD /home/drk/tillage/datasets/human/rna/encode/ENCSR150JIX/summary/coverage.w5 768 384 0.3 sum_sqrt 6509 RNA:small intestine tissue female embryo (108 days) 
+6510 ENCFF359ATD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR150QJY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6511 RNA:subcutaneous adipose tissue tissue female adult (51 years) +6511 ENCFF359ATD- /home/drk/tillage/datasets/human/rna/encode/ENCSR150QJY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6510 RNA:subcutaneous adipose tissue tissue female adult (51 years) +6512 ENCFF535NFF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR151FXS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6513 RNA:CD8-positive, alpha-beta T cell male adult (21 years) +6513 ENCFF535NFF- /home/drk/tillage/datasets/human/rna/encode/ENCSR151FXS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6512 RNA:CD8-positive, alpha-beta T cell male adult (21 years) +6514 ENCFF782HFV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR151NGC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6515 RNA:GM12878 +6515 ENCFF782HFV- /home/drk/tillage/datasets/human/rna/encode/ENCSR151NGC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6514 RNA:GM12878 +6516 ENCFF259EXX /home/drk/tillage/datasets/human/rna/encode/ENCSR152BBF/summary/coverage.w5 768 384 0.3 sum_sqrt 6516 RNA:naive thymus-derived CD8-positive, alpha-beta T cell +6517 ENCFF969VXC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR152FDX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6518 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +6518 ENCFF969VXC- /home/drk/tillage/datasets/human/rna/encode/ENCSR152FDX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6517 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +6519 ENCFF296KUU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR154RVC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6520 RNA:H1 and endodermal cell +6520 ENCFF296KUU- /home/drk/tillage/datasets/human/rna/encode/ENCSR154RVC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6519 RNA:H1 and endodermal cell +6521 ENCFF669TRH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR158KFO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6522 RNA:omental 
fat pad tissue female adult (51 years) +6522 ENCFF669TRH- /home/drk/tillage/datasets/human/rna/encode/ENCSR158KFO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6521 RNA:omental fat pad tissue female adult (51 years) +6523 ENCFF900MLZ /home/drk/tillage/datasets/human/rna/encode/ENCSR158XIJ/summary/coverage.w5 768 384 0.3 sum_sqrt 6523 RNA:thymus tissue female embryo (98 days) +6524 ENCFF664RDQ /home/drk/tillage/datasets/human/rna/encode/ENCSR159UIB/summary/coverage.w5 768 384 0.3 sum_sqrt 6524 RNA:with multiple sclerosis; naive thymus-derived CD8-positive, alpha-beta T cell +6525 ENCFF881ITI /home/drk/tillage/datasets/human/rna/encode/ENCSR160UAZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6525 RNA:left renal pelvis tissue male embryo (105 days) +6526 ENCFF219KLR /home/drk/tillage/datasets/human/rna/encode/ENCSR161RSX/summary/coverage.w5 768 384 0.3 sum_sqrt 6526 RNA:K562 treated with 5 uM MB-3 for 24 hours +6527 ENCFF695GPR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR164OCT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6528 RNA:NCI-H460 +6528 ENCFF695GPR- /home/drk/tillage/datasets/human/rna/encode/ENCSR164OCT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6527 RNA:NCI-H460 +6529 ENCFF179DDD /home/drk/tillage/datasets/human/rna/encode/ENCSR165EQJ/summary/coverage.w5 768 384 0.3 sum_sqrt 6529 RNA:K562 treated with 5 uM MB-3 for 4 hours +6530 ENCFF013IKJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR165QTZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6531 RNA:pancreas tissue female adult (61 years) +6531 ENCFF013IKJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR165QTZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6530 RNA:pancreas tissue female adult (61 years) +6532 ENCFF681MQA /home/drk/tillage/datasets/human/rna/encode/ENCSR166IFS/summary/coverage.w5 768 384 0.3 sum_sqrt 6532 RNA:WTC11 genetically modified (insertion) using TALEN inserting M. 
musculus Neurog2 +6533 ENCFF686YTX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR166QLP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6534 RNA:HT1080 cytosolic fraction +6534 ENCFF686YTX- /home/drk/tillage/datasets/human/rna/encode/ENCSR166QLP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6533 RNA:HT1080 cytosolic fraction +6535 ENCFF995NEF /home/drk/tillage/datasets/human/rna/encode/ENCSR167NUS/summary/coverage.w5 768 384 0.3 sum_sqrt 6535 RNA:naive thymus-derived CD8-positive, alpha-beta T cell +6536 ENCFF992HAP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR168PXI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6537 RNA:mesothelial cell of epicardium +6537 ENCFF992HAP- /home/drk/tillage/datasets/human/rna/encode/ENCSR168PXI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6536 RNA:mesothelial cell of epicardium +6538 ENCFF154TLD /home/drk/tillage/datasets/human/rna/encode/ENCSR174ESD/summary/coverage.w5 768 384 0.3 sum_sqrt 6538 RNA:muscle of leg tissue male embryo (113 days) +6539 ENCFF199BDG /home/drk/tillage/datasets/human/rna/encode/ENCSR175CNQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6539 RNA:thymus tissue male embryo (97 days) +6540 ENCFF060EGL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR176FKY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6541 RNA:trophoblast tissue male embryo (38 weeks) +6541 ENCFF060EGL- /home/drk/tillage/datasets/human/rna/encode/ENCSR176FKY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6540 RNA:trophoblast tissue male embryo (38 weeks) +6542 ENCFF817FWU /home/drk/tillage/datasets/human/rna/encode/ENCSR176WMG/summary/coverage.w5 768 384 0.3 sum_sqrt 6542 RNA:right lung tissue male embryo (96 days) +6543 ENCFF014YFU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR177XCG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6544 RNA:CD4-positive, alpha-beta memory T cell male adult (43 years) +6544 ENCFF014YFU- /home/drk/tillage/datasets/human/rna/encode/ENCSR177XCG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6543 RNA:CD4-positive, alpha-beta memory T cell male 
adult (43 years) +6545 ENCFF581ZGH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR181ZGR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6546 RNA:HepG2 +6546 ENCFF581ZGH- /home/drk/tillage/datasets/human/rna/encode/ENCSR181ZGR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6545 RNA:HepG2 +6547 ENCFF741HPN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR182CBU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6548 RNA:esophagus muscularis mucosa tissue male adult (37 years) +6548 ENCFF741HPN- /home/drk/tillage/datasets/human/rna/encode/ENCSR182CBU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6547 RNA:esophagus muscularis mucosa tissue male adult (37 years) +6549 ENCFF837ZJA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR184LTL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6550 RNA:mucosa of descending colon tissue female adult (61 years) +6550 ENCFF837ZJA- /home/drk/tillage/datasets/human/rna/encode/ENCSR184LTL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6549 RNA:mucosa of descending colon tissue female adult (61 years) +6551 ENCFF542TGZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR185TQB/summary/coverage+.w5 768 384 0.3 sum_sqrt 6552 RNA:aorta tissue female adult (41 years) +6552 ENCFF542TGZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR185TQB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6551 RNA:aorta tissue female adult (41 years) +6553 ENCFF046XIT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR192NBO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6554 RNA:fibroblast of breast female adult (17 years) +6554 ENCFF046XIT- /home/drk/tillage/datasets/human/rna/encode/ENCSR192NBO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6553 RNA:fibroblast of breast female adult (17 years) +6555 ENCFF371QSS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR194HVU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6556 RNA:spleen tissue female adult (51 years) +6556 ENCFF371QSS- /home/drk/tillage/datasets/human/rna/encode/ENCSR194HVU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6555 RNA:spleen tissue female adult 
(51 years) +6557 ENCFF574YPE /home/drk/tillage/datasets/human/rna/encode/ENCSR195JRH/summary/coverage.w5 768 384 0.3 sum_sqrt 6557 RNA:K562 treated with 5 uM JQ1 for 12 hours +6558 ENCFF826YBE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR196ARY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6559 RNA:fibroblast of breast female adult (26 years) +6559 ENCFF826YBE- /home/drk/tillage/datasets/human/rna/encode/ENCSR196ARY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6558 RNA:fibroblast of breast female adult (26 years) +6560 ENCFF424DIT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR198QAJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6561 RNA:middle frontal area 46 tissue female adult (90 or above years) +6561 ENCFF424DIT- /home/drk/tillage/datasets/human/rna/encode/ENCSR198QAJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6560 RNA:middle frontal area 46 tissue female adult (90 or above years) +6562 ENCFF346QDJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR198TKA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6563 RNA:mesangial cell NONE and female embryo (21 weeks) +6563 ENCFF346QDJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR198TKA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6562 RNA:mesangial cell NONE and female embryo (21 weeks) +6564 ENCFF117JSP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR201WVA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6565 RNA:SK-MEL-5 nuclear fraction +6565 ENCFF117JSP- /home/drk/tillage/datasets/human/rna/encode/ENCSR201WVA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6564 RNA:SK-MEL-5 nuclear fraction +6566 ENCFF290PRJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR201XOZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6567 RNA:adrenal gland tissue female child (16 years) +6567 ENCFF290PRJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR201XOZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6566 RNA:adrenal gland tissue female child (16 years) +6568 ENCFF767NPP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR202OWR/summary/coverage+.w5 768 384 0.3 
sum_sqrt 6569 RNA:colonic mucosa tissue female adult (41 years) +6569 ENCFF767NPP- /home/drk/tillage/datasets/human/rna/encode/ENCSR202OWR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6568 RNA:colonic mucosa tissue female adult (41 years) +6570 ENCFF693TJD /home/drk/tillage/datasets/human/rna/encode/ENCSR204XBB/summary/coverage.w5 768 384 0.3 sum_sqrt 6570 RNA:renal pelvis tissue female embryo (105 days) +6571 ENCFF117CTU /home/drk/tillage/datasets/human/rna/encode/ENCSR206KFV/summary/coverage.w5 768 384 0.3 sum_sqrt 6571 RNA:K562 treated with 25 uM Galeterone for 4 hours +6572 ENCFF259TIE /home/drk/tillage/datasets/human/rna/encode/ENCSR211CKX/summary/coverage.w5 768 384 0.3 sum_sqrt 6572 RNA:with multiple sclerosis; naive thymus-derived CD8-positive, alpha-beta T cell +6573 ENCFF064TTU /home/drk/tillage/datasets/human/rna/encode/ENCSR212AMA/summary/coverage.w5 768 384 0.3 sum_sqrt 6573 RNA:kidney tissue female embryo (105 days) +6574 ENCFF772RIQ /home/drk/tillage/datasets/human/rna/encode/ENCSR214NBT/summary/coverage.w5 768 384 0.3 sum_sqrt 6574 RNA:CD4-positive, CD25-positive, alpha-beta regulatory T cell +6575 ENCFF648RBX /home/drk/tillage/datasets/human/rna/encode/ENCSR222IGR/summary/coverage.w5 768 384 0.3 sum_sqrt 6575 RNA:left lung tissue male embryo (96 days) +6576 ENCFF057IIF /home/drk/tillage/datasets/human/rna/encode/ENCSR223DWL/summary/coverage.w5 768 384 0.3 sum_sqrt 6576 RNA:K562 treated with 10 nM Bortezomib for 48 hours +6577 ENCFF470OCI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR224HFZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6578 RNA:myoepithelial cell of mammary gland female adult (22 years) +6578 ENCFF470OCI- /home/drk/tillage/datasets/human/rna/encode/ENCSR224HFZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6577 RNA:myoepithelial cell of mammary gland female adult (22 years) +6579 ENCFF103EZF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR226KML/summary/coverage+.w5 768 384 0.3 sum_sqrt 6580 RNA:right lobe of liver tissue female adult (53 
years) +6580 ENCFF103EZF- /home/drk/tillage/datasets/human/rna/encode/ENCSR226KML/summary/coverage-.w5 768 384 0.3 sum_sqrt 6579 RNA:right lobe of liver tissue female adult (53 years) +6581 ENCFF367DUO /home/drk/tillage/datasets/human/rna/encode/ENCSR229JRA/summary/coverage.w5 768 384 0.3 sum_sqrt 6581 RNA:thymus tissue female embryo (113 days) +6582 ENCFF128YXB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR229LFK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6583 RNA:right lobe of liver tissue female child (16 years) +6583 ENCFF128YXB- /home/drk/tillage/datasets/human/rna/encode/ENCSR229LFK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6582 RNA:right lobe of liver tissue female child (16 years) +6584 ENCFF401TVK /home/drk/tillage/datasets/human/rna/encode/ENCSR230KVL/summary/coverage.w5 768 384 0.3 sum_sqrt 6584 RNA:naive thymus-derived CD8-positive, alpha-beta T cell +6585 ENCFF155PJH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR233IJT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6586 RNA:astrocyte +6586 ENCFF155PJH- /home/drk/tillage/datasets/human/rna/encode/ENCSR233IJT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6585 RNA:astrocyte +6587 ENCFF625DRX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR236OON/summary/coverage+.w5 768 384 0.3 sum_sqrt 6588 RNA:adipose tissue tissue female adult (63 years) +6588 ENCFF625DRX- /home/drk/tillage/datasets/human/rna/encode/ENCSR236OON/summary/coverage-.w5 768 384 0.3 sum_sqrt 6587 RNA:adipose tissue tissue female adult (63 years) +6589 ENCFF312VKB /home/drk/tillage/datasets/human/rna/encode/ENCSR236URT/summary/coverage.w5 768 384 0.3 sum_sqrt 6589 RNA:smooth muscle cell originated from H9 +6590 ENCFF837FLM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR238ZZD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6591 RNA:thyroid gland tissue female adult (53 years) +6591 ENCFF837FLM- /home/drk/tillage/datasets/human/rna/encode/ENCSR238ZZD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6590 RNA:thyroid gland tissue female adult (53 years) 
+6592 ENCFF641YCY /home/drk/tillage/datasets/human/rna/encode/ENCSR239BBI/summary/coverage.w5 768 384 0.3 sum_sqrt 6592 RNA:muscle of back tissue female embryo (85 days) +6593 ENCFF008UXK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR240JQW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6594 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +6594 ENCFF008UXK- /home/drk/tillage/datasets/human/rna/encode/ENCSR240JQW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6593 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +6595 ENCFF503NGO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR244HHV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6596 RNA:placenta tissue male embryo +6596 ENCFF503NGO- /home/drk/tillage/datasets/human/rna/encode/ENCSR244HHV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6595 RNA:placenta tissue male embryo +6597 ENCFF403DKN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR244ISQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6598 RNA:neural progenitor cell originated from H9 +6598 ENCFF403DKN- /home/drk/tillage/datasets/human/rna/encode/ENCSR244ISQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6597 RNA:neural progenitor cell originated from H9 +6599 ENCFF908VIH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR245ATJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6600 RNA:HepG2 +6600 ENCFF908VIH- /home/drk/tillage/datasets/human/rna/encode/ENCSR245ATJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6599 RNA:HepG2 +6601 ENCFF381OQK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR249CKG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6602 RNA:chorion tissue embryo (16 weeks) +6602 ENCFF381OQK- /home/drk/tillage/datasets/human/rna/encode/ENCSR249CKG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6601 RNA:chorion tissue embryo (16 weeks) +6603 ENCFF645AYP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR252UHW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6604 RNA:heart right ventricle tissue female adult (46 years) +6604 ENCFF645AYP- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR252UHW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6603 RNA:heart right ventricle tissue female adult (46 years) +6605 ENCFF196LHX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR254JJM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6606 RNA:Daoy +6606 ENCFF196LHX- /home/drk/tillage/datasets/human/rna/encode/ENCSR254JJM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6605 RNA:Daoy +6607 ENCFF770TOE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR255NYQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6608 RNA:SK-N-DZ nuclear fraction +6608 ENCFF770TOE- /home/drk/tillage/datasets/human/rna/encode/ENCSR255NYQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6607 RNA:SK-N-DZ nuclear fraction +6609 ENCFF615IJV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR256YHI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6610 RNA:neurosphere embryo (15 weeks) originated from cortex +6610 ENCFF615IJV- /home/drk/tillage/datasets/human/rna/encode/ENCSR256YHI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6609 RNA:neurosphere embryo (15 weeks) originated from cortex +6611 ENCFF666KUJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR257NIR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6612 RNA:Peyer's patch tissue male adult (54 years) +6612 ENCFF666KUJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR257NIR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6611 RNA:Peyer's patch tissue male adult (54 years) +6613 ENCFF239BUM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR258ELN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6614 RNA:spleen tissue female adult (59 years) +6614 ENCFF239BUM- /home/drk/tillage/datasets/human/rna/encode/ENCSR258ELN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6613 RNA:spleen tissue female adult (59 years) +6615 ENCFF744TRY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR261ISO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6616 RNA:foreskin fibroblast male newborn +6616 ENCFF744TRY- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR261ISO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6615 RNA:foreskin fibroblast male newborn +6617 ENCFF583FIC /home/drk/tillage/datasets/human/rna/encode/ENCSR264IXQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6617 RNA:K562 treated with 10 uM AR-42 for 4 hours +6618 ENCFF946ZPT /home/drk/tillage/datasets/human/rna/encode/ENCSR264LON/summary/coverage.w5 768 384 0.3 sum_sqrt 6618 RNA:with multiple sclerosis; CD14-positive monocyte +6619 ENCFF393ZMQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR264VJN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6620 RNA:activated CD8-positive, alpha-beta T cell male adult (21 years) treated with anti-CD3 and anti-CD28 coated beads +6620 ENCFF393ZMQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR264VJN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6619 RNA:activated CD8-positive, alpha-beta T cell male adult (21 years) treated with anti-CD3 and anti-CD28 coated beads +6621 ENCFF315BLU /home/drk/tillage/datasets/human/rna/encode/ENCSR265NZF/summary/coverage.w5 768 384 0.3 sum_sqrt 6621 RNA:spleen tissue embryo (112 days) +6622 ENCFF313KNT /home/drk/tillage/datasets/human/rna/encode/ENCSR266LHQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6622 RNA:CD4-positive, alpha-beta memory T cell +6623 ENCFF474WFJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR266PVZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6624 RNA:right cardiac atrium tissue female adult (46 years) +6624 ENCFF474WFJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR266PVZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6623 RNA:right cardiac atrium tissue female adult (46 years) +6625 ENCFF670MHR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR266XAJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6626 RNA:endodermal cell +6626 ENCFF670MHR- /home/drk/tillage/datasets/human/rna/encode/ENCSR266XAJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6625 RNA:endodermal cell +6627 ENCFF864IDH+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR270OKS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6628 RNA:sigmoid colon tissue male adult (21 years) +6628 ENCFF864IDH- /home/drk/tillage/datasets/human/rna/encode/ENCSR270OKS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6627 RNA:sigmoid colon tissue male adult (21 years) +6629 ENCFF506SFB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR270XRV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6630 RNA:keratinocyte female +6630 ENCFF506SFB- /home/drk/tillage/datasets/human/rna/encode/ENCSR270XRV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6629 RNA:keratinocyte female +6631 ENCFF533JTF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR271DJJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6632 RNA:endocrine pancreas tissue adult (59 years) +6632 ENCFF533JTF- /home/drk/tillage/datasets/human/rna/encode/ENCSR271DJJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6631 RNA:endocrine pancreas tissue adult (59 years) +6633 ENCFF281UGH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR272UNO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6634 RNA:tibial nerve tissue female adult (51 years) +6634 ENCFF281UGH- /home/drk/tillage/datasets/human/rna/encode/ENCSR272UNO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6633 RNA:tibial nerve tissue female adult (51 years) +6635 ENCFF637ZBG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR274JRR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6636 RNA:brain tissue female adult (66 years) +6636 ENCFF637ZBG- /home/drk/tillage/datasets/human/rna/encode/ENCSR274JRR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6635 RNA:brain tissue female adult (66 years) +6637 ENCFF587USA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR275JSL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6638 RNA:with Alzheimer's disease, Cognitive impairment; middle frontal area 46 tissue male adult (73 years) +6638 ENCFF587USA- /home/drk/tillage/datasets/human/rna/encode/ENCSR275JSL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6637 RNA:with Alzheimer's disease, 
Cognitive impairment; middle frontal area 46 tissue male adult (73 years) +6639 ENCFF392DAR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR275ZLF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6640 RNA:mesenchymal stem cell originated from H1 +6640 ENCFF392DAR- /home/drk/tillage/datasets/human/rna/encode/ENCSR275ZLF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6639 RNA:mesenchymal stem cell originated from H1 +6641 ENCFF037OMX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR276MMH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6642 RNA:adrenal gland tissue male adult (37 years) +6642 ENCFF037OMX- /home/drk/tillage/datasets/human/rna/encode/ENCSR276MMH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6641 RNA:adrenal gland tissue male adult (37 years) +6643 ENCFF565FPI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR278UYN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6644 RNA:lung tissue male child (3 years) +6644 ENCFF565FPI- /home/drk/tillage/datasets/human/rna/encode/ENCSR278UYN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6643 RNA:lung tissue male child (3 years) +6645 ENCFF496QEP /home/drk/tillage/datasets/human/rna/encode/ENCSR279VNR/summary/coverage.w5 768 384 0.3 sum_sqrt 6645 RNA:CD8-positive, alpha-beta memory T cell +6646 ENCFF547YEW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR282GZU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6647 RNA:stimulated activated CD8-positive, alpha-beta T cell male adult (21 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +6647 ENCFF547YEW- /home/drk/tillage/datasets/human/rna/encode/ENCSR282GZU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6646 RNA:stimulated activated CD8-positive, alpha-beta T cell male adult (21 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +6648 ENCFF295JCA /home/drk/tillage/datasets/human/rna/encode/ENCSR286KWP/summary/coverage.w5 768 384 0.3 sum_sqrt 6648 RNA:large intestine tissue male embryo (115 days) +6649 ENCFF719RAL 
/home/drk/tillage/datasets/human/rna/encode/ENCSR287DHQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6649 RNA:K562 treated with 10 nM Bortezomib for 12 hours +6650 ENCFF189MNY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR288RRZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6651 RNA:placenta tissue male embryo +6651 ENCFF189MNY- /home/drk/tillage/datasets/human/rna/encode/ENCSR288RRZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6650 RNA:placenta tissue male embryo +6652 ENCFF634JRF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR290IHM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6653 RNA:middle frontal area 46 tissue female adult (79 years) +6653 ENCFF634JRF- /home/drk/tillage/datasets/human/rna/encode/ENCSR290IHM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6652 RNA:middle frontal area 46 tissue female adult (79 years) +6654 ENCFF960QLH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR291DJH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6655 RNA:SK-MEL-5 cytosolic fraction +6655 ENCFF960QLH- /home/drk/tillage/datasets/human/rna/encode/ENCSR291DJH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6654 RNA:SK-MEL-5 cytosolic fraction +6656 ENCFF185NBE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR291TRJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6657 RNA:endodermal cell +6657 ENCFF185NBE- /home/drk/tillage/datasets/human/rna/encode/ENCSR291TRJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6656 RNA:endodermal cell +6658 ENCFF986WTF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR292TAP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6659 RNA:neural cell originated from H1 +6659 ENCFF986WTF- /home/drk/tillage/datasets/human/rna/encode/ENCSR292TAP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6658 RNA:neural cell originated from H1 +6660 ENCFF030JXK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR292TYT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6661 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +6661 ENCFF030JXK- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR292TYT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6660 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +6662 ENCFF713REM /home/drk/tillage/datasets/human/rna/encode/ENCSR294NDO/summary/coverage.w5 768 384 0.3 sum_sqrt 6662 RNA:cardiac muscle cell originated from RUES2 +6663 ENCFF151AEP /home/drk/tillage/datasets/human/rna/encode/ENCSR296LJV/summary/coverage.w5 768 384 0.3 sum_sqrt 6663 RNA:CD4-positive, alpha-beta memory T cell +6664 ENCFF145OAS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR296PMS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6665 RNA:stomach tissue male adult (54 years) +6665 ENCFF145OAS- /home/drk/tillage/datasets/human/rna/encode/ENCSR296PMS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6664 RNA:stomach tissue male adult (54 years) +6666 ENCFF430DVF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR297AZN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6667 RNA:CD4-positive, alpha-beta memory T cell male adult (43 years) +6667 ENCFF430DVF- /home/drk/tillage/datasets/human/rna/encode/ENCSR297AZN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6666 RNA:CD4-positive, alpha-beta memory T cell male adult (43 years) +6668 ENCFF047KOP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR297UBP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6669 RNA:GM12878 +6669 ENCFF047KOP- /home/drk/tillage/datasets/human/rna/encode/ENCSR297UBP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6668 RNA:GM12878 +6670 ENCFF557CLI /home/drk/tillage/datasets/human/rna/encode/ENCSR303IRE/summary/coverage.w5 768 384 0.3 sum_sqrt 6670 RNA:K562 treated with 7.5 nM Panobinostat for 48 hours +6671 ENCFF119XJG /home/drk/tillage/datasets/human/rna/encode/ENCSR305NXN/summary/coverage.w5 768 384 0.3 sum_sqrt 6671 RNA:muscle of arm tissue male embryo (127 days) +6672 ENCFF624DSV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR306IAW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6673 RNA:T-cell male adult (42 years) +6673 
ENCFF624DSV- /home/drk/tillage/datasets/human/rna/encode/ENCSR306IAW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6672 RNA:T-cell male adult (42 years) +6674 ENCFF560YUT /home/drk/tillage/datasets/human/rna/encode/ENCSR307FAG/summary/coverage.w5 768 384 0.3 sum_sqrt 6674 RNA:naive B cell +6675 ENCFF597XJZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR308XAR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6676 RNA:placenta tissue male embryo +6676 ENCFF597XJZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR308XAR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6675 RNA:placenta tissue male embryo +6677 ENCFF138GLV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR313COD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6678 RNA:upper lobe of left lung tissue male adult (37 years) +6678 ENCFF138GLV- /home/drk/tillage/datasets/human/rna/encode/ENCSR313COD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6677 RNA:upper lobe of left lung tissue male adult (37 years) +6679 ENCFF816BKU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR313TJR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6680 RNA:peripheral blood mononuclear cell female adult (28 years) +6680 ENCFF816BKU- /home/drk/tillage/datasets/human/rna/encode/ENCSR313TJR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6679 RNA:peripheral blood mononuclear cell female adult (28 years) +6681 ENCFF505JCK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR314LXG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6682 RNA:Karpas-422 +6682 ENCFF505JCK- /home/drk/tillage/datasets/human/rna/encode/ENCSR314LXG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6681 RNA:Karpas-422 +6683 ENCFF359ITR /home/drk/tillage/datasets/human/rna/encode/ENCSR317LMH/summary/coverage.w5 768 384 0.3 sum_sqrt 6683 RNA:muscle of arm tissue female embryo (98 days) +6684 ENCFF629XGV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR320BRR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6685 RNA:RPMI7951 +6685 ENCFF629XGV- /home/drk/tillage/datasets/human/rna/encode/ENCSR320BRR/summary/coverage-.w5 768 
384 0.3 sum_sqrt 6684 RNA:RPMI7951 +6686 ENCFF649MKX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR320OTJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6687 RNA:ovary tissue female adult (41 years) +6687 ENCFF649MKX- /home/drk/tillage/datasets/human/rna/encode/ENCSR320OTJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6686 RNA:ovary tissue female adult (41 years) +6688 ENCFF596HNR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR321PGV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6689 RNA:lower leg skin tissue male adult (37 years) +6689 ENCFF596HNR- /home/drk/tillage/datasets/human/rna/encode/ENCSR321PGV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6688 RNA:lower leg skin tissue male adult (37 years) +6690 ENCFF990MDC /home/drk/tillage/datasets/human/rna/encode/ENCSR321ROU/summary/coverage.w5 768 384 0.3 sum_sqrt 6690 RNA:left renal pelvis tissue male embryo (105 days) +6691 ENCFF119SVC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR323GUF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6692 RNA:right lobe of liver tissue female adult (47 years) +6692 ENCFF119SVC- /home/drk/tillage/datasets/human/rna/encode/ENCSR323GUF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6691 RNA:right lobe of liver tissue female adult (47 years) +6693 ENCFF206GMC /home/drk/tillage/datasets/human/rna/encode/ENCSR325BJP/summary/coverage.w5 768 384 0.3 sum_sqrt 6693 RNA:K562 treated with 25 uM Galeterone for 48 hours +6694 ENCFF724LSN /home/drk/tillage/datasets/human/rna/encode/ENCSR328PVI/summary/coverage.w5 768 384 0.3 sum_sqrt 6694 RNA:renal cortex interstitium tissue male embryo (91 days) +6695 ENCFF801PRP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR329MHM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6696 RNA:HepG2 +6696 ENCFF801PRP- /home/drk/tillage/datasets/human/rna/encode/ENCSR329MHM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6695 RNA:HepG2 +6697 ENCFF575YIP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR329ZRF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6698 RNA:trophoblast tissue female 
embryo (40 weeks) +6698 ENCFF575YIP- /home/drk/tillage/datasets/human/rna/encode/ENCSR329ZRF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6697 RNA:trophoblast tissue female embryo (40 weeks) +6699 ENCFF643UKE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR330UMQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6700 RNA:spleen tissue male adult (37 years) +6700 ENCFF643UKE- /home/drk/tillage/datasets/human/rna/encode/ENCSR330UMQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6699 RNA:spleen tissue male adult (37 years) +6701 ENCFF197PAV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR332DBS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6702 RNA:LHCN-M2 +6702 ENCFF197PAV- /home/drk/tillage/datasets/human/rna/encode/ENCSR332DBS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6701 RNA:LHCN-M2 +6703 ENCFF956BZU /home/drk/tillage/datasets/human/rna/encode/ENCSR332MTG/summary/coverage.w5 768 384 0.3 sum_sqrt 6703 RNA:muscle of arm tissue male embryo (105 days) +6704 ENCFF080VEH /home/drk/tillage/datasets/human/rna/encode/ENCSR333FZW/summary/coverage.w5 768 384 0.3 sum_sqrt 6704 RNA:spinal cord tissue male embryo (105 days) +6705 ENCFF319AAP /home/drk/tillage/datasets/human/rna/encode/ENCSR335GET/summary/coverage.w5 768 384 0.3 sum_sqrt 6705 RNA:adrenal gland tissue male embryo (101 days) +6706 ENCFF994HKO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR336VTK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6707 RNA:T-cell female adult (33 years) +6707 ENCFF994HKO- /home/drk/tillage/datasets/human/rna/encode/ENCSR336VTK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6706 RNA:T-cell female adult (33 years) +6708 ENCFF702CEO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR338FSZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6709 RNA:chorion tissue male embryo (38 weeks) +6709 ENCFF702CEO- /home/drk/tillage/datasets/human/rna/encode/ENCSR338FSZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6708 RNA:chorion tissue male embryo (38 weeks) +6710 ENCFF010PER 
/home/drk/tillage/datasets/human/rna/encode/ENCSR338WIW/summary/coverage.w5 768 384 0.3 sum_sqrt 6710 RNA:CD8-positive, alpha-beta memory T cell +6711 ENCFF629NUI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR339NMQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6712 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (89 years) +6712 ENCFF629NUI- /home/drk/tillage/datasets/human/rna/encode/ENCSR339NMQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6711 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (89 years) +6713 ENCFF893RLE /home/drk/tillage/datasets/human/rna/encode/ENCSR340QZY/summary/coverage.w5 768 384 0.3 sum_sqrt 6713 RNA:K562 treated with 10 uM AR-42 for 12 hours +6714 ENCFF905ARA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR343XXH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6715 RNA:mucosa of gallbladder tissue female child (16 years) +6715 ENCFF905ARA- /home/drk/tillage/datasets/human/rna/encode/ENCSR343XXH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6714 RNA:mucosa of gallbladder tissue female child (16 years) +6716 ENCFF333KHL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR344MQK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6717 RNA:testis tissue male adult (54 years) +6717 ENCFF333KHL- /home/drk/tillage/datasets/human/rna/encode/ENCSR344MQK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6716 RNA:testis tissue male adult (54 years) +6718 ENCFF022QBF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR348EFG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6719 RNA:HUES64 +6719 ENCFF022QBF- /home/drk/tillage/datasets/human/rna/encode/ENCSR348EFG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6718 RNA:HUES64 +6720 ENCFF166OEV /home/drk/tillage/datasets/human/rna/encode/ENCSR348YVF/summary/coverage.w5 768 384 0.3 sum_sqrt 6720 RNA:IgD-negative memory B cell +6721 ENCFF766JPS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR351OTL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6722 RNA:esophagus squamous epithelium tissue female adult 
(53 years) +6722 ENCFF766JPS- /home/drk/tillage/datasets/human/rna/encode/ENCSR351OTL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6721 RNA:esophagus squamous epithelium tissue female adult (53 years) +6723 ENCFF679YQA /home/drk/tillage/datasets/human/rna/encode/ENCSR352GCS/summary/coverage.w5 768 384 0.3 sum_sqrt 6723 RNA:right renal pelvis tissue male embryo (105 days) +6724 ENCFF995AUL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR352JCY/summary/coverage+.w5 768 384 0.3 sum_sqrt 6725 RNA:type B pancreatic cell +6725 ENCFF995AUL- /home/drk/tillage/datasets/human/rna/encode/ENCSR352JCY/summary/coverage-.w5 768 384 0.3 sum_sqrt 6724 RNA:type B pancreatic cell +6726 ENCFF187QYU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR354QPN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6727 RNA:esophagus squamous epithelium tissue male adult (37 years) +6727 ENCFF187QYU- /home/drk/tillage/datasets/human/rna/encode/ENCSR354QPN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6726 RNA:esophagus squamous epithelium tissue male adult (37 years) +6728 ENCFF301XEH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR355JZC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6729 RNA:MCF-7 +6729 ENCFF301XEH- /home/drk/tillage/datasets/human/rna/encode/ENCSR355JZC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6728 RNA:MCF-7 +6730 ENCFF253SBE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR357BYU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6731 RNA:left lobe of liver tissue male adult (45 years) +6731 ENCFF253SBE- /home/drk/tillage/datasets/human/rna/encode/ENCSR357BYU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6730 RNA:left lobe of liver tissue male adult (45 years) +6732 ENCFF410ISM /home/drk/tillage/datasets/human/rna/encode/ENCSR357XTU/summary/coverage.w5 768 384 0.3 sum_sqrt 6732 RNA:natural killer cell male adult (37 years) +6733 ENCFF032EUU /home/drk/tillage/datasets/human/rna/encode/ENCSR361DRG/summary/coverage.w5 768 384 0.3 sum_sqrt 6733 RNA:fibroblast of skin of abdomen male embryo (97 days) +6734 
ENCFF462KTY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR362HMX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6735 RNA:pericardium fibroblast NONE and female embryo (20 weeks) +6735 ENCFF462KTY- /home/drk/tillage/datasets/human/rna/encode/ENCSR362HMX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6734 RNA:pericardium fibroblast NONE and female embryo (20 weeks) +6736 ENCFF932VPS /home/drk/tillage/datasets/human/rna/encode/ENCSR363BVC/summary/coverage.w5 768 384 0.3 sum_sqrt 6736 RNA:large intestine tissue female embryo (105 days) +6737 ENCFF228OVL /home/drk/tillage/datasets/human/rna/encode/ENCSR363BVN/summary/coverage.w5 768 384 0.3 sum_sqrt 6737 RNA:naive B cell +6738 ENCFF144GMK /home/drk/tillage/datasets/human/rna/encode/ENCSR364IBB/summary/coverage.w5 768 384 0.3 sum_sqrt 6738 RNA:muscle of arm tissue female embryo (120 days) +6739 ENCFF987VHA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR365ARV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6740 RNA:middle frontal area 46 tissue female adult (90 or above years) +6740 ENCFF987VHA- /home/drk/tillage/datasets/human/rna/encode/ENCSR365ARV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6739 RNA:middle frontal area 46 tissue female adult (90 or above years) +6741 ENCFF759YTG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR366LFQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6742 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (88 years) +6742 ENCFF759YTG- /home/drk/tillage/datasets/human/rna/encode/ENCSR366LFQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6741 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (88 years) +6743 ENCFF482YAE /home/drk/tillage/datasets/human/rna/encode/ENCSR367QHR/summary/coverage.w5 768 384 0.3 sum_sqrt 6743 RNA:thymus tissue male embryo (108 days) +6744 ENCFF119GRF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR368HRJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6745 RNA:ovary tissue female adult (61 years) +6745 ENCFF119GRF- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR368HRJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6744 RNA:ovary tissue female adult (61 years) +6746 ENCFF856KPS /home/drk/tillage/datasets/human/rna/encode/ENCSR369MDF/summary/coverage.w5 768 384 0.3 sum_sqrt 6746 RNA:K562 treated with 5 uM JQ1 for 4 hours +6747 ENCFF287LRZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR369RVN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6748 RNA:cardiac ventricle fibroblast NONE and male adult (18 years) +6748 ENCFF287LRZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR369RVN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6747 RNA:cardiac ventricle fibroblast NONE and male adult (18 years) +6749 ENCFF610DVT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR371VGV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6750 RNA:myometrial cell female adult (34 years) +6750 ENCFF610DVT- /home/drk/tillage/datasets/human/rna/encode/ENCSR371VGV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6749 RNA:myometrial cell female adult (34 years) +6751 ENCFF635XGR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR373BDG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6752 RNA:kidney epithelial cell male embryo (22 weeks) and male newborn +6752 ENCFF635XGR- /home/drk/tillage/datasets/human/rna/encode/ENCSR373BDG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6751 RNA:kidney epithelial cell male embryo (22 weeks) and male newborn +6753 ENCFF197PEN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR377FPC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6754 RNA:aorta tissue female adult (59 years) +6754 ENCFF197PEN- /home/drk/tillage/datasets/human/rna/encode/ENCSR377FPC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6753 RNA:aorta tissue female adult (59 years) +6755 ENCFF368ZGL /home/drk/tillage/datasets/human/rna/encode/ENCSR377HQQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6755 RNA:with multiple sclerosis; CD4-positive, alpha-beta memory T cell +6756 ENCFF290WID /home/drk/tillage/datasets/human/rna/encode/ENCSR379BAF/summary/coverage.w5 
768 384 0.3 sum_sqrt 6756 RNA:K562 treated with 5 uM MB-3 for 48 hours +6757 ENCFF527SQU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR379DEC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6758 RNA:GM23338 +6758 ENCFF527SQU- /home/drk/tillage/datasets/human/rna/encode/ENCSR379DEC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6757 RNA:GM23338 +6759 ENCFF205VDK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR379YAE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6760 RNA:cardiac muscle cell originated from RUES2 +6760 ENCFF205VDK- /home/drk/tillage/datasets/human/rna/encode/ENCSR379YAE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6759 RNA:cardiac muscle cell originated from RUES2 +6761 ENCFF358XLV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR381OTM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6762 RNA:HFFc6 +6762 ENCFF358XLV- /home/drk/tillage/datasets/human/rna/encode/ENCSR381OTM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6761 RNA:HFFc6 +6763 ENCFF081XYH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR382XJF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6764 RNA:ectodermal cell originated from HUES64 +6764 ENCFF081XYH- /home/drk/tillage/datasets/human/rna/encode/ENCSR382XJF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6763 RNA:ectodermal cell originated from HUES64 +6765 ENCFF853SNW /home/drk/tillage/datasets/human/rna/encode/ENCSR384HOK/summary/coverage.w5 768 384 0.3 sum_sqrt 6765 RNA:CD14-positive monocyte +6766 ENCFF136ZVX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR384ZXD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6767 RNA:K562 cytosolic fraction +6767 ENCFF136ZVX- /home/drk/tillage/datasets/human/rna/encode/ENCSR384ZXD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6766 RNA:K562 cytosolic fraction +6768 ENCFF362PNM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR385KVQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6769 RNA:activated CD4-positive, alpha-beta T cell male adult (20 years) treated with anti-CD3 and anti-CD28 coated beads +6769 ENCFF362PNM- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR385KVQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6768 RNA:activated CD4-positive, alpha-beta T cell male adult (20 years) treated with anti-CD3 and anti-CD28 coated beads +6770 ENCFF167XOI /home/drk/tillage/datasets/human/rna/encode/ENCSR388ZNJ/summary/coverage.w5 768 384 0.3 sum_sqrt 6770 RNA:liver tissue female child (4 years) and with nonobstructive coronary artery disease; liver tissue male adult (32 years) +6771 ENCFF248QVA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR391VGU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6772 RNA:heart left ventricle tissue female adult (53 years) +6772 ENCFF248QVA- /home/drk/tillage/datasets/human/rna/encode/ENCSR391VGU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6771 RNA:heart left ventricle tissue female adult (53 years) +6773 ENCFF733YEJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR394ZSF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6774 RNA:middle frontal area 46 tissue male adult (87 years) +6774 ENCFF733YEJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR394ZSF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6773 RNA:middle frontal area 46 tissue male adult (87 years) +6775 ENCFF939BQK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR395DKP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6776 RNA:middle frontal area 46 tissue male adult (87 years) +6776 ENCFF939BQK- /home/drk/tillage/datasets/human/rna/encode/ENCSR395DKP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6775 RNA:middle frontal area 46 tissue male adult (87 years) +6777 ENCFF123PSX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR396GIH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6778 RNA:sigmoid colon tissue male child (3 years) +6778 ENCFF123PSX- /home/drk/tillage/datasets/human/rna/encode/ENCSR396GIH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6777 RNA:sigmoid colon tissue male child (3 years) +6779 ENCFF367NBV /home/drk/tillage/datasets/human/rna/encode/ENCSR400DJE/summary/coverage.w5 768 384 0.3 sum_sqrt 6779 RNA:right 
renal cortex interstitium tissue male embryo (105 days) +6780 ENCFF330EUN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR401DHH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6781 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +6781 ENCFF330EUN- /home/drk/tillage/datasets/human/rna/encode/ENCSR401DHH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6780 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +6782 ENCFF807KQZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR403SZN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6783 RNA:transverse colon tissue female adult (51 years) +6783 ENCFF807KQZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR403SZN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6782 RNA:transverse colon tissue female adult (51 years) +6784 ENCFF493OET+ /home/drk/tillage/datasets/human/rna/encode/ENCSR406SAW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6785 RNA:upper lobe of left lung tissue female adult (53 years) +6785 ENCFF493OET- /home/drk/tillage/datasets/human/rna/encode/ENCSR406SAW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6784 RNA:upper lobe of left lung tissue female adult (53 years) +6786 ENCFF309YCI /home/drk/tillage/datasets/human/rna/encode/ENCSR406YML/summary/coverage.w5 768 384 0.3 sum_sqrt 6786 RNA:muscle of arm tissue male embryo (91 days) +6787 ENCFF407ZDM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR409JSP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6788 RNA:peripheral blood mononuclear cell male adult (39 years) +6788 ENCFF407ZDM- /home/drk/tillage/datasets/human/rna/encode/ENCSR409JSP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6787 RNA:peripheral blood mononuclear cell male adult (39 years) +6789 ENCFF251CIC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR409UYW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6790 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +6790 ENCFF251CIC- /home/drk/tillage/datasets/human/rna/encode/ENCSR409UYW/summary/coverage-.w5 768 384 0.3 
sum_sqrt 6789 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +6791 ENCFF563MLL /home/drk/tillage/datasets/human/rna/encode/ENCSR410DUZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6791 RNA:left renal pelvis tissue male embryo (120 days) +6792 ENCFF808KRP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR410MSS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6793 RNA:left lung tissue female child (16 years) +6793 ENCFF808KRP- /home/drk/tillage/datasets/human/rna/encode/ENCSR410MSS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6792 RNA:left lung tissue female child (16 years) +6794 ENCFF515TIF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR411MUF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6795 RNA:CD4-positive, alpha-beta T cell male adult (20 years) +6795 ENCFF515TIF- /home/drk/tillage/datasets/human/rna/encode/ENCSR411MUF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6794 RNA:CD4-positive, alpha-beta T cell male adult (20 years) +6796 ENCFF251HQX /home/drk/tillage/datasets/human/rna/encode/ENCSR413LXW/summary/coverage.w5 768 384 0.3 sum_sqrt 6796 RNA:left renal cortex interstitium tissue male embryo (105 days) +6797 ENCFF092QIW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR413QAL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6798 RNA:osteocyte +6798 ENCFF092QIW- /home/drk/tillage/datasets/human/rna/encode/ENCSR413QAL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6797 RNA:osteocyte +6799 ENCFF474MDU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR415SXI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6800 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +6800 ENCFF474MDU- /home/drk/tillage/datasets/human/rna/encode/ENCSR415SXI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6799 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +6801 ENCFF743FLM /home/drk/tillage/datasets/human/rna/encode/ENCSR416UGB/summary/coverage.w5 768 384 0.3 sum_sqrt 6801 RNA:with multiple sclerosis; naive B cell +6802 ENCFF737SYB+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR420NLC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6803 RNA:PC-3 +6803 ENCFF737SYB- /home/drk/tillage/datasets/human/rna/encode/ENCSR420NLC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6802 RNA:PC-3 +6804 ENCFF581PRF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR420YFF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6805 RNA:placenta tissue female embryo +6805 ENCFF581PRF- /home/drk/tillage/datasets/human/rna/encode/ENCSR420YFF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6804 RNA:placenta tissue female embryo +6806 ENCFF035NBN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR420ZKB/summary/coverage+.w5 768 384 0.3 sum_sqrt 6807 RNA:HFFc6 +6807 ENCFF035NBN- /home/drk/tillage/datasets/human/rna/encode/ENCSR420ZKB/summary/coverage-.w5 768 384 0.3 sum_sqrt 6806 RNA:HFFc6 +6808 ENCFF110RPH /home/drk/tillage/datasets/human/rna/encode/ENCSR423LLK/summary/coverage.w5 768 384 0.3 sum_sqrt 6808 RNA:muscle of arm tissue male embryo (97 days) +6809 ENCFF188MTB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR424FAZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6810 RNA:IMR-90 +6810 ENCFF188MTB- /home/drk/tillage/datasets/human/rna/encode/ENCSR424FAZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6809 RNA:IMR-90 +6811 ENCFF391XHF /home/drk/tillage/datasets/human/rna/encode/ENCSR424TSZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6811 RNA:renal pelvis tissue male embryo (91 days) +6812 ENCFF520NHF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR425RGZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6813 RNA:upper lobe of left lung tissue female adult (51 years) +6813 ENCFF520NHF- /home/drk/tillage/datasets/human/rna/encode/ENCSR425RGZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6812 RNA:upper lobe of left lung tissue female adult (51 years) +6814 ENCFF121ZPH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR429EGC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6815 RNA:endothelial cell +6815 ENCFF121ZPH- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR429EGC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6814 RNA:endothelial cell +6816 ENCFF833YCY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR429EWK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6817 RNA:thoracic aorta tissue male adult (37 years) +6817 ENCFF833YCY- /home/drk/tillage/datasets/human/rna/encode/ENCSR429EWK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6816 RNA:thoracic aorta tissue male adult (37 years) +6818 ENCFF210MJF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR432EBE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6819 RNA:pancreas tissue female adult (59 years) +6819 ENCFF210MJF- /home/drk/tillage/datasets/human/rna/encode/ENCSR432EBE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6818 RNA:pancreas tissue female adult (59 years) +6820 ENCFF103NIX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR433GXV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6821 RNA:mesodermal cell originated from HUES64 +6821 ENCFF103NIX- /home/drk/tillage/datasets/human/rna/encode/ENCSR433GXV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6820 RNA:mesodermal cell originated from HUES64 +6822 ENCFF774PKK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR433XCV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6823 RNA:heart right ventricle tissue male adult (34 years) +6823 ENCFF774PKK- /home/drk/tillage/datasets/human/rna/encode/ENCSR433XCV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6822 RNA:heart right ventricle tissue male adult (34 years) +6824 ENCFF261LJW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR433YBD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6825 RNA:mammary stem cell female adult (22 years) +6825 ENCFF261LJW- /home/drk/tillage/datasets/human/rna/encode/ENCSR433YBD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6824 RNA:mammary stem cell female adult (22 years) +6826 ENCFF169FHE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR434TEU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6827 RNA:breast epithelium tissue female adult (53 years) +6827 
ENCFF169FHE- /home/drk/tillage/datasets/human/rna/encode/ENCSR434TEU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6826 RNA:breast epithelium tissue female adult (53 years) +6828 ENCFF472LKR /home/drk/tillage/datasets/human/rna/encode/ENCSR435WIF/summary/coverage.w5 768 384 0.3 sum_sqrt 6828 RNA:with multiple sclerosis; CD4-positive, CD25-positive, alpha-beta regulatory T cell +6829 ENCFF671DKT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR436QDU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6830 RNA:heart left ventricle tissue female adult (51 years) +6830 ENCFF671DKT- /home/drk/tillage/datasets/human/rna/encode/ENCSR436QDU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6829 RNA:heart left ventricle tissue female adult (51 years) +6831 ENCFF957KMU /home/drk/tillage/datasets/human/rna/encode/ENCSR436ZKE/summary/coverage.w5 768 384 0.3 sum_sqrt 6831 RNA:renal cortex interstitium tissue male embryo (97 days) +6832 ENCFF237QNH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR437HKI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6833 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +6833 ENCFF237QNH- /home/drk/tillage/datasets/human/rna/encode/ENCSR437HKI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6832 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +6834 ENCFF603PIL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR438YPF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6835 RNA:breast epithelium tissue female adult (51 years) +6835 ENCFF603PIL- /home/drk/tillage/datasets/human/rna/encode/ENCSR438YPF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6834 RNA:breast epithelium tissue female adult (51 years) +6836 ENCFF841CUM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR439SPU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6837 RNA:heart right ventricle tissue male child (3 years) +6837 ENCFF841CUM- /home/drk/tillage/datasets/human/rna/encode/ENCSR439SPU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6836 RNA:heart right ventricle tissue male child (3 
years) +6838 ENCFF485JGG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR444WHQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6839 RNA:skeletal muscle myoblast +6839 ENCFF485JGG- /home/drk/tillage/datasets/human/rna/encode/ENCSR444WHQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6838 RNA:skeletal muscle myoblast +6840 ENCFF121WXQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR445DAC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6841 RNA:chorionic villus tissue male embryo (38 weeks) +6841 ENCFF121WXQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR445DAC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6840 RNA:chorionic villus tissue male embryo (38 weeks) +6842 ENCFF030XBO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR446LDS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6843 RNA:CD8-positive, alpha-beta memory T cell male adult (30 years) +6843 ENCFF030XBO- /home/drk/tillage/datasets/human/rna/encode/ENCSR446LDS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6842 RNA:CD8-positive, alpha-beta memory T cell male adult (30 years) +6844 ENCFF224GPU /home/drk/tillage/datasets/human/rna/encode/ENCSR446RKD/summary/coverage.w5 768 384 0.3 sum_sqrt 6844 RNA:small intestine tissue male embryo (91 days) +6845 ENCFF846ZCR /home/drk/tillage/datasets/human/rna/encode/ENCSR447IJE/summary/coverage.w5 768 384 0.3 sum_sqrt 6845 RNA:muscle of leg tissue male embryo (101 days) +6846 ENCFF327YQU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR448BTT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6847 RNA:lower lobe of left lung tissue male adult (60 years) +6847 ENCFF327YQU- /home/drk/tillage/datasets/human/rna/encode/ENCSR448BTT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6846 RNA:lower lobe of left lung tissue male adult (60 years) +6848 ENCFF214DTC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR448DCX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6849 RNA:urinary bladder tissue male child (3 years) +6849 ENCFF214DTC- /home/drk/tillage/datasets/human/rna/encode/ENCSR448DCX/summary/coverage-.w5 768 384 
0.3 sum_sqrt 6848 RNA:urinary bladder tissue male child (3 years) +6850 ENCFF804LUI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR448VSW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6851 RNA:spleen tissue female adult (30 years) +6851 ENCFF804LUI- /home/drk/tillage/datasets/human/rna/encode/ENCSR448VSW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6850 RNA:spleen tissue female adult (30 years) +6852 ENCFF011JWL /home/drk/tillage/datasets/human/rna/encode/ENCSR449GLL/summary/coverage.w5 768 384 0.3 sum_sqrt 6852 RNA:B cell male adult (37 years) +6853 ENCFF542BVY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR450BNZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6854 RNA:Peyer's patch tissue female adult (51 years) +6854 ENCFF542BVY- /home/drk/tillage/datasets/human/rna/encode/ENCSR450BNZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6853 RNA:Peyer's patch tissue female adult (51 years) +6855 ENCFF498TQF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR450ENK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6856 RNA:suprapubic skin tissue female adult (53 years) +6856 ENCFF498TQF- /home/drk/tillage/datasets/human/rna/encode/ENCSR450ENK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6855 RNA:suprapubic skin tissue female adult (53 years) +6857 ENCFF634WOM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR450EXF/summary/coverage+.w5 768 384 0.3 sum_sqrt 6858 RNA:WTC11 +6858 ENCFF634WOM- /home/drk/tillage/datasets/human/rna/encode/ENCSR450EXF/summary/coverage-.w5 768 384 0.3 sum_sqrt 6857 RNA:WTC11 +6859 ENCFF436OWO /home/drk/tillage/datasets/human/rna/encode/ENCSR451LDB/summary/coverage.w5 768 384 0.3 sum_sqrt 6859 RNA:K562 treated with 50 uM C646 for 12 hours +6860 ENCFF283IQC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR452VLO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6861 RNA:foreskin keratinocyte male newborn +6861 ENCFF283IQC- /home/drk/tillage/datasets/human/rna/encode/ENCSR452VLO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6860 RNA:foreskin keratinocyte male newborn +6862 
ENCFF263KEJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR457ENP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6863 RNA:right atrium auricular region tissue female adult (51 years) +6863 ENCFF263KEJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR457ENP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6862 RNA:right atrium auricular region tissue female adult (51 years) +6864 ENCFF286TKQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR458FZP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6865 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +6865 ENCFF286TKQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR458FZP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6864 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +6866 ENCFF318EJE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR460YCS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6867 RNA:lower leg skin tissue male adult (54 years) +6867 ENCFF318EJE- /home/drk/tillage/datasets/human/rna/encode/ENCSR460YCS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6866 RNA:lower leg skin tissue male adult (54 years) +6868 ENCFF223MUU /home/drk/tillage/datasets/human/rna/encode/ENCSR463JBR/summary/coverage.w5 768 384 0.3 sum_sqrt 6868 RNA:CD4-positive, alpha-beta T cell male adult (37 years) +6869 ENCFF992XOR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR464VSR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6870 RNA:placenta tissue male embryo +6870 ENCFF992XOR- /home/drk/tillage/datasets/human/rna/encode/ENCSR464VSR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6869 RNA:placenta tissue male embryo +6871 ENCFF064BKZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR466XAR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6872 RNA:chorion tissue female embryo (40 weeks) +6872 ENCFF064BKZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR466XAR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6871 RNA:chorion tissue female embryo (40 weeks) +6873 ENCFF735XXE 
/home/drk/tillage/datasets/human/rna/encode/ENCSR467YDA/summary/coverage.w5 768 384 0.3 sum_sqrt 6873 RNA:CD14-positive monocyte +6874 ENCFF884TVR /home/drk/tillage/datasets/human/rna/encode/ENCSR468FBC/summary/coverage.w5 768 384 0.3 sum_sqrt 6874 RNA:thymus tissue male embryo (113 days) +6875 ENCFF144BNP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR469WPG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6876 RNA:Caco-2 +6876 ENCFF144BNP- /home/drk/tillage/datasets/human/rna/encode/ENCSR469WPG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6875 RNA:Caco-2 +6877 ENCFF163BTT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR470JHE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6878 RNA:cerebellum tissue male embryo (20 weeks) +6878 ENCFF163BTT- /home/drk/tillage/datasets/human/rna/encode/ENCSR470JHE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6877 RNA:cerebellum tissue male embryo (20 weeks) +6879 ENCFF561TMR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR471RUK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6880 RNA:stomach tissue male adult (37 years) +6880 ENCFF561TMR- /home/drk/tillage/datasets/human/rna/encode/ENCSR471RUK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6879 RNA:stomach tissue male adult (37 years) +6881 ENCFF911JBW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR472PBS/summary/coverage+.w5 768 384 0.3 sum_sqrt 6882 RNA:endodermal cell originated from HUES64 +6882 ENCFF911JBW- /home/drk/tillage/datasets/human/rna/encode/ENCSR472PBS/summary/coverage-.w5 768 384 0.3 sum_sqrt 6881 RNA:endodermal cell originated from HUES64 +6883 ENCFF291GNY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR473XAP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6884 RNA:naive thymus-derived CD8-positive, alpha-beta T cell male adult (30 years) +6884 ENCFF291GNY- /home/drk/tillage/datasets/human/rna/encode/ENCSR473XAP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6883 RNA:naive thymus-derived CD8-positive, alpha-beta T cell male adult (30 years) +6885 ENCFF213LRI+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR474TRG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6886 RNA:esophagus squamous epithelium tissue male adult (54 years) +6886 ENCFF213LRI- /home/drk/tillage/datasets/human/rna/encode/ENCSR474TRG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6885 RNA:esophagus squamous epithelium tissue male adult (54 years) +6887 ENCFF400CHK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR479MNN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6888 RNA:placenta tissue male embryo +6888 ENCFF400CHK- /home/drk/tillage/datasets/human/rna/encode/ENCSR479MNN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6887 RNA:placenta tissue male embryo +6889 ENCFF685FCX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR480SLD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6890 RNA:suprapubic skin tissue female adult (51 years) +6890 ENCFF685FCX- /home/drk/tillage/datasets/human/rna/encode/ENCSR480SLD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6889 RNA:suprapubic skin tissue female adult (51 years) +6891 ENCFF947KGU /home/drk/tillage/datasets/human/rna/encode/ENCSR482VRI/summary/coverage.w5 768 384 0.3 sum_sqrt 6891 RNA:small intestine tissue male embryo (108 days) +6892 ENCFF715SXQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR483IHO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6893 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +6893 ENCFF715SXQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR483IHO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6892 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +6894 ENCFF697OAU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR484WZL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6895 RNA:placenta tissue embryo +6895 ENCFF697OAU- /home/drk/tillage/datasets/human/rna/encode/ENCSR484WZL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6894 RNA:placenta tissue embryo +6896 ENCFF263ZRO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR485WBR/summary/coverage+.w5 
768 384 0.3 sum_sqrt 6897 RNA:gastroesophageal sphincter tissue male adult (54 years) +6897 ENCFF263ZRO- /home/drk/tillage/datasets/human/rna/encode/ENCSR485WBR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6896 RNA:gastroesophageal sphincter tissue male adult (54 years) +6898 ENCFF181QTI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR490SQH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6899 RNA:H7 +6899 ENCFF181QTI- /home/drk/tillage/datasets/human/rna/encode/ENCSR490SQH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6898 RNA:H7 +6900 ENCFF470HTA /home/drk/tillage/datasets/human/rna/encode/ENCSR491XNW/summary/coverage.w5 768 384 0.3 sum_sqrt 6900 RNA:with multiple sclerosis; CD8-positive, alpha-beta memory T cell +6901 ENCFF952UWM /home/drk/tillage/datasets/human/rna/encode/ENCSR492KRY/summary/coverage.w5 768 384 0.3 sum_sqrt 6901 RNA:K562 treated with 10 nM Chaetocin for 4 hours +6902 ENCFF535TIX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR495HDM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6903 RNA:prostate gland tissue male adult (37 years) +6903 ENCFF535TIX- /home/drk/tillage/datasets/human/rna/encode/ENCSR495HDM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6902 RNA:prostate gland tissue male adult (37 years) +6904 ENCFF143EWZ /home/drk/tillage/datasets/human/rna/encode/ENCSR495UXA/summary/coverage.w5 768 384 0.3 sum_sqrt 6904 RNA:kidney tissue female embryo (108 days) +6905 ENCFF089GUP /home/drk/tillage/datasets/human/rna/encode/ENCSR499NEL/summary/coverage.w5 768 384 0.3 sum_sqrt 6905 RNA:left lung tissue female embryo (105 days) +6906 ENCFF849BKN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR500JSJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6907 RNA:upper lobe of left lung tissue male adult (60 years) +6907 ENCFF849BKN- /home/drk/tillage/datasets/human/rna/encode/ENCSR500JSJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6906 RNA:upper lobe of left lung tissue male adult (60 years) +6908 ENCFF388QFN+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR500UOD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6909 RNA:mesodermal cell originated from HUES64 +6909 ENCFF388QFN- /home/drk/tillage/datasets/human/rna/encode/ENCSR500UOD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6908 RNA:mesodermal cell originated from HUES64 +6910 ENCFF845DJW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR501DTN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6911 RNA:CD8-positive, alpha-beta T cell male adult (21 years) +6911 ENCFF845DJW- /home/drk/tillage/datasets/human/rna/encode/ENCSR501DTN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6910 RNA:CD8-positive, alpha-beta T cell male adult (21 years) +6912 ENCFF754ZRA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR502OTI/summary/coverage+.w5 768 384 0.3 sum_sqrt 6913 RNA:psoas muscle tissue female adult (30 years) +6913 ENCFF754ZRA- /home/drk/tillage/datasets/human/rna/encode/ENCSR502OTI/summary/coverage-.w5 768 384 0.3 sum_sqrt 6912 RNA:psoas muscle tissue female adult (30 years) +6914 ENCFF560FVE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR504NIU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6915 RNA:subcutaneous adipose tissue tissue female adult (53 years) +6915 ENCFF560FVE- /home/drk/tillage/datasets/human/rna/encode/ENCSR504NIU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6914 RNA:subcutaneous adipose tissue tissue female adult (53 years) +6916 ENCFF672AXQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR504QMK/summary/coverage+.w5 768 384 0.3 sum_sqrt 6917 RNA:right lobe of liver tissue female adult (53 years) +6917 ENCFF672AXQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR504QMK/summary/coverage-.w5 768 384 0.3 sum_sqrt 6916 RNA:right lobe of liver tissue female adult (53 years) +6918 ENCFF280YRS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR504VXC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6919 RNA:A375 +6919 ENCFF280YRS- /home/drk/tillage/datasets/human/rna/encode/ENCSR504VXC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6918 RNA:A375 
+6920 ENCFF833QMT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR510MIA/summary/coverage+.w5 768 384 0.3 sum_sqrt 6921 RNA:esophagus squamous epithelium tissue female adult (51 years) +6921 ENCFF833QMT- /home/drk/tillage/datasets/human/rna/encode/ENCSR510MIA/summary/coverage-.w5 768 384 0.3 sum_sqrt 6920 RNA:esophagus squamous epithelium tissue female adult (51 years) +6922 ENCFF089SDX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR510PSL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6923 RNA:spleen tissue female adult (30 years) +6923 ENCFF089SDX- /home/drk/tillage/datasets/human/rna/encode/ENCSR510PSL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6922 RNA:spleen tissue female adult (30 years) +6924 ENCFF693XEN /home/drk/tillage/datasets/human/rna/encode/ENCSR510QZW/summary/coverage.w5 768 384 0.3 sum_sqrt 6924 RNA:GM23248 +6925 ENCFF672RNN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR516BJM/summary/coverage+.w5 768 384 0.3 sum_sqrt 6926 RNA:colonic mucosa tissue female child (16 years) +6926 ENCFF672RNN- /home/drk/tillage/datasets/human/rna/encode/ENCSR516BJM/summary/coverage-.w5 768 384 0.3 sum_sqrt 6925 RNA:colonic mucosa tissue female child (16 years) +6927 ENCFF709QRA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR516TTH/summary/coverage+.w5 768 384 0.3 sum_sqrt 6928 RNA:left ventricle myocardium inferior tissue male adult (60 years) +6928 ENCFF709QRA- /home/drk/tillage/datasets/human/rna/encode/ENCSR516TTH/summary/coverage-.w5 768 384 0.3 sum_sqrt 6927 RNA:left ventricle myocardium inferior tissue male adult (60 years) +6929 ENCFF481CMO /home/drk/tillage/datasets/human/rna/encode/ENCSR516VDS/summary/coverage.w5 768 384 0.3 sum_sqrt 6929 RNA:hindlimb muscle tissue male embryo (120 days) +6930 ENCFF344YVW /home/drk/tillage/datasets/human/rna/encode/ENCSR518BSQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6930 RNA:K562 treated with 10 uM Crizotinib for 4 hours +6931 ENCFF670XJQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR518XRJ/summary/coverage+.w5 768 
384 0.3 sum_sqrt 6932 RNA:endocrine pancreas tissue male adult (45 years) +6932 ENCFF670XJQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR518XRJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6931 RNA:endocrine pancreas tissue male adult (45 years) +6933 ENCFF027CXS /home/drk/tillage/datasets/human/rna/encode/ENCSR522XTV/summary/coverage.w5 768 384 0.3 sum_sqrt 6933 RNA:muscle of back tissue male embryo (101 days) +6934 ENCFF849FLT /home/drk/tillage/datasets/human/rna/encode/ENCSR523EDD/summary/coverage.w5 768 384 0.3 sum_sqrt 6934 RNA:small intestine tissue female embryo (120 days) +6935 ENCFF901ATL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR523RGW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6936 RNA:pancreas tissue female adult (47 years) +6936 ENCFF901ATL- /home/drk/tillage/datasets/human/rna/encode/ENCSR523RGW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6935 RNA:pancreas tissue female adult (47 years) +6937 ENCFF069OIK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR528ZKN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6938 RNA:gastroesophageal sphincter tissue male adult (37 years) +6938 ENCFF069OIK- /home/drk/tillage/datasets/human/rna/encode/ENCSR528ZKN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6937 RNA:gastroesophageal sphincter tissue male adult (37 years) +6939 ENCFF113MGH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR530NHO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6940 RNA:K562 nuclear fraction +6940 ENCFF113MGH- /home/drk/tillage/datasets/human/rna/encode/ENCSR530NHO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6939 RNA:K562 nuclear fraction +6941 ENCFF007LJG /home/drk/tillage/datasets/human/rna/encode/ENCSR531RKI/summary/coverage.w5 768 384 0.3 sum_sqrt 6941 RNA:muscle of trunk tissue female embryo (120 days) +6942 ENCFF101SYV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR532LJV/summary/coverage+.w5 768 384 0.3 sum_sqrt 6943 RNA:thyroid gland tissue male adult (37 years) +6943 ENCFF101SYV- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR532LJV/summary/coverage-.w5 768 384 0.3 sum_sqrt 6942 RNA:thyroid gland tissue male adult (37 years) +6944 ENCFF646ZZQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR533TOW/summary/coverage+.w5 768 384 0.3 sum_sqrt 6945 RNA:activated CD8-positive, alpha-beta T cell male adult (21 years) treated with anti-CD3 and anti-CD28 coated beads +6945 ENCFF646ZZQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR533TOW/summary/coverage-.w5 768 384 0.3 sum_sqrt 6944 RNA:activated CD8-positive, alpha-beta T cell male adult (21 years) treated with anti-CD3 and anti-CD28 coated beads +6946 ENCFF298CPL /home/drk/tillage/datasets/human/rna/encode/ENCSR534JVH/summary/coverage.w5 768 384 0.3 sum_sqrt 6946 RNA:small intestine tissue female embryo (98 days) +6947 ENCFF636BRP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR534UXT/summary/coverage+.w5 768 384 0.3 sum_sqrt 6948 RNA:amnion tissue female embryo (40 weeks) +6948 ENCFF636BRP- /home/drk/tillage/datasets/human/rna/encode/ENCSR534UXT/summary/coverage-.w5 768 384 0.3 sum_sqrt 6947 RNA:amnion tissue female embryo (40 weeks) +6949 ENCFF790DEA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR535VTR/summary/coverage+.w5 768 384 0.3 sum_sqrt 6950 RNA:HT1080 +6950 ENCFF790DEA- /home/drk/tillage/datasets/human/rna/encode/ENCSR535VTR/summary/coverage-.w5 768 384 0.3 sum_sqrt 6949 RNA:HT1080 +6951 ENCFF709BGZ /home/drk/tillage/datasets/human/rna/encode/ENCSR535YOP/summary/coverage.w5 768 384 0.3 sum_sqrt 6951 RNA:with multiple sclerosis; IgD-negative memory B cell +6952 ENCFF257QQB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR537BCG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6953 RNA:H1 +6953 ENCFF257QQB- /home/drk/tillage/datasets/human/rna/encode/ENCSR537BCG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6952 RNA:H1 +6954 ENCFF982JPV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR538FRP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6955 RNA:activated CD8-positive, alpha-beta memory T 
cell male adult (30 years) +6955 ENCFF982JPV- /home/drk/tillage/datasets/human/rna/encode/ENCSR538FRP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6954 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +6956 ENCFF808EKP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR541TIG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6957 RNA:HepG2 membrane fraction +6957 ENCFF808EKP- /home/drk/tillage/datasets/human/rna/encode/ENCSR541TIG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6956 RNA:HepG2 membrane fraction +6958 ENCFF595DZY /home/drk/tillage/datasets/human/rna/encode/ENCSR542OHE/summary/coverage.w5 768 384 0.3 sum_sqrt 6958 RNA:spinal cord tissue female embryo (113 days) +6959 ENCFF876MLB /home/drk/tillage/datasets/human/rna/encode/ENCSR543TQW/summary/coverage.w5 768 384 0.3 sum_sqrt 6959 RNA:right renal pelvis tissue male embryo (120 days) +6960 ENCFF684FER+ /home/drk/tillage/datasets/human/rna/encode/ENCSR544SAU/summary/coverage+.w5 768 384 0.3 sum_sqrt 6961 RNA:Peyer's patch tissue female adult (53 years) +6961 ENCFF684FER- /home/drk/tillage/datasets/human/rna/encode/ENCSR544SAU/summary/coverage-.w5 768 384 0.3 sum_sqrt 6960 RNA:Peyer's patch tissue female adult (53 years) +6962 ENCFF089BOJ /home/drk/tillage/datasets/human/rna/encode/ENCSR545MEZ/summary/coverage.w5 768 384 0.3 sum_sqrt 6962 RNA:CD4-positive, alpha-beta T cell male adult (21 years) +6963 ENCFF844CIZ /home/drk/tillage/datasets/human/rna/encode/ENCSR545WAC/summary/coverage.w5 768 384 0.3 sum_sqrt 6963 RNA:muscle of leg tissue male embryo (97 days) +6964 ENCFF713ZMW /home/drk/tillage/datasets/human/rna/encode/ENCSR547TNE/summary/coverage.w5 768 384 0.3 sum_sqrt 6964 RNA:muscle of arm tissue male embryo (96 days) +6965 ENCFF728QLF /home/drk/tillage/datasets/human/rna/encode/ENCSR549DVY/summary/coverage.w5 768 384 0.3 sum_sqrt 6965 RNA:stomach tissue female embryo (108 days) +6966 ENCFF410QZX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR551NII/summary/coverage+.w5 768 384 0.3 sum_sqrt 
6967 RNA:lower leg skin tissue female adult (51 years) +6967 ENCFF410QZX- /home/drk/tillage/datasets/human/rna/encode/ENCSR551NII/summary/coverage-.w5 768 384 0.3 sum_sqrt 6966 RNA:lower leg skin tissue female adult (51 years) +6968 ENCFF150UWD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR552EGO/summary/coverage+.w5 768 384 0.3 sum_sqrt 6969 RNA:HeLa-S3 +6969 ENCFF150UWD- /home/drk/tillage/datasets/human/rna/encode/ENCSR552EGO/summary/coverage-.w5 768 384 0.3 sum_sqrt 6968 RNA:HeLa-S3 +6970 ENCFF707WTD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR552RFJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 6971 RNA:stimulated activated CD8-positive, alpha-beta T cell male adult (21 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +6971 ENCFF707WTD- /home/drk/tillage/datasets/human/rna/encode/ENCSR552RFJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 6970 RNA:stimulated activated CD8-positive, alpha-beta T cell male adult (21 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +6972 ENCFF146DMX /home/drk/tillage/datasets/human/rna/encode/ENCSR552YAE/summary/coverage.w5 768 384 0.3 sum_sqrt 6972 RNA:right renal pelvis tissue male embryo (105 days) +6973 ENCFF085JXX /home/drk/tillage/datasets/human/rna/encode/ENCSR554KBK/summary/coverage.w5 768 384 0.3 sum_sqrt 6973 RNA:right lung tissue female embryo (108 days) +6974 ENCFF489SFV /home/drk/tillage/datasets/human/rna/encode/ENCSR555BCP/summary/coverage.w5 768 384 0.3 sum_sqrt 6974 RNA:adrenal gland tissue embryo (96 days) +6975 ENCFF866JMO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR557PFL/summary/coverage+.w5 768 384 0.3 sum_sqrt 6976 RNA:amnion tissue embryo (16 weeks) +6976 ENCFF866JMO- /home/drk/tillage/datasets/human/rna/encode/ENCSR557PFL/summary/coverage-.w5 768 384 0.3 sum_sqrt 6975 RNA:amnion tissue embryo (16 weeks) +6977 ENCFF274TDF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR558SEE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6978 RNA:A673 +6978 
ENCFF274TDF- /home/drk/tillage/datasets/human/rna/encode/ENCSR558SEE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6977 RNA:A673 +6979 ENCFF565XVW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR559HWG/summary/coverage+.w5 768 384 0.3 sum_sqrt 6980 RNA:endodermal cell originated from H1 +6980 ENCFF565XVW- /home/drk/tillage/datasets/human/rna/encode/ENCSR559HWG/summary/coverage-.w5 768 384 0.3 sum_sqrt 6979 RNA:endodermal cell originated from H1 +6981 ENCFF744BTD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR560IFD/summary/coverage+.w5 768 384 0.3 sum_sqrt 6982 RNA:chorionic villus tissue male embryo (16 weeks) +6982 ENCFF744BTD- /home/drk/tillage/datasets/human/rna/encode/ENCSR560IFD/summary/coverage-.w5 768 384 0.3 sum_sqrt 6981 RNA:chorionic villus tissue male embryo (16 weeks) +6983 ENCFF488KNY /home/drk/tillage/datasets/human/rna/encode/ENCSR560MDQ/summary/coverage.w5 768 384 0.3 sum_sqrt 6983 RNA:right lung tissue female embryo (105 days) +6984 ENCFF917RKL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR561FEE/summary/coverage+.w5 768 384 0.3 sum_sqrt 6985 RNA:HepG2 +6985 ENCFF917RKL- /home/drk/tillage/datasets/human/rna/encode/ENCSR561FEE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6984 RNA:HepG2 +6986 ENCFF951UGT /home/drk/tillage/datasets/human/rna/encode/ENCSR561WEX/summary/coverage.w5 768 384 0.3 sum_sqrt 6986 RNA:muscle of leg tissue male embryo (97 days) +6987 ENCFF104NDB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR562BUN/summary/coverage+.w5 768 384 0.3 sum_sqrt 6988 RNA:middle frontal area 46 tissue female adult (90 or above years) +6988 ENCFF104NDB- /home/drk/tillage/datasets/human/rna/encode/ENCSR562BUN/summary/coverage-.w5 768 384 0.3 sum_sqrt 6987 RNA:middle frontal area 46 tissue female adult (90 or above years) +6989 ENCFF233HOM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR563VMC/summary/coverage+.w5 768 384 0.3 sum_sqrt 6990 RNA:psoas muscle tissue female adult (61 years) +6990 ENCFF233HOM- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR563VMC/summary/coverage-.w5 768 384 0.3 sum_sqrt 6989 RNA:psoas muscle tissue female adult (61 years) +6991 ENCFF047ADC /home/drk/tillage/datasets/human/rna/encode/ENCSR563ZWI/summary/coverage.w5 768 384 0.3 sum_sqrt 6991 RNA:K562 treated with 10 nM Bortezomib for 4 hours +6992 ENCFF637ANZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR568YRP/summary/coverage+.w5 768 384 0.3 sum_sqrt 6993 RNA:SJCRH30 +6993 ENCFF637ANZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR568YRP/summary/coverage-.w5 768 384 0.3 sum_sqrt 6992 RNA:SJCRH30 +6994 ENCFF218MMR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR569JKX/summary/coverage+.w5 768 384 0.3 sum_sqrt 6995 RNA:SK-N-DZ cytosolic fraction +6995 ENCFF218MMR- /home/drk/tillage/datasets/human/rna/encode/ENCSR569JKX/summary/coverage-.w5 768 384 0.3 sum_sqrt 6994 RNA:SK-N-DZ cytosolic fraction +6996 ENCFF898IHN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR571BML/summary/coverage+.w5 768 384 0.3 sum_sqrt 6997 RNA:pancreas tissue female adult (30 years) +6997 ENCFF898IHN- /home/drk/tillage/datasets/human/rna/encode/ENCSR571BML/summary/coverage-.w5 768 384 0.3 sum_sqrt 6996 RNA:pancreas tissue female adult (30 years) +6998 ENCFF212QRL /home/drk/tillage/datasets/human/rna/encode/ENCSR571IUW/summary/coverage.w5 768 384 0.3 sum_sqrt 6998 RNA:stomach tissue female embryo (110 days) +6999 ENCFF025THJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR571RXE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7000 RNA:right atrium auricular region tissue female adult (53 years) +7000 ENCFF025THJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR571RXE/summary/coverage-.w5 768 384 0.3 sum_sqrt 6999 RNA:right atrium auricular region tissue female adult (53 years) +7001 ENCFF231VPQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR572EET/summary/coverage+.w5 768 384 0.3 sum_sqrt 7002 RNA:neuronal stem cell originated from H1 +7002 ENCFF231VPQ- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR572EET/summary/coverage-.w5 768 384 0.3 sum_sqrt 7001 RNA:neuronal stem cell originated from H1 +7003 ENCFF387SJN /home/drk/tillage/datasets/human/rna/encode/ENCSR572FXC/summary/coverage.w5 768 384 0.3 sum_sqrt 7003 RNA:left lung tissue male embryo (105 days) +7004 ENCFF462KBX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR574PFY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7005 RNA:psoas muscle tissue female adult (41 years) +7005 ENCFF462KBX- /home/drk/tillage/datasets/human/rna/encode/ENCSR574PFY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7004 RNA:psoas muscle tissue female adult (41 years) +7006 ENCFF254PWG /home/drk/tillage/datasets/human/rna/encode/ENCSR575OHC/summary/coverage.w5 768 384 0.3 sum_sqrt 7006 RNA:muscle of arm tissue male embryo (120 days) +7007 ENCFF470VWE /home/drk/tillage/datasets/human/rna/encode/ENCSR576UKA/summary/coverage.w5 768 384 0.3 sum_sqrt 7007 RNA:muscle of back tissue male embryo (97 days) +7008 ENCFF788FRD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR579BDN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7009 RNA:pancreas tissue female adult (41 years) +7009 ENCFF788FRD- /home/drk/tillage/datasets/human/rna/encode/ENCSR579BDN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7008 RNA:pancreas tissue female adult (41 years) +7010 ENCFF228AHT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR580GSX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7011 RNA:A172 +7011 ENCFF228AHT- /home/drk/tillage/datasets/human/rna/encode/ENCSR580GSX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7010 RNA:A172 +7012 ENCFF591XND+ /home/drk/tillage/datasets/human/rna/encode/ENCSR584JXD/summary/coverage+.w5 768 384 0.3 sum_sqrt 7013 RNA:Caki2 +7013 ENCFF591XND- /home/drk/tillage/datasets/human/rna/encode/ENCSR584JXD/summary/coverage-.w5 768 384 0.3 sum_sqrt 7012 RNA:Caki2 +7014 ENCFF201RLE /home/drk/tillage/datasets/human/rna/encode/ENCSR585DJN/summary/coverage.w5 768 384 0.3 sum_sqrt 7014 RNA:with multiple sclerosis; naive 
thymus-derived CD8-positive, alpha-beta T cell +7015 ENCFF304TWT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR586SEE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7016 RNA:NCI-H460 cytosolic fraction +7016 ENCFF304TWT- /home/drk/tillage/datasets/human/rna/encode/ENCSR586SEE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7015 RNA:NCI-H460 cytosolic fraction +7017 ENCFF731FYT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR586SYA/summary/coverage+.w5 768 384 0.3 sum_sqrt 7018 RNA:body of pancreas tissue female adult (53 years) +7018 ENCFF731FYT- /home/drk/tillage/datasets/human/rna/encode/ENCSR586SYA/summary/coverage-.w5 768 384 0.3 sum_sqrt 7017 RNA:body of pancreas tissue female adult (53 years) +7019 ENCFF520RFL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR588EJX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7020 RNA:H1 +7020 ENCFF520RFL- /home/drk/tillage/datasets/human/rna/encode/ENCSR588EJX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7019 RNA:H1 +7021 ENCFF568PWV /home/drk/tillage/datasets/human/rna/encode/ENCSR588TSF/summary/coverage.w5 768 384 0.3 sum_sqrt 7021 RNA:CD4-positive, alpha-beta memory T cell +7022 ENCFF034RBU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR589EBT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7023 RNA:upper lobe of left lung tissue female adult (61 years) +7023 ENCFF034RBU- /home/drk/tillage/datasets/human/rna/encode/ENCSR589EBT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7022 RNA:upper lobe of left lung tissue female adult (61 years) +7024 ENCFF426JDT /home/drk/tillage/datasets/human/rna/encode/ENCSR592EZK/summary/coverage.w5 768 384 0.3 sum_sqrt 7024 RNA:left lung tissue female embryo (98 days) +7025 ENCFF086SRN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR593AMV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7026 RNA:ectodermal cell originated from HUES64 +7026 ENCFF086SRN- /home/drk/tillage/datasets/human/rna/encode/ENCSR593AMV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7025 RNA:ectodermal cell originated from HUES64 +7027 ENCFF827UHC+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR593MZL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7028 RNA:stimulated activated CD4-positive, alpha-beta T cell male adult (20 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +7028 ENCFF827UHC- /home/drk/tillage/datasets/human/rna/encode/ENCSR593MZL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7027 RNA:stimulated activated CD4-positive, alpha-beta T cell male adult (20 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +7029 ENCFF199MXA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR594NJP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7030 RNA:K562 insoluble cytoplasmic fraction +7030 ENCFF199MXA- /home/drk/tillage/datasets/human/rna/encode/ENCSR594NJP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7029 RNA:K562 insoluble cytoplasmic fraction +7031 ENCFF694EBG /home/drk/tillage/datasets/human/rna/encode/ENCSR595YCU/summary/coverage.w5 768 384 0.3 sum_sqrt 7031 RNA:excitatory neuron genetically modified (insertion) using TALEN inserting M. 
musculus Neurog2 originated from WTC11 +7032 ENCFF407ZDP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR596ACL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7033 RNA:K562 membrane fraction +7033 ENCFF407ZDP- /home/drk/tillage/datasets/human/rna/encode/ENCSR596ACL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7032 RNA:K562 membrane fraction +7034 ENCFF018NXB /home/drk/tillage/datasets/human/rna/encode/ENCSR596AQY/summary/coverage.w5 768 384 0.3 sum_sqrt 7034 RNA:naive thymus-derived CD4-positive, alpha-beta T cell +7035 ENCFF455CDH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR596KAH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7036 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +7036 ENCFF455CDH- /home/drk/tillage/datasets/human/rna/encode/ENCSR596KAH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7035 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +7037 ENCFF297NQG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR598KJX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7038 RNA:adrenal gland tissue male adult (34 years) +7038 ENCFF297NQG- /home/drk/tillage/datasets/human/rna/encode/ENCSR598KJX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7037 RNA:adrenal gland tissue male adult (34 years) +7039 ENCFF065NBH /home/drk/tillage/datasets/human/rna/encode/ENCSR601DZY/summary/coverage.w5 768 384 0.3 sum_sqrt 7039 RNA:K562 treated with 1% DMSO for 12 hours +7040 ENCFF007ZBY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR609NZM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7041 RNA:gastrocnemius medialis tissue female adult (51 years) +7041 ENCFF007ZBY- /home/drk/tillage/datasets/human/rna/encode/ENCSR609NZM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7040 RNA:gastrocnemius medialis tissue female adult (51 years) +7042 ENCFF097CWH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR612HYR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7043 RNA:small intestine tissue female adult (30 years) +7043 ENCFF097CWH- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR612HYR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7042 RNA:small intestine tissue female adult (30 years) +7044 ENCFF585HTZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR615EEK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7045 RNA:K562 +7045 ENCFF585HTZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR615EEK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7044 RNA:K562 +7046 ENCFF458QQU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR618IQY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7047 RNA:small intestine tissue male child (3 years) +7047 ENCFF458QQU- /home/drk/tillage/datasets/human/rna/encode/ENCSR618IQY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7046 RNA:small intestine tissue male child (3 years) +7048 ENCFF973BSN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR620LQN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7049 RNA:esophagus muscularis mucosa tissue female adult (51 years) +7049 ENCFF973BSN- /home/drk/tillage/datasets/human/rna/encode/ENCSR620LQN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7048 RNA:esophagus muscularis mucosa tissue female adult (51 years) +7050 ENCFF383RPD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR620NSN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7051 RNA:bronchus fibroblast of lung +7051 ENCFF383RPD- /home/drk/tillage/datasets/human/rna/encode/ENCSR620NSN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7050 RNA:bronchus fibroblast of lung +7052 ENCFF824QUG /home/drk/tillage/datasets/human/rna/encode/ENCSR620ZNQ/summary/coverage.w5 768 384 0.3 sum_sqrt 7052 RNA:muscle of arm tissue female embryo (105 days) +7053 ENCFF592XZA /home/drk/tillage/datasets/human/rna/encode/ENCSR621FYE/summary/coverage.w5 768 384 0.3 sum_sqrt 7053 RNA:small intestine tissue male embryo (115 days) +7054 ENCFF733JPK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR621PZI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7055 RNA:spleen tissue female adult (41 years) +7055 ENCFF733JPK- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR621PZI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7054 RNA:spleen tissue female adult (41 years) +7056 ENCFF808SDN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR622PIH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7057 RNA:right cardiac atrium tissue female adult (59 years) +7057 ENCFF808SDN- /home/drk/tillage/datasets/human/rna/encode/ENCSR622PIH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7056 RNA:right cardiac atrium tissue female adult (59 years) +7058 ENCFF086IIB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR625QJI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7059 RNA:NCI-H460 nuclear fraction +7059 ENCFF086IIB- /home/drk/tillage/datasets/human/rna/encode/ENCSR625QJI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7058 RNA:NCI-H460 nuclear fraction +7060 ENCFF091AGW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR629VMZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7061 RNA:pancreas tissue male adult (34 years) +7061 ENCFF091AGW- /home/drk/tillage/datasets/human/rna/encode/ENCSR629VMZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7060 RNA:pancreas tissue male adult (34 years) +7062 ENCFF672VYQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR630VJN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7063 RNA:transverse colon tissue male adult (54 years) +7063 ENCFF672VYQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR630VJN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7062 RNA:transverse colon tissue male adult (54 years) +7064 ENCFF983RQU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR631FXT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7065 RNA:T-cell male adult (38 years) +7065 ENCFF983RQU- /home/drk/tillage/datasets/human/rna/encode/ENCSR631FXT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7064 RNA:T-cell male adult (38 years) +7066 ENCFF296HLN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR631NUQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7067 RNA:sciatic nerve tissue female adult (41 years) +7067 ENCFF296HLN- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR631NUQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7066 RNA:sciatic nerve tissue female adult (41 years) +7068 ENCFF482BDQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR634JQK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7069 RNA:middle frontal area 46 tissue female adult (90 or above years) +7069 ENCFF482BDQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR634JQK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7068 RNA:middle frontal area 46 tissue female adult (90 or above years) +7070 ENCFF814YUZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR634LOX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7071 RNA:foreskin melanocyte male newborn +7071 ENCFF814YUZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR634LOX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7070 RNA:foreskin melanocyte male newborn +7072 ENCFF045ZHZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR635GTY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7073 RNA:heart tissue female adult (30 years) +7073 ENCFF045ZHZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR635GTY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7072 RNA:heart tissue female adult (30 years) +7074 ENCFF275DKR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR636LEU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7075 RNA:HFFc6 +7075 ENCFF275DKR- /home/drk/tillage/datasets/human/rna/encode/ENCSR636LEU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7074 RNA:HFFc6 +7076 ENCFF365LTY /home/drk/tillage/datasets/human/rna/encode/ENCSR637GBV/summary/coverage.w5 768 384 0.3 sum_sqrt 7076 RNA:fibroblast of skin of back male embryo (97 days) +7077 ENCFF544ZDA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR637VLS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7078 RNA:K562 +7078 ENCFF544ZDA- /home/drk/tillage/datasets/human/rna/encode/ENCSR637VLS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7077 RNA:K562 +7079 ENCFF848ZVQ /home/drk/tillage/datasets/human/rna/encode/ENCSR638OAH/summary/coverage.w5 768 384 0.3 sum_sqrt 7079 
RNA:with multiple sclerosis; CD14-positive monocyte +7080 ENCFF368FFC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR643QIZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7081 RNA:H1 +7081 ENCFF368FFC- /home/drk/tillage/datasets/human/rna/encode/ENCSR643QIZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7080 RNA:H1 +7082 ENCFF889JRS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR645TCG/summary/coverage+.w5 768 384 0.3 sum_sqrt 7083 RNA:omental fat pad tissue female adult (53 years) +7083 ENCFF889JRS- /home/drk/tillage/datasets/human/rna/encode/ENCSR645TCG/summary/coverage-.w5 768 384 0.3 sum_sqrt 7082 RNA:omental fat pad tissue female adult (53 years) +7084 ENCFF857GII+ /home/drk/tillage/datasets/human/rna/encode/ENCSR648JOK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7085 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +7085 ENCFF857GII- /home/drk/tillage/datasets/human/rna/encode/ENCSR648JOK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7084 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (90 or above years) +7086 ENCFF702XYL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR648KDM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7087 RNA:PC-3 +7087 ENCFF702XYL- /home/drk/tillage/datasets/human/rna/encode/ENCSR648KDM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7086 RNA:PC-3 +7088 ENCFF304ZAM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR648OSR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7089 RNA:tibial nerve tissue male adult (54 years) +7089 ENCFF304ZAM- /home/drk/tillage/datasets/human/rna/encode/ENCSR648OSR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7088 RNA:tibial nerve tissue male adult (54 years) +7090 ENCFF196UHI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR648YUM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7091 RNA:placenta tissue female embryo +7091 ENCFF196UHI- /home/drk/tillage/datasets/human/rna/encode/ENCSR648YUM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7090 RNA:placenta tissue female embryo 
+7092 ENCFF519CVR /home/drk/tillage/datasets/human/rna/encode/ENCSR652AWW/summary/coverage.w5 768 384 0.3 sum_sqrt 7092 RNA:muscle of back tissue male embryo (105 days) +7093 ENCFF185FBE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR652PHZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7094 RNA:left cardiac atrium tissue female adult (59 years) +7094 ENCFF185FBE- /home/drk/tillage/datasets/human/rna/encode/ENCSR652PHZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7093 RNA:left cardiac atrium tissue female adult (59 years) +7095 ENCFF125TVD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR653DFZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7096 RNA:G401 +7096 ENCFF125TVD- /home/drk/tillage/datasets/human/rna/encode/ENCSR653DFZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7095 RNA:G401 +7097 ENCFF844TIV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR653ZJF/summary/coverage+.w5 768 384 0.312421875 sum_sqrt 7098 RNA:transverse colon tissue male adult (37 years) +7098 ENCFF844TIV- /home/drk/tillage/datasets/human/rna/encode/ENCSR653ZJF/summary/coverage-.w5 768 384 0.312421875 sum_sqrt 7097 RNA:transverse colon tissue male adult (37 years) +7099 ENCFF443EQZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR654UPQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7100 RNA:endothelial cell of umbilical vein newborn +7100 ENCFF443EQZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR654UPQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7099 RNA:endothelial cell of umbilical vein newborn +7101 ENCFF893OBW /home/drk/tillage/datasets/human/rna/encode/ENCSR655XQF/summary/coverage.w5 768 384 0.3 sum_sqrt 7101 RNA:fibroblast of skin of back male embryo (97 days) +7102 ENCFF168WRO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR663IOE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7103 RNA:spleen tissue male child (3 years) +7103 ENCFF168WRO- /home/drk/tillage/datasets/human/rna/encode/ENCSR663IOE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7102 RNA:spleen tissue male child (3 years) +7104 ENCFF807FQZ+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR663WGC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7105 RNA:mesenchymal stem cell originated from H1 +7105 ENCFF807FQZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR663WGC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7104 RNA:mesenchymal stem cell originated from H1 +7106 ENCFF497DVM /home/drk/tillage/datasets/human/rna/encode/ENCSR667OPL/summary/coverage.w5 768 384 0.3 sum_sqrt 7106 RNA:CD4-positive, CD25-positive, alpha-beta regulatory T cell +7107 ENCFF492KPL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR669KQU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7108 RNA:SK-MEL-5 +7108 ENCFF492KPL- /home/drk/tillage/datasets/human/rna/encode/ENCSR669KQU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7107 RNA:SK-MEL-5 +7109 ENCFF190ZWO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR670WQY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7110 RNA:H1 +7110 ENCFF190ZWO- /home/drk/tillage/datasets/human/rna/encode/ENCSR670WQY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7109 RNA:H1 +7111 ENCFF784RHW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR671IYC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7112 RNA:body of pancreas tissue female adult (51 years) +7112 ENCFF784RHW- /home/drk/tillage/datasets/human/rna/encode/ENCSR671IYC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7111 RNA:body of pancreas tissue female adult (51 years) +7113 ENCFF402SEK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR671WMH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7114 RNA:subcutaneous adipose tissue tissue male adult (54 years) +7114 ENCFF402SEK- /home/drk/tillage/datasets/human/rna/encode/ENCSR671WMH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7113 RNA:subcutaneous adipose tissue tissue male adult (54 years) +7115 ENCFF851BGA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR672JUF/summary/coverage+.w5 768 384 0.3 sum_sqrt 7116 RNA:foreskin keratinocyte male newborn +7116 ENCFF851BGA- /home/drk/tillage/datasets/human/rna/encode/ENCSR672JUF/summary/coverage-.w5 768 
384 0.3 sum_sqrt 7115 RNA:foreskin keratinocyte male newborn +7117 ENCFF940DUI /home/drk/tillage/datasets/human/rna/encode/ENCSR672NDZ/summary/coverage.w5 768 384 0.3 sum_sqrt 7117 RNA:K562 treated with 10 nM Chaetocin for 12 hours +7118 ENCFF325GTZ /home/drk/tillage/datasets/human/rna/encode/ENCSR672OYP/summary/coverage.w5 768 384 0.3 sum_sqrt 7118 RNA:immature natural killer cell +7119 ENCFF248FOA /home/drk/tillage/datasets/human/rna/encode/ENCSR673DJS/summary/coverage.w5 768 384 0.3 sum_sqrt 7119 RNA:with multiple sclerosis; CD4-positive, alpha-beta memory T cell +7120 ENCFF118IXU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR673UKZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7121 RNA:WTC11 +7121 ENCFF118IXU- /home/drk/tillage/datasets/human/rna/encode/ENCSR673UKZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7120 RNA:WTC11 +7122 ENCFF985BPB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR674KHG/summary/coverage+.w5 768 384 0.3 sum_sqrt 7123 RNA:mucosa of descending colon tissue male adult (40 years) +7123 ENCFF985BPB- /home/drk/tillage/datasets/human/rna/encode/ENCSR674KHG/summary/coverage-.w5 768 384 0.3 sum_sqrt 7122 RNA:mucosa of descending colon tissue male adult (40 years) +7124 ENCFF643ZAD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR675YAS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7125 RNA:right cardiac atrium tissue male adult (34 years) +7125 ENCFF643ZAD- /home/drk/tillage/datasets/human/rna/encode/ENCSR675YAS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7124 RNA:right cardiac atrium tissue male adult (34 years) +7126 ENCFF565FOF /home/drk/tillage/datasets/human/rna/encode/ENCSR677MYO/summary/coverage.w5 768 384 0.3 sum_sqrt 7126 RNA:muscle of arm tissue embryo (101 days) +7127 ENCFF532MCC /home/drk/tillage/datasets/human/rna/encode/ENCSR678LBL/summary/coverage.w5 768 384 0.3 sum_sqrt 7127 RNA:IgD-negative memory B cell +7128 ENCFF340TAG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR678TMV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7129 
RNA:gastrocnemius medialis tissue female adult (53 years) +7129 ENCFF340TAG- /home/drk/tillage/datasets/human/rna/encode/ENCSR678TMV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7128 RNA:gastrocnemius medialis tissue female adult (53 years) +7130 ENCFF064KKA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR680AAZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7131 RNA:adrenal gland tissue male adult (21 years) +7131 ENCFF064KKA- /home/drk/tillage/datasets/human/rna/encode/ENCSR680AAZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7130 RNA:adrenal gland tissue male adult (21 years) +7132 ENCFF544MJM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR680USE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7133 RNA:hair follicular keratinocyte male adult (55 years) +7133 ENCFF544MJM- /home/drk/tillage/datasets/human/rna/encode/ENCSR680USE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7132 RNA:hair follicular keratinocyte male adult (55 years) +7134 ENCFF838WFU /home/drk/tillage/datasets/human/rna/encode/ENCSR681ALA/summary/coverage.w5 768 384 0.3 sum_sqrt 7134 RNA:fibroblast of skin of abdomen male embryo (97 days) +7135 ENCFF267DQT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR686JJB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7136 RNA:adipose tissue tissue female adult (30 years) +7136 ENCFF267DQT- /home/drk/tillage/datasets/human/rna/encode/ENCSR686JJB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7135 RNA:adipose tissue tissue female adult (30 years) +7137 ENCFF166WWA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR687HJY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7138 RNA:thyroid gland tissue female adult (51 years) +7138 ENCFF166WWA- /home/drk/tillage/datasets/human/rna/encode/ENCSR687HJY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7137 RNA:thyroid gland tissue female adult (51 years) +7139 ENCFF662LZH /home/drk/tillage/datasets/human/rna/encode/ENCSR688YOZ/summary/coverage.w5 768 384 0.3 sum_sqrt 7139 RNA:adrenal gland tissue male embryo (108 days) +7140 ENCFF082CYK 
/home/drk/tillage/datasets/human/rna/encode/ENCSR689NPY/summary/coverage.w5 768 384 0.3 sum_sqrt 7140 RNA:K562 treated with 10 uM AR-42 for 24 hours +7141 ENCFF334KVA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR692DIM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7142 RNA:naive thymus-derived CD8-positive, alpha-beta T cell male adult (30 years) +7142 ENCFF334KVA- /home/drk/tillage/datasets/human/rna/encode/ENCSR692DIM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7141 RNA:naive thymus-derived CD8-positive, alpha-beta T cell male adult (30 years) +7143 ENCFF205MJV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR693CSQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7144 RNA:heart left ventricle tissue male child (3 years) +7144 ENCFF205MJV- /home/drk/tillage/datasets/human/rna/encode/ENCSR693CSQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7143 RNA:heart left ventricle tissue male child (3 years) +7145 ENCFF134ALH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR693GGB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7146 RNA:testis tissue male adult (44 years) +7146 ENCFF134ALH- /home/drk/tillage/datasets/human/rna/encode/ENCSR693GGB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7145 RNA:testis tissue male adult (44 years) +7147 ENCFF652PJM /home/drk/tillage/datasets/human/rna/encode/ENCSR693JOK/summary/coverage.w5 768 384 0.3 sum_sqrt 7147 RNA:K562 treated with 100 nM GSK J4 for 24 hours +7148 ENCFF927ANS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR693KOP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7149 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (86 years) +7149 ENCFF927ANS- /home/drk/tillage/datasets/human/rna/encode/ENCSR693KOP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7148 RNA:with Alzheimer's disease; middle frontal area 46 tissue female adult (86 years) +7150 ENCFF632MAZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR694AWV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7151 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 
years) +7151 ENCFF632MAZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR694AWV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7150 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +7152 ENCFF940PJY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR696SMK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7153 RNA:M059J +7153 ENCFF940PJY- /home/drk/tillage/datasets/human/rna/encode/ENCSR696SMK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7152 RNA:M059J +7154 ENCFF221JZR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR696YIB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7155 RNA:K562 insoluble cytoplasmic fraction +7155 ENCFF221JZR- /home/drk/tillage/datasets/human/rna/encode/ENCSR696YIB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7154 RNA:K562 insoluble cytoplasmic fraction +7156 ENCFF730BTN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR697EYE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7157 RNA:foreskin melanocyte male newborn +7157 ENCFF730BTN- /home/drk/tillage/datasets/human/rna/encode/ENCSR697EYE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7156 RNA:foreskin melanocyte male newborn +7158 ENCFF087ORU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR698RPL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7159 RNA:HCT116 +7159 ENCFF087ORU- /home/drk/tillage/datasets/human/rna/encode/ENCSR698RPL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7158 RNA:HCT116 +7160 ENCFF594TMO /home/drk/tillage/datasets/human/rna/encode/ENCSR699YJR/summary/coverage.w5 768 384 0.3 sum_sqrt 7160 RNA:muscle of leg tissue female embryo (113 days) +7161 ENCFF101ASV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR700BEW/summary/coverage+.w5 768 384 0.3 sum_sqrt 7162 RNA:mesendoderm originated from H1 +7162 ENCFF101ASV- /home/drk/tillage/datasets/human/rna/encode/ENCSR700BEW/summary/coverage-.w5 768 384 0.3 sum_sqrt 7161 RNA:mesendoderm originated from H1 +7163 ENCFF610OAY /home/drk/tillage/datasets/human/rna/encode/ENCSR700QVJ/summary/coverage.w5 768 384 0.3 sum_sqrt 7163 RNA:spleen tissue 
male embryo (120 days) +7164 ENCFF930CLZ /home/drk/tillage/datasets/human/rna/encode/ENCSR701JXD/summary/coverage.w5 768 384 0.3 sum_sqrt 7164 RNA:with multiple sclerosis; immature natural killer cell +7165 ENCFF854EBH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR701TST/summary/coverage+.w5 768 384 0.3 sum_sqrt 7166 RNA:prostate gland tissue male adult (54 years) +7166 ENCFF854EBH- /home/drk/tillage/datasets/human/rna/encode/ENCSR701TST/summary/coverage-.w5 768 384 0.3 sum_sqrt 7165 RNA:prostate gland tissue male adult (54 years) +7167 ENCFF633OPC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR701UNO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7168 RNA:trophoblast tissue embryo (22 weeks) +7168 ENCFF633OPC- /home/drk/tillage/datasets/human/rna/encode/ENCSR701UNO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7167 RNA:trophoblast tissue embryo (22 weeks) +7169 ENCFF759QTQ /home/drk/tillage/datasets/human/rna/encode/ENCSR702IGQ/summary/coverage.w5 768 384 0.3 sum_sqrt 7169 RNA:stomach tissue female embryo (107 days) +7170 ENCFF629GCT /home/drk/tillage/datasets/human/rna/encode/ENCSR702IMR/summary/coverage.w5 768 384 0.3 sum_sqrt 7170 RNA:left kidney tissue male embryo (96 days) +7171 ENCFF777ZZR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR706XCG/summary/coverage+.w5 768 384 0.3 sum_sqrt 7172 RNA:placental basal plate tissue male embryo (16 weeks) +7172 ENCFF777ZZR- /home/drk/tillage/datasets/human/rna/encode/ENCSR706XCG/summary/coverage-.w5 768 384 0.3 sum_sqrt 7171 RNA:placental basal plate tissue male embryo (16 weeks) +7173 ENCFF911IWQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR708VVE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7174 RNA:subcutaneous adipose tissue tissue male adult (37 years) +7174 ENCFF911IWQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR708VVE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7173 RNA:subcutaneous adipose tissue tissue male adult (37 years) +7175 ENCFF771JBV 
/home/drk/tillage/datasets/human/rna/encode/ENCSR711NGL/summary/coverage.w5 768 384 0.3 sum_sqrt 7175 RNA:forelimb muscle tissue female embryo (108 days) +7176 ENCFF920HIF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR712BRU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7177 RNA:H9 +7177 ENCFF920HIF- /home/drk/tillage/datasets/human/rna/encode/ENCSR712BRU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7176 RNA:H9 +7178 ENCFF811FXS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR712GOC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7179 RNA:H1 +7179 ENCFF811FXS- /home/drk/tillage/datasets/human/rna/encode/ENCSR712GOC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7178 RNA:H1 +7180 ENCFF970CQA /home/drk/tillage/datasets/human/rna/encode/ENCSR713DXI/summary/coverage.w5 768 384 0.3 sum_sqrt 7180 RNA:IgD-negative memory B cell +7181 ENCFF112IXW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR714KDG/summary/coverage+.w5 768 384 0.3 sum_sqrt 7182 RNA:liver tissue male child (3 years) +7182 ENCFF112IXW- /home/drk/tillage/datasets/human/rna/encode/ENCSR714KDG/summary/coverage-.w5 768 384 0.3 sum_sqrt 7181 RNA:liver tissue male child (3 years) +7183 ENCFF155PWD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR714QAF/summary/coverage+.w5 768 384 0.3 sum_sqrt 7184 RNA:mole tissue female +7184 ENCFF155PWD- /home/drk/tillage/datasets/human/rna/encode/ENCSR714QAF/summary/coverage-.w5 768 384 0.3 sum_sqrt 7183 RNA:mole tissue female +7185 ENCFF697OJB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR718CDN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7186 RNA:adipose tissue tissue male child (3 years) +7186 ENCFF697OJB- /home/drk/tillage/datasets/human/rna/encode/ENCSR718CDN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7185 RNA:adipose tissue tissue male child (3 years) +7187 ENCFF350OVE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR718RTN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7188 RNA:lower lobe of left lung tissue female adult (59 years) +7188 ENCFF350OVE- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR718RTN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7187 RNA:lower lobe of left lung tissue female adult (59 years) +7189 ENCFF481AMD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR719HRO/summary/coverage+.w5 768 384 0.6390034863451481 sum_sqrt 7190 RNA:small intestine tissue male adult (34 years) +7190 ENCFF481AMD- /home/drk/tillage/datasets/human/rna/encode/ENCSR719HRO/summary/coverage-.w5 768 384 0.6390034863451481 sum_sqrt 7189 RNA:small intestine tissue male adult (34 years) +7191 ENCFF597SVD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR719PXC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7192 RNA:ascending aorta tissue female adult (53 years) +7192 ENCFF597SVD- /home/drk/tillage/datasets/human/rna/encode/ENCSR719PXC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7191 RNA:ascending aorta tissue female adult (53 years) +7193 ENCFF504JQU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR721HDG/summary/coverage+.w5 768 384 0.3 sum_sqrt 7194 RNA:stomach tissue male adult (34 years) +7194 ENCFF504JQU- /home/drk/tillage/datasets/human/rna/encode/ENCSR721HDG/summary/coverage-.w5 768 384 0.3 sum_sqrt 7193 RNA:stomach tissue male adult (34 years) +7195 ENCFF751PQP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR725TPW/summary/coverage+.w5 768 384 0.3 sum_sqrt 7196 RNA:ovary tissue female adult (30 years) +7196 ENCFF751PQP- /home/drk/tillage/datasets/human/rna/encode/ENCSR725TPW/summary/coverage-.w5 768 384 0.3 sum_sqrt 7195 RNA:ovary tissue female adult (30 years) +7197 ENCFF753LZJ /home/drk/tillage/datasets/human/rna/encode/ENCSR727VTD/summary/coverage.w5 768 384 0.3 sum_sqrt 7197 RNA:ovary tissue female embryo +7198 ENCFF900XSJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR729CAZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7199 RNA:omental fat pad tissue male adult (37 years) +7199 ENCFF900XSJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR729CAZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7198 RNA:omental fat pad 
tissue male adult (37 years) +7200 ENCFF590WBY /home/drk/tillage/datasets/human/rna/encode/ENCSR729ZII/summary/coverage.w5 768 384 0.3 sum_sqrt 7200 RNA:muscle of back tissue male embryo (91 days) +7201 ENCFF225CYB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR733JBX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7202 RNA:progenitor cell of endocrine pancreas +7202 ENCFF225CYB- /home/drk/tillage/datasets/human/rna/encode/ENCSR733JBX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7201 RNA:progenitor cell of endocrine pancreas +7203 ENCFF457QEG /home/drk/tillage/datasets/human/rna/encode/ENCSR733MWN/summary/coverage.w5 768 384 0.3 sum_sqrt 7203 RNA:left lung tissue male embryo (91 days) +7204 ENCFF817LFV /home/drk/tillage/datasets/human/rna/encode/ENCSR733QST/summary/coverage.w5 768 384 0.3 sum_sqrt 7204 RNA:K562 treated with 7.5 nM Panobinostat for 24 hours +7205 ENCFF381OAF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR735JKB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7206 RNA:HFFc6 +7206 ENCFF381OAF- /home/drk/tillage/datasets/human/rna/encode/ENCSR735JKB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7205 RNA:HFFc6 +7207 ENCFF629VCP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR738PHQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7208 RNA:HUES64 +7208 ENCFF629VCP- /home/drk/tillage/datasets/human/rna/encode/ENCSR738PHQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7207 RNA:HUES64 +7209 ENCFF274KQB /home/drk/tillage/datasets/human/rna/encode/ENCSR738ZHN/summary/coverage.w5 768 384 0.3 sum_sqrt 7209 RNA:K562 treated with 10 uM Crizotinib for 12 hours +7210 ENCFF558ZAJ /home/drk/tillage/datasets/human/rna/encode/ENCSR740OPV/summary/coverage.w5 768 384 0.3 sum_sqrt 7210 RNA:adrenal gland tissue female embryo (108 days) +7211 ENCFF553LFH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR740YMS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7212 RNA:gastroesophageal sphincter tissue female adult (53 years) +7212 ENCFF553LFH- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR740YMS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7211 RNA:gastroesophageal sphincter tissue female adult (53 years) +7213 ENCFF056UVT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR741MLF/summary/coverage+.w5 768 384 0.3 sum_sqrt 7214 RNA:chorion tissue male embryo (16 weeks) +7214 ENCFF056UVT- /home/drk/tillage/datasets/human/rna/encode/ENCSR741MLF/summary/coverage-.w5 768 384 0.3 sum_sqrt 7213 RNA:chorion tissue male embryo (16 weeks) +7215 ENCFF206OGU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR741QEH/summary/coverage+.w5 768 384 0.3480142405063291 sum_sqrt 7216 RNA:adipose tissue tissue male adult (34 years) +7216 ENCFF206OGU- /home/drk/tillage/datasets/human/rna/encode/ENCSR741QEH/summary/coverage-.w5 768 384 0.3480142405063291 sum_sqrt 7215 RNA:adipose tissue tissue male adult (34 years) +7217 ENCFF682NDW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR743GKS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7218 RNA:PC-9 +7218 ENCFF682NDW- /home/drk/tillage/datasets/human/rna/encode/ENCSR743GKS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7217 RNA:PC-9 +7219 ENCFF689BPI /home/drk/tillage/datasets/human/rna/encode/ENCSR748GVH/summary/coverage.w5 768 384 0.3 sum_sqrt 7219 RNA:GM23338 originated from GM23248 +7220 ENCFF008POE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR750ETS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7221 RNA:esophagus muscularis mucosa tissue male adult (54 years) +7221 ENCFF008POE- /home/drk/tillage/datasets/human/rna/encode/ENCSR750ETS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7220 RNA:esophagus muscularis mucosa tissue male adult (54 years) +7222 ENCFF056FRZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR752UNJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7223 RNA:stomach tissue female adult (53 years) +7223 ENCFF056FRZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR752UNJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7222 RNA:stomach tissue female adult (53 years) +7224 ENCFF349IKE+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR754WLW/summary/coverage+.w5 768 384 0.3 sum_sqrt 7225 RNA:adrenal gland tissue female adult (53 years) +7225 ENCFF349IKE- /home/drk/tillage/datasets/human/rna/encode/ENCSR754WLW/summary/coverage-.w5 768 384 0.3 sum_sqrt 7224 RNA:adrenal gland tissue female adult (53 years) +7226 ENCFF012MMA /home/drk/tillage/datasets/human/rna/encode/ENCSR755ARW/summary/coverage.w5 768 384 0.3 sum_sqrt 7226 RNA:naive thymus-derived CD4-positive, alpha-beta T cell +7227 ENCFF193HUI /home/drk/tillage/datasets/human/rna/encode/ENCSR755LFM/summary/coverage.w5 768 384 0.3 sum_sqrt 7227 RNA:testis tissue male embryo +7228 ENCFF988LZV /home/drk/tillage/datasets/human/rna/encode/ENCSR758BAT/summary/coverage.w5 768 384 0.3 sum_sqrt 7228 RNA:K562 treated with 50 uM C646 for 4 hours +7229 ENCFF541ZMB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR759TPN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7230 RNA:left colon tissue female adult (59 years) +7230 ENCFF541ZMB- /home/drk/tillage/datasets/human/rna/encode/ENCSR759TPN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7229 RNA:left colon tissue female adult (59 years) +7231 ENCFF099RCL /home/drk/tillage/datasets/human/rna/encode/ENCSR759WPF/summary/coverage.w5 768 384 0.3 sum_sqrt 7231 RNA:left renal cortex interstitium tissue male embryo (120 days) +7232 ENCFF024MEB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR761SHI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7233 RNA:neural crest cell +7233 ENCFF024MEB- /home/drk/tillage/datasets/human/rna/encode/ENCSR761SHI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7232 RNA:neural crest cell +7234 ENCFF498CLL /home/drk/tillage/datasets/human/rna/encode/ENCSR761ZWF/summary/coverage.w5 768 384 0.3 sum_sqrt 7234 RNA:with multiple sclerosis; CD4-positive, CD25-positive, alpha-beta regulatory T cell +7235 ENCFF382VRE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR762CJN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7236 RNA:trophoblast cell originated from H1 +7236 
ENCFF382VRE- /home/drk/tillage/datasets/human/rna/encode/ENCSR762CJN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7235 RNA:trophoblast cell originated from H1 +7237 ENCFF805SZS /home/drk/tillage/datasets/human/rna/encode/ENCSR763CQW/summary/coverage.w5 768 384 0.3 sum_sqrt 7237 RNA:T-cell male adult (37 years) +7238 ENCFF386YGS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR763NOO/summary/coverage+.w5 768 384 0.4277421236872812 sum_sqrt 7239 RNA:aorta tissue male adult (34 years) +7239 ENCFF386YGS- /home/drk/tillage/datasets/human/rna/encode/ENCSR763NOO/summary/coverage-.w5 768 384 0.4277421236872812 sum_sqrt 7238 RNA:aorta tissue male adult (34 years) +7240 ENCFF133LYJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR763OMY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7241 RNA:adrenal gland tissue female adult (41 years) +7241 ENCFF133LYJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR763OMY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7240 RNA:adrenal gland tissue female adult (41 years) +7242 ENCFF343JCX /home/drk/tillage/datasets/human/rna/encode/ENCSR764EGD/summary/coverage.w5 768 384 0.3 sum_sqrt 7242 RNA:with multiple sclerosis; CD8-positive, alpha-beta memory T cell +7243 ENCFF366WMR /home/drk/tillage/datasets/human/rna/encode/ENCSR766TSI/summary/coverage.w5 768 384 0.3 sum_sqrt 7243 RNA:with multiple sclerosis; immature natural killer cell +7244 ENCFF851XAJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR769LNJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7245 RNA:heart left ventricle tissue male adult (34 years) +7245 ENCFF851XAJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR769LNJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7244 RNA:heart left ventricle tissue male adult (34 years) +7246 ENCFF270YAA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR773COB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7247 RNA:left colon tissue female adult (46 years) +7247 ENCFF270YAA- /home/drk/tillage/datasets/human/rna/encode/ENCSR773COB/summary/coverage-.w5 768 384 0.3 
sum_sqrt 7246 RNA:left colon tissue female adult (46 years) +7248 ENCFF958WEW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR774MGO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7249 RNA:chondrocyte +7249 ENCFF958WEW- /home/drk/tillage/datasets/human/rna/encode/ENCSR774MGO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7248 RNA:chondrocyte +7250 ENCFF295UWT /home/drk/tillage/datasets/human/rna/encode/ENCSR774SEX/summary/coverage.w5 768 384 0.3 sum_sqrt 7250 RNA:stomach tissue embryo (101 days) +7251 ENCFF502RUV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR775KCE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7252 RNA:thymus tissue male child (3 years) +7252 ENCFF502RUV- /home/drk/tillage/datasets/human/rna/encode/ENCSR775KCE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7251 RNA:thymus tissue male child (3 years) +7253 ENCFF259EZH /home/drk/tillage/datasets/human/rna/encode/ENCSR777ONH/summary/coverage.w5 768 384 0.3 sum_sqrt 7253 RNA:large intestine tissue male embryo (91 days) +7254 ENCFF462CKY+ /home/drk/tillage/datasets/human/rna/encode/ENCSR777TBF/summary/coverage+.w5 768 384 0.3 sum_sqrt 7255 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +7255 ENCFF462CKY- /home/drk/tillage/datasets/human/rna/encode/ENCSR777TBF/summary/coverage-.w5 768 384 0.3 sum_sqrt 7254 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +7256 ENCFF813JFQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR779ZJW/summary/coverage+.w5 768 384 0.3 sum_sqrt 7257 RNA:trophoblast tissue male embryo (16 weeks) +7257 ENCFF813JFQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR779ZJW/summary/coverage-.w5 768 384 0.3 sum_sqrt 7256 RNA:trophoblast tissue male embryo (16 weeks) +7258 ENCFF333ENA /home/drk/tillage/datasets/human/rna/encode/ENCSR783BUO/summary/coverage.w5 768 384 0.3 sum_sqrt 7258 RNA:stomach tissue female embryo (98 days) +7259 ENCFF924CJI /home/drk/tillage/datasets/human/rna/encode/ENCSR783SSV/summary/coverage.w5 768 384 0.3 sum_sqrt 7259 
RNA:with multiple sclerosis; naive B cell +7260 ENCFF734UPN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR784PDN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7261 RNA:amnion tissue male embryo (16 weeks) +7261 ENCFF734UPN- /home/drk/tillage/datasets/human/rna/encode/ENCSR784PDN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7260 RNA:amnion tissue male embryo (16 weeks) +7262 ENCFF050ROK /home/drk/tillage/datasets/human/rna/encode/ENCSR785OCX/summary/coverage.w5 768 384 0.3 sum_sqrt 7262 RNA:muscle of arm tissue female embryo (85 days) +7263 ENCFF312ZLI+ /home/drk/tillage/datasets/human/rna/encode/ENCSR792OIJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7264 RNA:K562 +7264 ENCFF312ZLI- /home/drk/tillage/datasets/human/rna/encode/ENCSR792OIJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7263 RNA:K562 +7265 ENCFF794ZUR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR793CEK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7266 RNA:placental basal plate tissue male embryo (38 weeks) +7266 ENCFF794ZUR- /home/drk/tillage/datasets/human/rna/encode/ENCSR793CEK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7265 RNA:placental basal plate tissue male embryo (38 weeks) +7267 ENCFF145JTX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR795GYH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7268 RNA:middle frontal area 46 tissue female adult (85 years) +7268 ENCFF145JTX- /home/drk/tillage/datasets/human/rna/encode/ENCSR795GYH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7267 RNA:middle frontal area 46 tissue female adult (85 years) +7269 ENCFF532KPO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR796HLX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7270 RNA:tibial nerve tissue male adult (37 years) +7270 ENCFF532KPO- /home/drk/tillage/datasets/human/rna/encode/ENCSR796HLX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7269 RNA:tibial nerve tissue male adult (37 years) +7271 ENCFF321EHO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR797BPP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7272 RNA:GM23248 +7272 
ENCFF321EHO- /home/drk/tillage/datasets/human/rna/encode/ENCSR797BPP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7271 RNA:GM23248 +7273 ENCFF722TDX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR797RXV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7274 RNA:IMR-90 +7274 ENCFF722TDX- /home/drk/tillage/datasets/human/rna/encode/ENCSR797RXV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7273 RNA:IMR-90 +7275 ENCFF542OVF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR800WIY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7276 RNA:transverse colon tissue female adult (53 years) +7276 ENCFF542OVF- /home/drk/tillage/datasets/human/rna/encode/ENCSR800WIY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7275 RNA:transverse colon tissue female adult (53 years) +7277 ENCFF718DUW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR801MKV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7278 RNA:adrenal gland tissue female adult (51 years) +7278 ENCFF718DUW- /home/drk/tillage/datasets/human/rna/encode/ENCSR801MKV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7277 RNA:adrenal gland tissue female adult (51 years) +7279 ENCFF980FZG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR802HPM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7280 RNA:Peyer's patch tissue male adult (37 years) +7280 ENCFF980FZG- /home/drk/tillage/datasets/human/rna/encode/ENCSR802HPM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7279 RNA:Peyer's patch tissue male adult (37 years) +7281 ENCFF875RMF /home/drk/tillage/datasets/human/rna/encode/ENCSR804NGX/summary/coverage.w5 768 384 0.3 sum_sqrt 7281 RNA:with multiple sclerosis; naive B cell +7282 ENCFF483IQJ /home/drk/tillage/datasets/human/rna/encode/ENCSR806ESH/summary/coverage.w5 768 384 0.3 sum_sqrt 7282 RNA:muscle of back tissue female embryo (98 days) +7283 ENCFF466NAU /home/drk/tillage/datasets/human/rna/encode/ENCSR806HCA/summary/coverage.w5 768 384 0.3 sum_sqrt 7283 RNA:K562 treated with 100 nM GSK J4 for 12 hours +7284 ENCFF347OLB 
/home/drk/tillage/datasets/human/rna/encode/ENCSR810ZKJ/summary/coverage.w5 768 384 0.3 sum_sqrt 7284 RNA:K562 treated with 5 uM JQ1 for 24 hours +7285 ENCFF372DGR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR812AKX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7286 RNA:sigmoid colon tissue male adult (54 years) +7286 ENCFF372DGR- /home/drk/tillage/datasets/human/rna/encode/ENCSR812AKX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7285 RNA:sigmoid colon tissue male adult (54 years) +7287 ENCFF495AAX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR813BDU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7288 RNA:HepG2 insoluble cytoplasmic fraction +7288 ENCFF495AAX- /home/drk/tillage/datasets/human/rna/encode/ENCSR813BDU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7287 RNA:HepG2 insoluble cytoplasmic fraction +7289 ENCFF458ASG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR815NTL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7290 RNA:MCF 10A +7290 ENCFF458ASG- /home/drk/tillage/datasets/human/rna/encode/ENCSR815NTL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7289 RNA:MCF 10A +7291 ENCFF818BRP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR815UVL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7292 RNA:mammary microvascular endothelial cell female adult (26 years) +7292 ENCFF818BRP- /home/drk/tillage/datasets/human/rna/encode/ENCSR815UVL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7291 RNA:mammary microvascular endothelial cell female adult (26 years) +7293 ENCFF573JBK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR816HLU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7294 RNA:left lung tissue male adult (40 years) +7294 ENCFF573JBK- /home/drk/tillage/datasets/human/rna/encode/ENCSR816HLU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7293 RNA:left lung tissue male adult (40 years) +7295 ENCFF740HBZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR816IZA/summary/coverage+.w5 768 384 0.3 sum_sqrt 7296 RNA:upper lobe of right lung tissue male adult (60 years) +7296 ENCFF740HBZ- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR816IZA/summary/coverage-.w5 768 384 0.3 sum_sqrt 7295 RNA:upper lobe of right lung tissue male adult (60 years) +7297 ENCFF903PTH /home/drk/tillage/datasets/human/rna/encode/ENCSR816TAY/summary/coverage.w5 768 384 0.3 sum_sqrt 7297 RNA:CD4-positive, CD25-positive, alpha-beta regulatory T cell +7298 ENCFF711RSC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR817TLH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7299 RNA:psoas muscle tissue male child (3 years) +7299 ENCFF711RSC- /home/drk/tillage/datasets/human/rna/encode/ENCSR817TLH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7298 RNA:psoas muscle tissue male child (3 years) +7300 ENCFF840DDQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR817WHQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7301 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +7301 ENCFF840DDQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR817WHQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7300 RNA:activated CD8-positive, alpha-beta memory T cell male adult (30 years) +7302 ENCFF403PZE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR818DBU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7303 RNA:cardiac septum tissue female adult (41 years) +7303 ENCFF403PZE- /home/drk/tillage/datasets/human/rna/encode/ENCSR818DBU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7302 RNA:cardiac septum tissue female adult (41 years) +7304 ENCFF045JED+ /home/drk/tillage/datasets/human/rna/encode/ENCSR820IIL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7305 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +7305 ENCFF045JED- /home/drk/tillage/datasets/human/rna/encode/ENCSR820IIL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7304 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +7306 ENCFF509KPQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR820PHH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7307 RNA:GM12878 +7307 ENCFF509KPQ- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR820PHH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7306 RNA:GM12878 +7308 ENCFF745TWT /home/drk/tillage/datasets/human/rna/encode/ENCSR822AOE/summary/coverage.w5 768 384 0.3 sum_sqrt 7308 RNA:right renal cortex interstitium tissue male embryo (105 days) +7309 ENCFF595CZA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR822SUG/summary/coverage+.w5 768 384 0.3 sum_sqrt 7310 RNA:airway epithelial cell +7310 ENCFF595CZA- /home/drk/tillage/datasets/human/rna/encode/ENCSR822SUG/summary/coverage-.w5 768 384 0.3 sum_sqrt 7309 RNA:airway epithelial cell +7311 ENCFF929RPC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR825GWD/summary/coverage+.w5 768 384 2.003142076502732 sum_sqrt 7312 RNA:sigmoid colon tissue female adult (30 years) +7312 ENCFF929RPC- /home/drk/tillage/datasets/human/rna/encode/ENCSR825GWD/summary/coverage-.w5 768 384 2.003142076502732 sum_sqrt 7311 RNA:sigmoid colon tissue female adult (30 years) +7313 ENCFF577ZXA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR825UXP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7314 RNA:lower lobe of right lung tissue male adult (60 years) +7314 ENCFF577ZXA- /home/drk/tillage/datasets/human/rna/encode/ENCSR825UXP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7313 RNA:lower lobe of right lung tissue male adult (60 years) +7315 ENCFF118XUB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR827IXS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7316 RNA:sigmoid colon tissue female adult (51 years) +7316 ENCFF118XUB- /home/drk/tillage/datasets/human/rna/encode/ENCSR827IXS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7315 RNA:sigmoid colon tissue female adult (51 years) +7317 ENCFF589DYO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR828TEI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7318 RNA:myotube originated from skeletal muscle myoblast +7318 ENCFF589DYO- /home/drk/tillage/datasets/human/rna/encode/ENCSR828TEI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7317 RNA:myotube originated from 
skeletal muscle myoblast +7319 ENCFF333QAU /home/drk/tillage/datasets/human/rna/encode/ENCSR830HIN/summary/coverage.w5 768 384 0.3 sum_sqrt 7319 RNA:common myeloid progenitor, CD34-positive female adult (33 years) +7320 ENCFF553FQR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR837VMK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7321 RNA:heart left ventricle tissue female adult (46 years) +7321 ENCFF553FQR- /home/drk/tillage/datasets/human/rna/encode/ENCSR837VMK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7320 RNA:heart left ventricle tissue female adult (46 years) +7322 ENCFF512AIP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR837ZLY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7323 RNA:thoracic aorta tissue male adult (54 years) +7323 ENCFF512AIP- /home/drk/tillage/datasets/human/rna/encode/ENCSR837ZLY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7322 RNA:thoracic aorta tissue male adult (54 years) +7324 ENCFF668DGV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR838XNO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7325 RNA:mesenteric fat pad tissue female adult (59 years) +7325 ENCFF668DGV- /home/drk/tillage/datasets/human/rna/encode/ENCSR838XNO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7324 RNA:mesenteric fat pad tissue female adult (59 years) +7326 ENCFF237XXC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR839ZDH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7327 RNA:upper lobe of left lung tissue male adult (54 years) +7327 ENCFF237XXC- /home/drk/tillage/datasets/human/rna/encode/ENCSR839ZDH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7326 RNA:upper lobe of left lung tissue male adult (54 years) +7328 ENCFF368TTD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR841ADZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7329 RNA:ovary tissue female adult (53 years) +7329 ENCFF368TTD- /home/drk/tillage/datasets/human/rna/encode/ENCSR841ADZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7328 RNA:ovary tissue female adult (53 years) +7330 ENCFF863HRX+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR841QAC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7331 RNA:HFFc6 +7331 ENCFF863HRX- /home/drk/tillage/datasets/human/rna/encode/ENCSR841QAC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7330 RNA:HFFc6 +7332 ENCFF999ARF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR843HXR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7333 RNA:psoas muscle tissue male adult (34 years) +7333 ENCFF999ARF- /home/drk/tillage/datasets/human/rna/encode/ENCSR843HXR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7332 RNA:psoas muscle tissue male adult (34 years) +7334 ENCFF463TDW /home/drk/tillage/datasets/human/rna/encode/ENCSR843RJV/summary/coverage.w5 768 384 0.3 sum_sqrt 7334 RNA:GM12878 +7335 ENCFF685VLN /home/drk/tillage/datasets/human/rna/encode/ENCSR844RSF/summary/coverage.w5 768 384 0.3 sum_sqrt 7335 RNA:K562 treated with 7.5 nM Vorinostat for 24 hours +7336 ENCFF203UBD+ /home/drk/tillage/datasets/human/rna/encode/ENCSR844SCP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7337 RNA:activated T-cell male adult (38 years) treated with 50 U/mL Interleukin-2 for 4 hours, anti-CD3 and anti-CD28 coated beads for 4 hours +7337 ENCFF203UBD- /home/drk/tillage/datasets/human/rna/encode/ENCSR844SCP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7336 RNA:activated T-cell male adult (38 years) treated with 50 U/mL Interleukin-2 for 4 hours, anti-CD3 and anti-CD28 coated beads for 4 hours +7338 ENCFF918AOG /home/drk/tillage/datasets/human/rna/encode/ENCSR849RVP/summary/coverage.w5 768 384 0.3 sum_sqrt 7338 RNA:with multiple sclerosis; CD8-positive, alpha-beta memory T cell +7339 ENCFF255CRM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR851BRK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7340 RNA:ectodermal cell originated from HUES64 +7340 ENCFF255CRM- /home/drk/tillage/datasets/human/rna/encode/ENCSR851BRK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7339 RNA:ectodermal cell originated from HUES64 +7341 ENCFF734OLC+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR851GNB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7342 RNA:foreskin fibroblast male newborn +7342 ENCFF734OLC- /home/drk/tillage/datasets/human/rna/encode/ENCSR851GNB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7341 RNA:foreskin fibroblast male newborn +7343 ENCFF563AJC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR853BNH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7344 RNA:gastrocnemius medialis tissue male adult (37 years) +7344 ENCFF563AJC- /home/drk/tillage/datasets/human/rna/encode/ENCSR853BNH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7343 RNA:gastrocnemius medialis tissue male adult (37 years) +7345 ENCFF756IDW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR853TXT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7346 RNA:right cardiac atrium tissue male adult (60 years) +7346 ENCFF756IDW- /home/drk/tillage/datasets/human/rna/encode/ENCSR853TXT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7345 RNA:right cardiac atrium tissue male adult (60 years) +7347 ENCFF818TKM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR853WOM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7348 RNA:stomach tissue female adult (51 years) +7348 ENCFF818TKM- /home/drk/tillage/datasets/human/rna/encode/ENCSR853WOM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7347 RNA:stomach tissue female adult (51 years) +7349 ENCFF918QTH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR856VAD/summary/coverage+.w5 768 384 0.3 sum_sqrt 7350 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +7350 ENCFF918QTH- /home/drk/tillage/datasets/human/rna/encode/ENCSR856VAD/summary/coverage-.w5 768 384 0.3 sum_sqrt 7349 RNA:activated CD4-positive, alpha-beta memory T cell male adult (43 years) +7351 ENCFF444MUM /home/drk/tillage/datasets/human/rna/encode/ENCSR857VKL/summary/coverage.w5 768 384 0.3 sum_sqrt 7351 RNA:large intestine tissue female embryo (98 days) +7352 ENCFF593EJA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR857WJK/summary/coverage+.w5 
768 384 0.3 sum_sqrt 7353 RNA:sigmoid colon tissue male adult (37 years) +7353 ENCFF593EJA- /home/drk/tillage/datasets/human/rna/encode/ENCSR857WJK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7352 RNA:sigmoid colon tissue male adult (37 years) +7354 ENCFF108RDF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR858QEL/summary/coverage+.w5 768 384 0.3 sum_sqrt 7355 RNA:tibial nerve tissue female adult (53 years) +7355 ENCFF108RDF- /home/drk/tillage/datasets/human/rna/encode/ENCSR858QEL/summary/coverage-.w5 768 384 0.3 sum_sqrt 7354 RNA:tibial nerve tissue female adult (53 years) +7356 ENCFF238NVT /home/drk/tillage/datasets/human/rna/encode/ENCSR859HWB/summary/coverage.w5 768 384 0.3 sum_sqrt 7356 RNA:hepatocyte originated from H9 +7357 ENCFF946QQD /home/drk/tillage/datasets/human/rna/encode/ENCSR859KGW/summary/coverage.w5 768 384 0.3 sum_sqrt 7357 RNA:large intestine tissue female embryo (85 days) +7358 ENCFF490SYS /home/drk/tillage/datasets/human/rna/encode/ENCSR860DST/summary/coverage.w5 768 384 0.3 sum_sqrt 7358 RNA:muscle of leg tissue male embryo (96 days) +7359 ENCFF837YDW+ /home/drk/tillage/datasets/human/rna/encode/ENCSR860DWK/summary/coverage+.w5 768 384 0.3 sum_sqrt 7360 RNA:K562 cytosolic fraction +7360 ENCFF837YDW- /home/drk/tillage/datasets/human/rna/encode/ENCSR860DWK/summary/coverage-.w5 768 384 0.3 sum_sqrt 7359 RNA:K562 cytosolic fraction +7361 ENCFF300QBN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR860HAA/summary/coverage+.w5 768 384 0.3 sum_sqrt 7362 RNA:mammary epithelial cell female adult (17 years) +7362 ENCFF300QBN- /home/drk/tillage/datasets/human/rna/encode/ENCSR860HAA/summary/coverage-.w5 768 384 0.3 sum_sqrt 7361 RNA:mammary epithelial cell female adult (17 years) +7363 ENCFF104DTS /home/drk/tillage/datasets/human/rna/encode/ENCSR861QKF/summary/coverage.w5 768 384 0.3 sum_sqrt 7363 RNA:CD8-positive, alpha-beta T cell male adult (37 years) +7364 ENCFF551UCO /home/drk/tillage/datasets/human/rna/encode/ENCSR861SOG/summary/coverage.w5 768 
384 0.3 sum_sqrt 7364 RNA:left lung tissue female embryo (108 days) +7365 ENCFF853VJV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR862HPO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7366 RNA:HepG2 cytosolic fraction +7366 ENCFF853VJV- /home/drk/tillage/datasets/human/rna/encode/ENCSR862HPO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7365 RNA:HepG2 cytosolic fraction +7367 ENCFF726WTR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR862RGX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7368 RNA:suprapubic skin tissue male adult (37 years) +7368 ENCFF726WTR- /home/drk/tillage/datasets/human/rna/encode/ENCSR862RGX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7367 RNA:suprapubic skin tissue male adult (37 years) +7369 ENCFF458XQS /home/drk/tillage/datasets/human/rna/encode/ENCSR863BUL/summary/coverage.w5 768 384 0.3 sum_sqrt 7369 RNA:heart tissue female embryo (91 days) +7370 ENCFF193HXZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR863VFU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7371 RNA:middle frontal area 46 tissue female adult (88 years) +7371 ENCFF193HXZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR863VFU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7370 RNA:middle frontal area 46 tissue female adult (88 years) +7372 ENCFF289VET /home/drk/tillage/datasets/human/rna/encode/ENCSR866UOA/summary/coverage.w5 768 384 0.3 sum_sqrt 7372 RNA:K562 treated with 7.5 nM Panobinostat for 4 hours +7373 ENCFF858QRQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR867WQC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7374 RNA:right cardiac atrium tissue male adult (40 years) +7374 ENCFF858QRQ- /home/drk/tillage/datasets/human/rna/encode/ENCSR867WQC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7373 RNA:right cardiac atrium tissue male adult (40 years) +7375 ENCFF096CZT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR872LTT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7376 RNA:ectodermal cell originated from HUES64 +7376 ENCFF096CZT- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR872LTT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7375 RNA:ectodermal cell originated from HUES64 +7377 ENCFF701JUX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR875MVZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7378 RNA:with Alzheimer's disease; middle frontal area 46 tissue male adult (90 or above years) +7378 ENCFF701JUX- /home/drk/tillage/datasets/human/rna/encode/ENCSR875MVZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7377 RNA:with Alzheimer's disease; middle frontal area 46 tissue male adult (90 or above years) +7379 ENCFF571AFR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR876TAN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7380 RNA:left ventricle myocardium superior tissue male adult (60 years) +7380 ENCFF571AFR- /home/drk/tillage/datasets/human/rna/encode/ENCSR876TAN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7379 RNA:left ventricle myocardium superior tissue male adult (60 years) +7381 ENCFF163AGZ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR877FRY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7382 RNA:excitatory neuron +7382 ENCFF163AGZ- /home/drk/tillage/datasets/human/rna/encode/ENCSR877FRY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7381 RNA:excitatory neuron +7383 ENCFF863JIL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR878EUT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7384 RNA:glomerular endothelial cell female embryo (22 weeks) and male embryo (22 weeks) +7384 ENCFF863JIL- /home/drk/tillage/datasets/human/rna/encode/ENCSR878EUT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7383 RNA:glomerular endothelial cell female embryo (22 weeks) and male embryo (22 weeks) +7385 ENCFF318MTJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR880EGO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7386 RNA:SJSA1 +7386 ENCFF318MTJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR880EGO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7385 RNA:SJSA1 +7387 ENCFF107LVE 
/home/drk/tillage/datasets/human/rna/encode/ENCSR880XLM/summary/coverage.w5 768 384 0.3 sum_sqrt 7387 RNA:placenta tissue female embryo (113 days) +7388 ENCFF432UNL /home/drk/tillage/datasets/human/rna/encode/ENCSR882NWV/summary/coverage.w5 768 384 0.3 sum_sqrt 7388 RNA:K562 treated with 50 uM C646 for 24 hours +7389 ENCFF717QBH /home/drk/tillage/datasets/human/rna/encode/ENCSR884EVS/summary/coverage.w5 768 384 0.3 sum_sqrt 7389 RNA:right renal cortex interstitium tissue male embryo (120 days) +7390 ENCFF278EZT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR885DVH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7391 RNA:K562 +7391 ENCFF278EZT- /home/drk/tillage/datasets/human/rna/encode/ENCSR885DVH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7390 RNA:K562 +7392 ENCFF990JTR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR887ZSY/summary/coverage+.w5 768 384 0.3 sum_sqrt 7393 RNA:HepG2 membrane fraction +7393 ENCFF990JTR- /home/drk/tillage/datasets/human/rna/encode/ENCSR887ZSY/summary/coverage-.w5 768 384 0.3 sum_sqrt 7392 RNA:HepG2 membrane fraction +7394 ENCFF279MVV /home/drk/tillage/datasets/human/rna/encode/ENCSR889TRN/summary/coverage.w5 768 384 0.3 sum_sqrt 7394 RNA:GM12878 +7395 ENCFF343WYY /home/drk/tillage/datasets/human/rna/encode/ENCSR891JVD/summary/coverage.w5 768 384 0.3 sum_sqrt 7395 RNA:muscle of back tissue male embryo (127 days) +7396 ENCFF649RYB+ /home/drk/tillage/datasets/human/rna/encode/ENCSR892LBU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7397 RNA:kidney tissue female adult (47 years) +7397 ENCFF649RYB- /home/drk/tillage/datasets/human/rna/encode/ENCSR892LBU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7396 RNA:kidney tissue female adult (47 years) +7398 ENCFF982WYX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR894WMQ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7399 RNA:myocyte originated from LHCN-M2 +7399 ENCFF982WYX- /home/drk/tillage/datasets/human/rna/encode/ENCSR894WMQ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7398 RNA:myocyte originated from 
LHCN-M2 +7400 ENCFF031QGE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR895ZTB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7401 RNA:H1 +7401 ENCFF031QGE- /home/drk/tillage/datasets/human/rna/encode/ENCSR895ZTB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7400 RNA:H1 +7402 ENCFF168RPD /home/drk/tillage/datasets/human/rna/encode/ENCSR896QPD/summary/coverage.w5 768 384 0.3 sum_sqrt 7402 RNA:kidney tissue female embryo (85 days) +7403 ENCFF873UUS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR897JEH/summary/coverage+.w5 768 384 0.3 sum_sqrt 7404 RNA:Calu3 +7404 ENCFF873UUS- /home/drk/tillage/datasets/human/rna/encode/ENCSR897JEH/summary/coverage-.w5 768 384 0.3 sum_sqrt 7403 RNA:Calu3 +7405 ENCFF734VZN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR897KTO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7406 RNA:epithelial cell of alveolus of lung NONE and female embryo (21 weeks) +7406 ENCFF734VZN- /home/drk/tillage/datasets/human/rna/encode/ENCSR897KTO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7405 RNA:epithelial cell of alveolus of lung NONE and female embryo (21 weeks) +7407 ENCFF801IUE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR899IVV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7408 RNA:stimulated activated CD4-positive, alpha-beta T cell male adult (20 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +7408 ENCFF801IUE- /home/drk/tillage/datasets/human/rna/encode/ENCSR899IVV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7407 RNA:stimulated activated CD4-positive, alpha-beta T cell male adult (20 years) treated with 10 ng/mL Interleukin-2, anti-CD3 and anti-CD28 coated beads +7409 ENCFF473FLK /home/drk/tillage/datasets/human/rna/encode/ENCSR899NLW/summary/coverage.w5 768 384 0.3 sum_sqrt 7409 RNA:spinal cord tissue male embryo (96 days) +7410 ENCFF297UUV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR899OKE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7411 RNA:placenta tissue female embryo +7411 ENCFF297UUV- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR899OKE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7410 RNA:placenta tissue female embryo +7412 ENCFF860TYK /home/drk/tillage/datasets/human/rna/encode/ENCSR899SWV/summary/coverage.w5 768 384 0.3 sum_sqrt 7412 RNA:renal cortex interstitium tissue female embryo (120 days) +7413 ENCFF544TFG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR900DUO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7414 RNA:activated T-cell female adult (33 years) treated with 50 U/mL Interleukin-2 for 4 hours, anti-CD3 and anti-CD28 coated beads for 4 hours +7414 ENCFF544TFG- /home/drk/tillage/datasets/human/rna/encode/ENCSR900DUO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7413 RNA:activated T-cell female adult (33 years) treated with 50 U/mL Interleukin-2 for 4 hours, anti-CD3 and anti-CD28 coated beads for 4 hours +7415 ENCFF353PFR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR900FUP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7416 RNA:heart left ventricle tissue female adult (59 years) +7416 ENCFF353PFR- /home/drk/tillage/datasets/human/rna/encode/ENCSR900FUP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7415 RNA:heart left ventricle tissue female adult (59 years) +7417 ENCFF496TSR /home/drk/tillage/datasets/human/rna/encode/ENCSR900IJI/summary/coverage.w5 768 384 0.3 sum_sqrt 7417 RNA:K562 treated with 10 nM Bortezomib for 24 hours +7418 ENCFF726SGF /home/drk/tillage/datasets/human/rna/encode/ENCSR900JSG/summary/coverage.w5 768 384 0.3 sum_sqrt 7418 RNA:Purkinje cell male child (6 years) +7419 ENCFF359MDK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR900SGE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7420 RNA:spleen tissue female adult (53 years) +7420 ENCFF359MDK- /home/drk/tillage/datasets/human/rna/encode/ENCSR900SGE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7419 RNA:spleen tissue female adult (53 years) +7421 ENCFF430NYX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR903XMI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7422 RNA:placenta 
tissue female embryo +7422 ENCFF430NYX- /home/drk/tillage/datasets/human/rna/encode/ENCSR903XMI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7421 RNA:placenta tissue female embryo +7423 ENCFF926QTW /home/drk/tillage/datasets/human/rna/encode/ENCSR905LVO/summary/coverage.w5 768 384 0.3 sum_sqrt 7423 RNA:CD14-positive monocyte male adult (37 years) +7424 ENCFF318UAJ /home/drk/tillage/datasets/human/rna/encode/ENCSR906HEV/summary/coverage.w5 768 384 0.3 sum_sqrt 7424 RNA:muscle of trunk tissue female embryo (113 days) +7425 ENCFF880GSS /home/drk/tillage/datasets/human/rna/encode/ENCSR907KDH/summary/coverage.w5 768 384 0.3 sum_sqrt 7425 RNA:kidney tissue male embryo (87 days) +7426 ENCFF358IQV+ /home/drk/tillage/datasets/human/rna/encode/ENCSR908ZAS/summary/coverage+.w5 768 384 0.3 sum_sqrt 7427 RNA:hepatocyte originated from H9 +7427 ENCFF358IQV- /home/drk/tillage/datasets/human/rna/encode/ENCSR908ZAS/summary/coverage-.w5 768 384 0.3 sum_sqrt 7426 RNA:hepatocyte originated from H9 +7428 ENCFF426XDX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR910QOX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7429 RNA:spleen tissue male adult (34 years) +7429 ENCFF426XDX- /home/drk/tillage/datasets/human/rna/encode/ENCSR910QOX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7428 RNA:spleen tissue male adult (34 years) +7430 ENCFF107LSA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR911XSA/summary/coverage+.w5 768 384 0.3 sum_sqrt 7431 RNA:naive thymus-derived CD4-positive, alpha-beta T cell male adult (43 years) +7431 ENCFF107LSA- /home/drk/tillage/datasets/human/rna/encode/ENCSR911XSA/summary/coverage-.w5 768 384 0.3 sum_sqrt 7430 RNA:naive thymus-derived CD4-positive, alpha-beta T cell male adult (43 years) +7432 ENCFF043VUO+ /home/drk/tillage/datasets/human/rna/encode/ENCSR915EBZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7433 RNA:heart right ventricle tissue male adult (40 years) +7433 ENCFF043VUO- /home/drk/tillage/datasets/human/rna/encode/ENCSR915EBZ/summary/coverage-.w5 768 384 
0.3 sum_sqrt 7432 RNA:heart right ventricle tissue male adult (40 years) +7434 ENCFF124RPC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR917YHC/summary/coverage+.w5 768 384 0.3 sum_sqrt 7435 RNA:lung tissue female adult (30 years) +7435 ENCFF124RPC- /home/drk/tillage/datasets/human/rna/encode/ENCSR917YHC/summary/coverage-.w5 768 384 0.3 sum_sqrt 7434 RNA:lung tissue female adult (30 years) +7436 ENCFF403HKJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR919MZM/summary/coverage+.w5 768 384 0.3 sum_sqrt 7437 RNA:endometrial microvascular endothelial cells female adult (34 years) +7437 ENCFF403HKJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR919MZM/summary/coverage-.w5 768 384 0.3 sum_sqrt 7436 RNA:endometrial microvascular endothelial cells female adult (34 years) +7438 ENCFF965MBA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR919QJT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7439 RNA:H4 +7439 ENCFF965MBA- /home/drk/tillage/datasets/human/rna/encode/ENCSR919QJT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7438 RNA:H4 +7440 ENCFF123BBC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR920UAO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7441 RNA:astrocyte +7441 ENCFF123BBC- /home/drk/tillage/datasets/human/rna/encode/ENCSR920UAO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7440 RNA:astrocyte +7442 ENCFF891ALA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR922VBO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7443 RNA:stomach tissue male child (3 years) +7443 ENCFF891ALA- /home/drk/tillage/datasets/human/rna/encode/ENCSR922VBO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7442 RNA:stomach tissue male child (3 years) +7444 ENCFF974VUT+ /home/drk/tillage/datasets/human/rna/encode/ENCSR924MSZ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7445 RNA:heart left ventricle tissue male adult (40 years) +7445 ENCFF974VUT- /home/drk/tillage/datasets/human/rna/encode/ENCSR924MSZ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7444 RNA:heart left ventricle tissue male adult (40 years) +7446 
ENCFF150XAC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR925GFP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7447 RNA:activated CD4-positive, alpha-beta T cell male adult (20 years) treated with anti-CD3 and anti-CD28 coated beads +7447 ENCFF150XAC- /home/drk/tillage/datasets/human/rna/encode/ENCSR925GFP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7446 RNA:activated CD4-positive, alpha-beta T cell male adult (20 years) treated with anti-CD3 and anti-CD28 coated beads +7448 ENCFF046NXJ /home/drk/tillage/datasets/human/rna/encode/ENCSR928CEQ/summary/coverage.w5 768 384 0.3 sum_sqrt 7448 RNA:right renal pelvis tissue male embryo (105 days) +7449 ENCFF688MII /home/drk/tillage/datasets/human/rna/encode/ENCSR929KRW/summary/coverage.w5 768 384 0.3 sum_sqrt 7449 RNA:renal pelvis tissue male embryo (97 days) +7450 ENCFF604EYK /home/drk/tillage/datasets/human/rna/encode/ENCSR930URM/summary/coverage.w5 768 384 0.3 sum_sqrt 7450 RNA:large intestine tissue female embryo (120 days) +7451 ENCFF831HGM+ /home/drk/tillage/datasets/human/rna/encode/ENCSR931WGT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7452 RNA:HepG2 cytosolic fraction +7452 ENCFF831HGM- /home/drk/tillage/datasets/human/rna/encode/ENCSR931WGT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7451 RNA:HepG2 cytosolic fraction +7453 ENCFF484JKA /home/drk/tillage/datasets/human/rna/encode/ENCSR936COT/summary/coverage.w5 768 384 0.3 sum_sqrt 7453 RNA:naive B cell +7454 ENCFF850SIL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR938LSP/summary/coverage+.w5 768 384 0.3 sum_sqrt 7455 RNA:GM23338 originated from GM23248 +7455 ENCFF850SIL- /home/drk/tillage/datasets/human/rna/encode/ENCSR938LSP/summary/coverage-.w5 768 384 0.3 sum_sqrt 7454 RNA:GM23338 originated from GM23248 +7456 ENCFF956EIE /home/drk/tillage/datasets/human/rna/encode/ENCSR940EVD/summary/coverage.w5 768 384 0.3 sum_sqrt 7456 RNA:with multiple sclerosis; CD4-positive, CD25-positive, alpha-beta regulatory T cell +7457 ENCFF413WXI+ 
/home/drk/tillage/datasets/human/rna/encode/ENCSR942YMN/summary/coverage+.w5 768 384 0.3 sum_sqrt 7458 RNA:placenta tissue male embryo +7458 ENCFF413WXI- /home/drk/tillage/datasets/human/rna/encode/ENCSR942YMN/summary/coverage-.w5 768 384 0.3 sum_sqrt 7457 RNA:placenta tissue male embryo +7459 ENCFF315ODV /home/drk/tillage/datasets/human/rna/encode/ENCSR944FLL/summary/coverage.w5 768 384 0.3 sum_sqrt 7459 RNA:CD8-positive, alpha-beta T cell male adult (21 years) +7460 ENCFF526ILL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR944OIX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7461 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +7461 ENCFF526ILL- /home/drk/tillage/datasets/human/rna/encode/ENCSR944OIX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7460 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +7462 ENCFF346AHI /home/drk/tillage/datasets/human/rna/encode/ENCSR949BBZ/summary/coverage.w5 768 384 0.3 sum_sqrt 7462 RNA:K562 treated with 25 uM Galeterone for 12 hours +7463 ENCFF568QQJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR949UTT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7464 RNA:with mild cognitive impairment; middle frontal area 46 tissue female adult (83 years) +7464 ENCFF568QQJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR949UTT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7463 RNA:with mild cognitive impairment; middle frontal area 46 tissue female adult (83 years) +7465 ENCFF333PSK /home/drk/tillage/datasets/human/rna/encode/ENCSR950YTM/summary/coverage.w5 768 384 0.3 sum_sqrt 7465 RNA:K562 treated with 7.5 nM Vorinostat for 48 hours +7466 ENCFF480XQC /home/drk/tillage/datasets/human/rna/encode/ENCSR951NPS/summary/coverage.w5 768 384 0.3 sum_sqrt 7466 RNA:stomach tissue female embryo (105 days) +7467 ENCFF496YJK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR954PZB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7468 RNA:adrenal gland tissue male adult (54 years) +7468 ENCFF496YJK- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR954PZB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7467 RNA:adrenal gland tissue male adult (54 years) +7469 ENCFF095UOA+ /home/drk/tillage/datasets/human/rna/encode/ENCSR956ZVR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7470 RNA:naive thymus-derived CD4-positive, alpha-beta T cell male adult (43 years) +7470 ENCFF095UOA- /home/drk/tillage/datasets/human/rna/encode/ENCSR956ZVR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7469 RNA:naive thymus-derived CD4-positive, alpha-beta T cell male adult (43 years) +7471 ENCFF366NWQ /home/drk/tillage/datasets/human/rna/encode/ENCSR957GVE/summary/coverage.w5 768 384 0.3 sum_sqrt 7471 RNA:K562 treated with 7.5 nM Panobinostat for 12 hours +7472 ENCFF995SOX+ /home/drk/tillage/datasets/human/rna/encode/ENCSR957WSE/summary/coverage+.w5 768 384 0.3 sum_sqrt 7473 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +7473 ENCFF995SOX- /home/drk/tillage/datasets/human/rna/encode/ENCSR957WSE/summary/coverage-.w5 768 384 0.3 sum_sqrt 7472 RNA:activated naive CD8-positive, alpha-beta T cell male adult (30 years) +7474 ENCFF238RLU /home/drk/tillage/datasets/human/rna/encode/ENCSR962TBJ/summary/coverage.w5 768 384 0.3 sum_sqrt 7474 RNA:H1 +7475 ENCFF882KEH /home/drk/tillage/datasets/human/rna/encode/ENCSR963EVZ/summary/coverage.w5 768 384 0.3 sum_sqrt 7475 RNA:immature natural killer cell +7476 ENCFF314QAJ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR967BBU/summary/coverage+.w5 768 384 0.3 sum_sqrt 7477 RNA:chorionic villus tissue embryo (16 weeks) +7477 ENCFF314QAJ- /home/drk/tillage/datasets/human/rna/encode/ENCSR967BBU/summary/coverage-.w5 768 384 0.3 sum_sqrt 7476 RNA:chorionic villus tissue embryo (16 weeks) +7478 ENCFF497QCQ+ /home/drk/tillage/datasets/human/rna/encode/ENCSR967JPI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7479 RNA:gastrocnemius medialis tissue male adult (54 years) +7479 ENCFF497QCQ- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR967JPI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7478 RNA:gastrocnemius medialis tissue male adult (54 years) +7480 ENCFF623LHV /home/drk/tillage/datasets/human/rna/encode/ENCSR968ACN/summary/coverage.w5 768 384 0.3 sum_sqrt 7480 RNA:with multiple sclerosis; CD14-positive monocyte +7481 ENCFF825QDH+ /home/drk/tillage/datasets/human/rna/encode/ENCSR968WKR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7482 RNA:bipolar neuron originated from GM23338 treated with 0.5 ug/mL doxycycline hyclate for 4 days +7482 ENCFF825QDH- /home/drk/tillage/datasets/human/rna/encode/ENCSR968WKR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7481 RNA:bipolar neuron originated from GM23338 treated with 0.5 ug/mL doxycycline hyclate for 4 days +7483 ENCFF874FQG /home/drk/tillage/datasets/human/rna/encode/ENCSR969XWI/summary/coverage.w5 768 384 0.3 sum_sqrt 7483 RNA:with multiple sclerosis; CD4-positive, alpha-beta memory T cell +7484 ENCFF045DIU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR971GPJ/summary/coverage+.w5 768 384 0.3 sum_sqrt 7485 RNA:HT-29 +7485 ENCFF045DIU- /home/drk/tillage/datasets/human/rna/encode/ENCSR971GPJ/summary/coverage-.w5 768 384 0.3 sum_sqrt 7484 RNA:HT-29 +7486 ENCFF974CXF+ /home/drk/tillage/datasets/human/rna/encode/ENCSR971KNW/summary/coverage+.w5 768 384 0.3 sum_sqrt 7487 RNA:MG63 +7487 ENCFF974CXF- /home/drk/tillage/datasets/human/rna/encode/ENCSR971KNW/summary/coverage-.w5 768 384 0.3 sum_sqrt 7486 RNA:MG63 +7488 ENCFF937GHU /home/drk/tillage/datasets/human/rna/encode/ENCSR972TMF/summary/coverage.w5 768 384 0.3 sum_sqrt 7488 RNA:immature natural killer cell +7489 ENCFF050OBT /home/drk/tillage/datasets/human/rna/encode/ENCSR975CMV/summary/coverage.w5 768 384 0.3 sum_sqrt 7489 RNA:with multiple sclerosis; IgD-negative memory B cell +7490 ENCFF373WQE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR976JGI/summary/coverage+.w5 768 384 0.3 sum_sqrt 7491 RNA:mesendoderm originated from H1 +7491 ENCFF373WQE- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR976JGI/summary/coverage-.w5 768 384 0.3 sum_sqrt 7490 RNA:mesendoderm originated from H1 +7492 ENCFF599BTC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR977XUX/summary/coverage+.w5 768 384 0.3 sum_sqrt 7493 RNA:neuronal stem cell originated from H1 +7493 ENCFF599BTC- /home/drk/tillage/datasets/human/rna/encode/ENCSR977XUX/summary/coverage-.w5 768 384 0.3 sum_sqrt 7492 RNA:neuronal stem cell originated from H1 +7494 ENCFF279XJK+ /home/drk/tillage/datasets/human/rna/encode/ENCSR980UEY/summary/coverage+.w5 768 384 0.5493131868131869 sum_sqrt 7495 RNA:stomach tissue female adult (30 years) +7495 ENCFF279XJK- /home/drk/tillage/datasets/human/rna/encode/ENCSR980UEY/summary/coverage-.w5 768 384 0.5493131868131869 sum_sqrt 7494 RNA:stomach tissue female adult (30 years) +7496 ENCFF301WLH /home/drk/tillage/datasets/human/rna/encode/ENCSR982VYI/summary/coverage.w5 768 384 0.3 sum_sqrt 7496 RNA:fibroblast of skin of scalp male embryo (97 days) +7497 ENCFF696GRP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR985KAT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7498 RNA:HepG2 +7498 ENCFF696GRP- /home/drk/tillage/datasets/human/rna/encode/ENCSR985KAT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7497 RNA:HepG2 +7499 ENCFF684CHN+ /home/drk/tillage/datasets/human/rna/encode/ENCSR989IFF/summary/coverage+.w5 768 384 0.3 sum_sqrt 7500 RNA:CD8-positive, alpha-beta memory T cell male adult (30 years) +7500 ENCFF684CHN- /home/drk/tillage/datasets/human/rna/encode/ENCSR989IFF/summary/coverage-.w5 768 384 0.3 sum_sqrt 7499 RNA:CD8-positive, alpha-beta memory T cell male adult (30 years) +7501 ENCFF304RDA /home/drk/tillage/datasets/human/rna/encode/ENCSR990LHE/summary/coverage.w5 768 384 0.3 sum_sqrt 7501 RNA:muscle of arm tissue male embryo (96 days) +7502 ENCFF057TXE+ /home/drk/tillage/datasets/human/rna/encode/ENCSR991HIR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7503 RNA:lower leg skin tissue female adult (53 years) +7503 ENCFF057TXE- 
/home/drk/tillage/datasets/human/rna/encode/ENCSR991HIR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7502 RNA:lower leg skin tissue female adult (53 years) +7504 ENCFF004DOF /home/drk/tillage/datasets/human/rna/encode/ENCSR992MUF/summary/coverage.w5 768 384 0.3 sum_sqrt 7504 RNA:CD14-positive monocyte +7505 ENCFF573WTP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR993IPO/summary/coverage+.w5 768 384 0.3 sum_sqrt 7506 RNA:left cardiac atrium tissue male adult (60 years) +7506 ENCFF573WTP- /home/drk/tillage/datasets/human/rna/encode/ENCSR993IPO/summary/coverage-.w5 768 384 0.3 sum_sqrt 7505 RNA:left cardiac atrium tissue male adult (60 years) +7507 ENCFF118SVL+ /home/drk/tillage/datasets/human/rna/encode/ENCSR993JMV/summary/coverage+.w5 768 384 0.3 sum_sqrt 7508 RNA:endothelial cell of umbilical vein newborn +7508 ENCFF118SVL- /home/drk/tillage/datasets/human/rna/encode/ENCSR993JMV/summary/coverage-.w5 768 384 0.3 sum_sqrt 7507 RNA:endothelial cell of umbilical vein newborn +7509 ENCFF915WBG+ /home/drk/tillage/datasets/human/rna/encode/ENCSR993QGR/summary/coverage+.w5 768 384 0.3 sum_sqrt 7510 RNA:esophagus tissue female adult (30 years) +7510 ENCFF915WBG- /home/drk/tillage/datasets/human/rna/encode/ENCSR993QGR/summary/coverage-.w5 768 384 0.3 sum_sqrt 7509 RNA:esophagus tissue female adult (30 years) +7511 ENCFF137BCU+ /home/drk/tillage/datasets/human/rna/encode/ENCSR995BHD/summary/coverage+.w5 768 384 0.3 sum_sqrt 7512 RNA:aorta tissue female adult (30 years) +7512 ENCFF137BCU- /home/drk/tillage/datasets/human/rna/encode/ENCSR995BHD/summary/coverage-.w5 768 384 0.3 sum_sqrt 7511 RNA:aorta tissue female adult (30 years) +7513 ENCFF630CTN /home/drk/tillage/datasets/human/rna/encode/ENCSR995ORR/summary/coverage.w5 768 384 0.3 sum_sqrt 7513 RNA:muscle of back tissue female embryo (105 days) +7514 ENCFF098OUC+ /home/drk/tillage/datasets/human/rna/encode/ENCSR996OED/summary/coverage+.w5 768 384 0.3 sum_sqrt 7515 RNA:activated naive CD4-positive, alpha-beta T cell male 
adult (43 years) +7515 ENCFF098OUC- /home/drk/tillage/datasets/human/rna/encode/ENCSR996OED/summary/coverage-.w5 768 384 0.3 sum_sqrt 7514 RNA:activated naive CD4-positive, alpha-beta T cell male adult (43 years) +7516 ENCFF895JFS+ /home/drk/tillage/datasets/human/rna/encode/ENCSR997KDB/summary/coverage+.w5 768 384 0.3 sum_sqrt 7517 RNA:heart right ventricle tissue female adult (59 years) +7517 ENCFF895JFS- /home/drk/tillage/datasets/human/rna/encode/ENCSR997KDB/summary/coverage-.w5 768 384 0.3 sum_sqrt 7516 RNA:heart right ventricle tissue female adult (59 years) +7518 ENCFF487HBP+ /home/drk/tillage/datasets/human/rna/encode/ENCSR999CPT/summary/coverage+.w5 768 384 0.3 sum_sqrt 7519 RNA:placental basal plate tissue female embryo (40 weeks) +7519 ENCFF487HBP- /home/drk/tillage/datasets/human/rna/encode/ENCSR999CPT/summary/coverage-.w5 768 384 0.3 sum_sqrt 7518 RNA:placental basal plate tissue female embryo (40 weeks) +7520 ENCFF304GQR+ /home/drk/tillage/datasets/human/rna/encode/ENCSR999ZCI/summary/coverage+.w5 768 384 0.7307142857142858 sum_sqrt 7521 RNA:sigmoid colon tissue male adult (34 years) +7521 ENCFF304GQR- /home/drk/tillage/datasets/human/rna/encode/ENCSR999ZCI/summary/coverage-.w5 768 384 0.7307142857142858 sum_sqrt 7520 RNA:sigmoid colon tissue male adult (34 years) +7522 GTEX-132QS-2526-SM-62LFJ.1 /home/drk/tillage/datasets/human/rna/recount3/adipose_tissue/GTEX-132QS-2526-SM-62LFJ.1/coverage.w5 768 384 0.01 sum_sqrt 7522 RNA:adipose_tissue +7523 GTEX-1GMR3-0826-SM-9WYT4.1 /home/drk/tillage/datasets/human/rna/recount3/adipose_tissue/GTEX-1GMR3-0826-SM-9WYT4.1/coverage.w5 768 384 0.01 sum_sqrt 7523 RNA:adipose_tissue +7524 GTEX-1HSEH-0226-SM-ACKVV.1 /home/drk/tillage/datasets/human/rna/recount3/adipose_tissue/GTEX-1HSEH-0226-SM-ACKVV.1/coverage.w5 768 384 0.01 sum_sqrt 7524 RNA:adipose_tissue +7525 GTEX-11GSP-0326-SM-5A5KW.1 /home/drk/tillage/datasets/human/rna/recount3/adrenal_gland/GTEX-11GSP-0326-SM-5A5KW.1/coverage.w5 768 384 0.01 sum_sqrt 7525 
RNA:adrenal_gland +7526 GTEX-13PVR-0226-SM-5RQJI.1 /home/drk/tillage/datasets/human/rna/recount3/adrenal_gland/GTEX-13PVR-0226-SM-5RQJI.1/coverage.w5 768 384 0.01 sum_sqrt 7526 RNA:adrenal_gland +7527 GTEX-14PKV-0726-SM-686ZF.1 /home/drk/tillage/datasets/human/rna/recount3/adrenal_gland/GTEX-14PKV-0726-SM-686ZF.1/coverage.w5 768 384 0.01 sum_sqrt 7527 RNA:adrenal_gland +7528 GTEX-T6MN-2226-SM-EVYAM.1 /home/drk/tillage/datasets/human/rna/recount3/bladder/GTEX-T6MN-2226-SM-EVYAM.1/coverage.w5 768 384 0.01 sum_sqrt 7528 RNA:bladder +7529 GTEX-U3ZN-1226-SM-4DXUD.1 /home/drk/tillage/datasets/human/rna/recount3/bladder/GTEX-U3ZN-1226-SM-4DXUD.1/coverage.w5 768 384 0.01 sum_sqrt 7529 RNA:bladder +7530 GTEX-U4B1-1226-SM-4DXT7.1 /home/drk/tillage/datasets/human/rna/recount3/bladder/GTEX-U4B1-1226-SM-4DXT7.1/coverage.w5 768 384 0.01 sum_sqrt 7530 RNA:bladder +7531 GTEX-1I4MK-0002-SM-EZ6M9.1 /home/drk/tillage/datasets/human/rna/recount3/blood/GTEX-1I4MK-0002-SM-EZ6M9.1/coverage.w5 768 384 0.01 sum_sqrt 7531 RNA:blood +7532 GTEX-1LB8K-0005-SM-DIPED.1 /home/drk/tillage/datasets/human/rna/recount3/blood/GTEX-1LB8K-0005-SM-DIPED.1/coverage.w5 768 384 0.01 sum_sqrt 7532 RNA:blood +7533 GTEX-1OKEX-0006-SM-DKPQ2.1 /home/drk/tillage/datasets/human/rna/recount3/blood/GTEX-1OKEX-0006-SM-DKPQ2.1/coverage.w5 768 384 0.01 sum_sqrt 7533 RNA:blood +7534 GTEX-14DAR-2126-SM-5RQID.1 /home/drk/tillage/datasets/human/rna/recount3/blood_vessel/GTEX-14DAR-2126-SM-5RQID.1/coverage.w5 768 384 0.01 sum_sqrt 7534 RNA:blood_vessel +7535 GTEX-1JK1U-0426-SM-CYPSP.1 /home/drk/tillage/datasets/human/rna/recount3/blood_vessel/GTEX-1JK1U-0426-SM-CYPSP.1/coverage.w5 768 384 0.01 sum_sqrt 7535 RNA:blood_vessel +7536 GTEX-Y3IK-2826-SM-4WWDW.1 /home/drk/tillage/datasets/human/rna/recount3/blood_vessel/GTEX-Y3IK-2826-SM-4WWDW.1/coverage.w5 768 384 0.01 sum_sqrt 7536 RNA:blood_vessel +7537 K-562-SM-5KM2I.1 /home/drk/tillage/datasets/human/rna/recount3/bone_marrow/K-562-SM-5KM2I.1/coverage.w5 768 384 0.01 sum_sqrt 
7537 RNA:bone_marrow +7538 K-562-SM-7LG4B.1 /home/drk/tillage/datasets/human/rna/recount3/bone_marrow/K-562-SM-7LG4B.1/coverage.w5 768 384 0.01 sum_sqrt 7538 RNA:bone_marrow +7539 GTEX-13FTY-0011-R11a-SM-5IJEA.1 /home/drk/tillage/datasets/human/rna/recount3/brain/GTEX-13FTY-0011-R11a-SM-5IJEA.1/coverage.w5 768 384 0.01 sum_sqrt 7539 RNA:brain +7540 GTEX-1EX96-0011-R4a-SM-ARU82.1 /home/drk/tillage/datasets/human/rna/recount3/brain/GTEX-1EX96-0011-R4a-SM-ARU82.1/coverage.w5 768 384 0.01 sum_sqrt 7540 RNA:brain +7541 GTEX-1H3O1-1726-SM-9WYSR.1 /home/drk/tillage/datasets/human/rna/recount3/brain/GTEX-1H3O1-1726-SM-9WYSR.1/coverage.w5 768 384 0.01 sum_sqrt 7541 RNA:brain +7542 GTEX-13W3W-1226-SM-5LU4H.1 /home/drk/tillage/datasets/human/rna/recount3/breast/GTEX-13W3W-1226-SM-5LU4H.1/coverage.w5 768 384 0.01 sum_sqrt 7542 RNA:breast +7543 GTEX-1GN1W-1926-SM-7P8TH.1 /home/drk/tillage/datasets/human/rna/recount3/breast/GTEX-1GN1W-1926-SM-7P8TH.1/coverage.w5 768 384 0.01 sum_sqrt 7543 RNA:breast +7544 GTEX-ZYT6-0126-SM-5E45J.1 /home/drk/tillage/datasets/human/rna/recount3/breast/GTEX-ZYT6-0126-SM-5E45J.1/coverage.w5 768 384 0.01 sum_sqrt 7544 RNA:breast +7545 GTEX-PLZ4-2226-SM-EZ6KS.1 /home/drk/tillage/datasets/human/rna/recount3/cervix_uteri/GTEX-PLZ4-2226-SM-EZ6KS.1/coverage.w5 768 384 0.01 sum_sqrt 7545 RNA:cervix_uteri +7546 GTEX-T6MO-1426-SM-4DM73.1 /home/drk/tillage/datasets/human/rna/recount3/cervix_uteri/GTEX-T6MO-1426-SM-4DM73.1/coverage.w5 768 384 0.01 sum_sqrt 7546 RNA:cervix_uteri +7547 GTEX-U3ZN-1626-SM-4DXTZ.1 /home/drk/tillage/datasets/human/rna/recount3/cervix_uteri/GTEX-U3ZN-1626-SM-4DXTZ.1/coverage.w5 768 384 0.01 sum_sqrt 7547 RNA:cervix_uteri +7548 GTEX-11P81-1426-SM-5P9JN.1 /home/drk/tillage/datasets/human/rna/recount3/colon/GTEX-11P81-1426-SM-5P9JN.1/coverage.w5 768 384 0.01 sum_sqrt 7548 RNA:colon +7549 GTEX-13W3W-2226-SM-5LU4M.1 /home/drk/tillage/datasets/human/rna/recount3/colon/GTEX-13W3W-2226-SM-5LU4M.1/coverage.w5 768 384 0.01 sum_sqrt 7549 
RNA:colon +7550 GTEX-ZAB4-2226-SM-5HL97.1 /home/drk/tillage/datasets/human/rna/recount3/colon/GTEX-ZAB4-2226-SM-5HL97.1/coverage.w5 768 384 0.01 sum_sqrt 7550 RNA:colon +7551 GTEX-14DAR-0726-SM-5RQIA.1 /home/drk/tillage/datasets/human/rna/recount3/esophagus/GTEX-14DAR-0726-SM-5RQIA.1/coverage.w5 768 384 0.01 sum_sqrt 7551 RNA:esophagus +7552 GTEX-1RAZR-1426-SM-EVR4M.1 /home/drk/tillage/datasets/human/rna/recount3/esophagus/GTEX-1RAZR-1426-SM-EVR4M.1/coverage.w5 768 384 0.01 sum_sqrt 7552 RNA:esophagus +7553 GTEX-ZDTT-1326-SM-4WKFH.1 /home/drk/tillage/datasets/human/rna/recount3/esophagus/GTEX-ZDTT-1326-SM-4WKFH.1/coverage.w5 768 384 0.01 sum_sqrt 7553 RNA:esophagus +7554 GTEX-PLZ4-2326-SM-EYYV5.1 /home/drk/tillage/datasets/human/rna/recount3/fallopian_tube/GTEX-PLZ4-2326-SM-EYYV5.1/coverage.w5 768 384 0.01 sum_sqrt 7554 RNA:fallopian_tube +7555 GTEX-S32W-1326-SM-4AD5Q.1 /home/drk/tillage/datasets/human/rna/recount3/fallopian_tube/GTEX-S32W-1326-SM-4AD5Q.1/coverage.w5 768 384 0.01 sum_sqrt 7555 RNA:fallopian_tube +7556 GTEX-T6MO-1026-SM-4DM72.1 /home/drk/tillage/datasets/human/rna/recount3/fallopian_tube/GTEX-T6MO-1026-SM-4DM72.1/coverage.w5 768 384 0.01 sum_sqrt 7556 RNA:fallopian_tube +7557 GTEX-11LCK-0826-SM-5PNYD.1 /home/drk/tillage/datasets/human/rna/recount3/heart/GTEX-11LCK-0826-SM-5PNYD.1/coverage.w5 768 384 0.01 sum_sqrt 7557 RNA:heart +7558 GTEX-18465-0926-SM-731AY.1 /home/drk/tillage/datasets/human/rna/recount3/heart/GTEX-18465-0926-SM-731AY.1/coverage.w5 768 384 0.01 sum_sqrt 7558 RNA:heart +7559 GTEX-1MUQO-1226-SM-E9TJK.1 /home/drk/tillage/datasets/human/rna/recount3/heart/GTEX-1MUQO-1226-SM-E9TJK.1/coverage.w5 768 384 0.01 sum_sqrt 7559 RNA:heart +7560 GTEX-13112-2126-SM-5GCO4.1 /home/drk/tillage/datasets/human/rna/recount3/kidney/GTEX-13112-2126-SM-5GCO4.1/coverage.w5 768 384 0.01 sum_sqrt 7560 RNA:kidney +7561 GTEX-17F96-1926-SM-7MGWY.1 /home/drk/tillage/datasets/human/rna/recount3/kidney/GTEX-17F96-1926-SM-7MGWY.1/coverage.w5 768 384 0.01 sum_sqrt 
7561 RNA:kidney +7562 GTEX-ZVZP-0926-SM-5GIDB.1 /home/drk/tillage/datasets/human/rna/recount3/kidney/GTEX-ZVZP-0926-SM-5GIDB.1/coverage.w5 768 384 0.01 sum_sqrt 7562 RNA:kidney +7563 GTEX-11EQ9-0526-SM-5A5JZ.1 /home/drk/tillage/datasets/human/rna/recount3/liver/GTEX-11EQ9-0526-SM-5A5JZ.1/coverage.w5 768 384 0.01 sum_sqrt 7563 RNA:liver +7564 GTEX-1QP66-0226-SM-DPRXS.1 /home/drk/tillage/datasets/human/rna/recount3/liver/GTEX-1QP66-0226-SM-DPRXS.1/coverage.w5 768 384 0.01 sum_sqrt 7564 RNA:liver +7565 GTEX-ZYT6-0626-SM-5E45V.1 /home/drk/tillage/datasets/human/rna/recount3/liver/GTEX-ZYT6-0626-SM-5E45V.1/coverage.w5 768 384 0.01 sum_sqrt 7565 RNA:liver +7566 GTEX-1399S-1726-SM-5L3DI.1 /home/drk/tillage/datasets/human/rna/recount3/lung/GTEX-1399S-1726-SM-5L3DI.1/coverage.w5 768 384 0.01 sum_sqrt 7566 RNA:lung +7567 GTEX-14AS3-0926-SM-5TDD6.1 /home/drk/tillage/datasets/human/rna/recount3/lung/GTEX-14AS3-0926-SM-5TDD6.1/coverage.w5 768 384 0.01 sum_sqrt 7567 RNA:lung +7568 GTEX-14JG1-0926-SM-5YY8W.1 /home/drk/tillage/datasets/human/rna/recount3/lung/GTEX-14JG1-0926-SM-5YY8W.1/coverage.w5 768 384 0.01 sum_sqrt 7568 RNA:lung +7569 GTEX-13QJ3-0726-SM-5SI68.1 /home/drk/tillage/datasets/human/rna/recount3/muscle/GTEX-13QJ3-0726-SM-5SI68.1/coverage.w5 768 384 0.01 sum_sqrt 7569 RNA:muscle +7570 GTEX-14PJ4-2426-SM-6EU1U.1 /home/drk/tillage/datasets/human/rna/recount3/muscle/GTEX-14PJ4-2426-SM-6EU1U.1/coverage.w5 768 384 0.01 sum_sqrt 7570 RNA:muscle +7571 GTEX-1H1CY-2726-SM-9MQLR.1 /home/drk/tillage/datasets/human/rna/recount3/muscle/GTEX-1H1CY-2726-SM-9MQLR.1/coverage.w5 768 384 0.01 sum_sqrt 7571 RNA:muscle +7572 GTEX-14PJ6-2426-SM-6ETZS.1 /home/drk/tillage/datasets/human/rna/recount3/nerve/GTEX-14PJ6-2426-SM-6ETZS.1/coverage.w5 768 384 0.01 sum_sqrt 7572 RNA:nerve +7573 GTEX-1HKZK-0626-SM-ADEHP.1 /home/drk/tillage/datasets/human/rna/recount3/nerve/GTEX-1HKZK-0626-SM-ADEHP.1/coverage.w5 768 384 0.01 sum_sqrt 7573 RNA:nerve +7574 GTEX-11VI4-1026-SM-5EQM1.1 
/home/drk/tillage/datasets/human/rna/recount3/ovary/GTEX-11VI4-1026-SM-5EQM1.1/coverage.w5 768 384 0.01 sum_sqrt 7574 RNA:ovary +7575 GTEX-1269C-1826-SM-5N9E1.1 /home/drk/tillage/datasets/human/rna/recount3/ovary/GTEX-1269C-1826-SM-5N9E1.1/coverage.w5 768 384 0.01 sum_sqrt 7575 RNA:ovary +7576 GTEX-1399S-2126-SM-5J2MH.1 /home/drk/tillage/datasets/human/rna/recount3/ovary/GTEX-1399S-2126-SM-5J2MH.1/coverage.w5 768 384 0.01 sum_sqrt 7576 RNA:ovary +7577 GTEX-11I78-0626-SM-5A5LZ.1 /home/drk/tillage/datasets/human/rna/recount3/pancreas/GTEX-11I78-0626-SM-5A5LZ.1/coverage.w5 768 384 0.01 sum_sqrt 7577 RNA:pancreas +7578 GTEX-14PJ6-0926-SM-686ZA.1 /home/drk/tillage/datasets/human/rna/recount3/pancreas/GTEX-14PJ6-0926-SM-686ZA.1/coverage.w5 768 384 0.01 sum_sqrt 7578 RNA:pancreas +7579 GTEX-1HKZK-1126-SM-9WG82.1 /home/drk/tillage/datasets/human/rna/recount3/pancreas/GTEX-1HKZK-1126-SM-9WG82.1/coverage.w5 768 384 0.01 sum_sqrt 7579 RNA:pancreas +7580 GTEX-12WSC-3126-SM-5GCNB.1 /home/drk/tillage/datasets/human/rna/recount3/pituitary/GTEX-12WSC-3126-SM-5GCNB.1/coverage.w5 768 384 0.01 sum_sqrt 7580 RNA:pituitary +7581 GTEX-145LS-3226-SM-5TDCA.1 /home/drk/tillage/datasets/human/rna/recount3/pituitary/GTEX-145LS-3226-SM-5TDCA.1/coverage.w5 768 384 0.01 sum_sqrt 7581 RNA:pituitary +7582 GTEX-1I1GR-3026-SM-ARU7C.1 /home/drk/tillage/datasets/human/rna/recount3/pituitary/GTEX-1I1GR-3026-SM-ARU7C.1/coverage.w5 768 384 0.01 sum_sqrt 7582 RNA:pituitary +7583 GTEX-11P82-1126-SM-5BC5K.1 /home/drk/tillage/datasets/human/rna/recount3/prostate/GTEX-11P82-1126-SM-5BC5K.1/coverage.w5 768 384 0.01 sum_sqrt 7583 RNA:prostate +7584 GTEX-1KXAM-1126-SM-E9TJU.1 /home/drk/tillage/datasets/human/rna/recount3/prostate/GTEX-1KXAM-1126-SM-E9TJU.1/coverage.w5 768 384 0.01 sum_sqrt 7584 RNA:prostate +7585 GTEX-P4PQ-2026-SM-E9U4E.1 /home/drk/tillage/datasets/human/rna/recount3/prostate/GTEX-P4PQ-2026-SM-E9U4E.1/coverage.w5 768 384 0.01 sum_sqrt 7585 RNA:prostate +7586 GTEX-1J8QM-1826-SM-A96TA.1 
/home/drk/tillage/datasets/human/rna/recount3/salivary_gland/GTEX-1J8QM-1826-SM-A96TA.1/coverage.w5 768 384 0.01 sum_sqrt 7586 RNA:salivary_gland +7587 GTEX-YB5K-1726-SM-5IFIZ.1 /home/drk/tillage/datasets/human/rna/recount3/salivary_gland/GTEX-YB5K-1726-SM-5IFIZ.1/coverage.w5 768 384 0.01 sum_sqrt 7587 RNA:salivary_gland +7588 GTEX-ZLWG-2026-SM-DO11F.1 /home/drk/tillage/datasets/human/rna/recount3/salivary_gland/GTEX-ZLWG-2026-SM-DO11F.1/coverage.w5 768 384 0.01 sum_sqrt 7588 RNA:salivary_gland +7589 GTEX-133LE-2326-SM-5K7W3.1 /home/drk/tillage/datasets/human/rna/recount3/skin/GTEX-133LE-2326-SM-5K7W3.1/coverage.w5 768 384 0.01 sum_sqrt 7589 RNA:skin +7590 GTEX-139TS-0008-SM-62LDG.1 /home/drk/tillage/datasets/human/rna/recount3/skin/GTEX-139TS-0008-SM-62LDG.1/coverage.w5 768 384 0.01 sum_sqrt 7590 RNA:skin +7591 GTEX-13U4I-0126-SM-5LU38.1 /home/drk/tillage/datasets/human/rna/recount3/skin/GTEX-13U4I-0126-SM-5LU38.1/coverage.w5 768 384 0.01 sum_sqrt 7591 RNA:skin +7592 GTEX-14PJN-0826-SM-686ZL.1 /home/drk/tillage/datasets/human/rna/recount3/small_intestine/GTEX-14PJN-0826-SM-686ZL.1/coverage.w5 768 384 0.01 sum_sqrt 7592 RNA:small_intestine +7593 GTEX-1PDJ9-1826-SM-E9U66.1 /home/drk/tillage/datasets/human/rna/recount3/small_intestine/GTEX-1PDJ9-1826-SM-E9U66.1/coverage.w5 768 384 0.01 sum_sqrt 7593 RNA:small_intestine +7594 GTEX-ZF2S-1726-SM-57WFT.1 /home/drk/tillage/datasets/human/rna/recount3/small_intestine/GTEX-ZF2S-1726-SM-57WFT.1/coverage.w5 768 384 0.01 sum_sqrt 7594 RNA:small_intestine +7595 GTEX-14PJ4-0526-SM-6871G.1 /home/drk/tillage/datasets/human/rna/recount3/spleen/GTEX-14PJ4-0526-SM-6871G.1/coverage.w5 768 384 0.01 sum_sqrt 7595 RNA:spleen +7596 GTEX-15DYW-1426-SM-6LPIW.1 /home/drk/tillage/datasets/human/rna/recount3/spleen/GTEX-15DYW-1426-SM-6LPIW.1/coverage.w5 768 384 0.01 sum_sqrt 7596 RNA:spleen +7597 GTEX-1F75I-0226-SM-7MKHQ.1 /home/drk/tillage/datasets/human/rna/recount3/spleen/GTEX-1F75I-0226-SM-7MKHQ.1/coverage.w5 768 384 0.01 sum_sqrt 7597 
RNA:spleen +7598 GTEX-1399U-1626-SM-5P9J3.1 /home/drk/tillage/datasets/human/rna/recount3/stomach/GTEX-1399U-1626-SM-5P9J3.1/coverage.w5 768 384 0.01 sum_sqrt 7598 RNA:stomach +7599 GTEX-14PKV-1826-SM-69LO9.1 /home/drk/tillage/datasets/human/rna/recount3/stomach/GTEX-14PKV-1826-SM-69LO9.1/coverage.w5 768 384 0.01 sum_sqrt 7599 RNA:stomach +7600 GTEX-1IDJU-1526-SM-CNNR3.1 /home/drk/tillage/datasets/human/rna/recount3/stomach/GTEX-1IDJU-1526-SM-CNNR3.1/coverage.w5 768 384 0.01 sum_sqrt 7600 RNA:stomach +7601 GTEX-1KD5A-1826-SM-DHXJI.1 /home/drk/tillage/datasets/human/rna/recount3/testis/GTEX-1KD5A-1826-SM-DHXJI.1/coverage.w5 768 384 0.01 sum_sqrt 7601 RNA:testis +7602 GTEX-1MA7X-1526-SM-DHXJF.1 /home/drk/tillage/datasets/human/rna/recount3/testis/GTEX-1MA7X-1526-SM-DHXJF.1/coverage.w5 768 384 0.01 sum_sqrt 7602 RNA:testis +7603 GTEX-13FH7-0126-SM-5KLZ1.1 /home/drk/tillage/datasets/human/rna/recount3/thyroid/GTEX-13FH7-0126-SM-5KLZ1.1/coverage.w5 768 384 0.01 sum_sqrt 7603 RNA:thyroid +7604 GTEX-15CHQ-0826-SM-69LOT.1 /home/drk/tillage/datasets/human/rna/recount3/thyroid/GTEX-15CHQ-0826-SM-69LOT.1/coverage.w5 768 384 0.01 sum_sqrt 7604 RNA:thyroid +7605 GTEX-1J1OQ-0526-SM-A9G2P.1 /home/drk/tillage/datasets/human/rna/recount3/thyroid/GTEX-1J1OQ-0526-SM-A9G2P.1/coverage.w5 768 384 0.01 sum_sqrt 7605 RNA:thyroid +7606 GTEX-13FTX-1026-SM-5J2O5.1 /home/drk/tillage/datasets/human/rna/recount3/uterus/GTEX-13FTX-1026-SM-5J2O5.1/coverage.w5 768 384 0.01 sum_sqrt 7606 RNA:uterus +7607 GTEX-1MA7W-1526-SM-DHXKS.1 /home/drk/tillage/datasets/human/rna/recount3/uterus/GTEX-1MA7W-1526-SM-DHXKS.1/coverage.w5 768 384 0.01 sum_sqrt 7607 RNA:uterus +7608 GTEX-11EMC-1926-SM-5A5JU.1 /home/drk/tillage/datasets/human/rna/recount3/vagina/GTEX-11EMC-1926-SM-5A5JU.1/coverage.w5 768 384 0.01 sum_sqrt 7608 RNA:vagina +7609 GTEX-12WSB-2426-SM-5EGJC.1 /home/drk/tillage/datasets/human/rna/recount3/vagina/GTEX-12WSB-2426-SM-5EGJC.1/coverage.w5 768 384 0.01 sum_sqrt 7609 RNA:vagina +7610 
GTEX-W5WG-1026-SM-4LMIF.1 /home/drk/tillage/datasets/human/rna/recount3/vagina/GTEX-W5WG-1026-SM-4LMIF.1/coverage.w5 768 384 0.01 sum_sqrt 7610 RNA:vagina diff --git a/pyproject.toml b/pyproject.toml index 52f16d5..bd28622 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,27 +1,50 @@ [build-system] -requires = [ - "setuptools>=45", - "wheel", - "setuptools_scm>=6.2" -] +requires = ["setuptools>=69.0.3", "setuptools_scm>=8.0.4"] build-backend = "setuptools.build_meta" [project] name = "borzoi" +description = "borzoi" authors = [ {name = "David Kelley", email = "drk@calicolabs.com"}, {name = "Johannes Linder", email = "jlinder@calicolabs.com"} ] readme = "README.md" -requires-python = ">=3.8, <3.11" classifiers = ["License :: OSI Approved :: Apache License"] -dynamic = ["version", "description"] +dynamic = ["version"] + +requires-python = ">=3.9" +dependencies = [ + "h5py~=3.10.0", + "intervaltree~=3.1.0", + "joblib~=1.1.1", + "matplotlib~=3.7.1", + "google-cloud-storage~=2.0.0", + "natsort~=7.1.1", + "networkx~=2.8.4", + "numpy~=1.24.3", + "pandas~=1.5.3", + "pybigwig~=0.3.18", + "pybedtools~=0.10.0", + "pysam~=0.22.0", + "qnorm~=0.8.1", + "seaborn~=0.12.2", + "scikit-learn~=1.2.2", + "scipy~=1.9.1", + "tensorflow~=2.15.0", + "tqdm~=4.65.0", + "pyfaidx~=0.7.1", + "pyranges~=0.0.129", +] [project.optional-dependencies] dev = [ - "black==22.3.0", - "pytest==7.1.2" + "black~=23.12.1", + "pytest~=7.4.4", + "ruff~=0.1.11", ] -[tool.setuptools_scm] +[project.urls] +Homepage = "https://github.com/calico/borzoi" +[tool.setuptools_scm] \ No newline at end of file diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 5c596bb..0000000 --- a/setup.cfg +++ /dev/null @@ -1,43 +0,0 @@ -[metadata] -name = borzoi -author = David Kelley -author_email = drk@calicolabs.com -description = Machine learning methods for DNA sequence analysis. 
-long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/calico/borzoi -project_urls = - Bug Tracker = https://github.com/calico/borzoi/issues -classifiers = - Programming Language :: Python :: 3 - License :: OSI Approved :: Apache License - Operating System :: OS Independent - -[options] -package_dir = - = src -packages = find: -python_requires = >=3.8, <3.11 -install_requires = - h5py~=3.7.0 - intervaltree~=3.1.0 - matplotlib~=3.7.1 - numpy~=1.24.3 - pandas~=1.5.3 - pybigwig~=0.3.18 - pysam~=0.21.0 - pybedtools~=0.9.0 - seaborn~=0.12.2 - scikit-learn~=1.2.2 - scipy~=1.9.1 - tensorflow~=2.12.0 - pyfaidx~=0.7.1 -# baskerville==0.0.1 - -[options.extras_require] -dev = - black==22.3.0 - pytest==7.1.2 - -[options.packages.find] -where = src diff --git a/src/scripts/borzoi_bench_crispr.py b/src/scripts/_archive/borzoi_bench_crispr.py similarity index 100% rename from src/scripts/borzoi_bench_crispr.py rename to src/scripts/_archive/borzoi_bench_crispr.py diff --git a/src/scripts/borzoi_bench_crispr_folds.py b/src/scripts/_archive/borzoi_bench_crispr_folds.py similarity index 100% rename from src/scripts/borzoi_bench_crispr_folds.py rename to src/scripts/_archive/borzoi_bench_crispr_folds.py diff --git a/src/scripts/borzoi_bench_flowfish_folds.py b/src/scripts/_archive/borzoi_bench_flowfish_folds.py similarity index 100% rename from src/scripts/borzoi_bench_flowfish_folds.py rename to src/scripts/_archive/borzoi_bench_flowfish_folds.py diff --git a/src/scripts/borzoi_bench_gasperini_folds.py b/src/scripts/_archive/borzoi_bench_gasperini_folds.py similarity index 100% rename from src/scripts/borzoi_bench_gasperini_folds.py rename to src/scripts/_archive/borzoi_bench_gasperini_folds.py diff --git a/src/scripts/_archive/borzoi_satg_gene.py b/src/scripts/_archive/borzoi_satg_gene.py new file mode 100755 index 0000000..eb3c5c6 --- /dev/null +++ b/src/scripts/_archive/borzoi_satg_gene.py @@ -0,0 +1,351 @@ +#!/usr/bin/env 
python +# Copyright 2022 Calico LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========================================================================= +from optparse import OptionParser + +import gc +import json +import os +import pickle + +import h5py +import numpy as np +import pandas as pd +import pysam + +from baskerville.dataset import targets_prep_strand +from baskerville import dna_io +from baskerville import gene as bgene +from baskerville import seqnn + +""" +borzoi_satg_gene.py + +Perform a gradient saliency analysis for genes specified in a GTF file. 
+""" + +################################################################################ +# main +################################################################################ +def main(): + usage = "usage: %prog [options] " + parser = OptionParser(usage) + parser.add_option( + "-f", + dest="genome_fasta", + default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + help="Genome FASTA for sequences [Default: %default]", + ) + parser.add_option( + "-o", + dest="out_dir", + default="satg_out", + help="Output directory [Default: %default]", + ) + parser.add_option( + "-p", + dest="processes", + default=None, + type="int", + help="Number of processes, passed by multi script", + ) + parser.add_option( + "--rc", + dest="rc", + default=False, + action="store_true", + help="Ensemble forward and reverse complement predictions [Default: %default]", + ) + parser.add_option( + "--shifts", + dest="shifts", + default="0", + type="str", + help="Ensemble prediction shifts [Default: %default]", + ) + parser.add_option( + "--span", + dest="span", + default=False, + action="store_true", + help="Aggregate entire gene span [Default: %default]", + ) + parser.add_option( + "--sum", + dest="sum_targets", + default=False, + action="store_true", + help="Sum targets for single output [Default: %default]", + ) + parser.add_option( + "-t", + dest="targets_file", + default=None, + type="str", + help="File specifying target indexes and labels in table format", + ) + (options, args) = parser.parse_args() + + if len(args) == 3: + # single worker + params_file = args[0] + model_file = args[1] + genes_gtf_file = args[2] + + elif len(args) == 4: + # master script + options_pkl_file = args[0] + params_file = args[1] + model_file = args[2] + genes_gtf_file = args[3] + + # load options + options_pkl = open(options_pkl_file, "rb") + options = pickle.load(options_pkl) + options_pkl.close() + + elif len(args) == 5: + # multi worker + options_pkl_file = args[0] + params_file = args[1] + model_file = 
args[2] + genes_gtf_file = args[3] + worker_index = int(args[4]) + + # load options + options_pkl = open(options_pkl_file, "rb") + options = pickle.load(options_pkl) + options_pkl.close() + + # update output directory + options.out_dir = "%s/job%d" % (options.out_dir, worker_index) + + else: + parser.error("Must provide parameter and model files and BED file") + + if not os.path.isdir(options.out_dir): + os.mkdir(options.out_dir) + + options.shifts = [int(shift) for shift in options.shifts.split(",")] + + ################################################################# + # read parameters and targets + + # read model parameters + with open(params_file) as params_open: + params = json.load(params_open) + params_model = params["model"] + params_train = params["train"] + seq_len = params_model["seq_length"] + + if options.targets_file is None: + parser.error("Must provide targets table to properly handle strands.") + else: + targets_df = pd.read_csv(options.targets_file, sep="\t", index_col=0) + + # prep strand + orig_new_index = dict(zip(targets_df.index, np.arange(targets_df.shape[0]))) + targets_strand_pair = np.array( + [orig_new_index[ti] for ti in targets_df.strand_pair] + ) + targets_strand_df = targets_prep_strand(targets_df) + num_targets = targets_strand_df.shape[0] + if options.sum_targets: + num_targets = 1 + + # params strand_pair unnecessary because I'm not building ensemble in graph + + ################################################################# + # setup model + + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore(model_file) + seqnn_model.build_slice(targets_df.index, options.sum_targets) + + model_stride = seqnn_model.model_strides[0] + model_crop = seqnn_model.target_crops[0] + target_length = seqnn_model.target_lengths[0] + + ################################################################# + # read genes + + # parse GTF + transcriptome = bgene.Transcriptome(genes_gtf_file) + + # order valid genes + genome_open = 
pysam.Fastafile(options.genome_fasta) + gene_list = sorted(transcriptome.genes.keys()) + num_genes = len(gene_list) + + # filter for worker genes + if options.processes is not None: + # determine boundaries + worker_bounds = np.linspace(0, num_genes, options.processes + 1, dtype="int") + worker_start = worker_bounds[worker_index] + worker_end = worker_bounds[worker_index + 1] + gene_list = [gene_list[gi] for gi in range(worker_start, worker_end)] + num_genes = len(gene_list) + + ################################################################# + # setup output + + min_start = -model_stride * model_crop + + # choose gene sequences + genes_chr = [] + genes_start = [] + genes_end = [] + genes_strand = [] + for gene_id in gene_list: + gene = transcriptome.genes[gene_id] + genes_chr.append(gene.chrom) + genes_strand.append(gene.strand) + + gene_midpoint = gene.midpoint() + gene_start = max(min_start, gene_midpoint - seq_len // 2) + gene_end = gene_start + seq_len + genes_start.append(gene_start) + genes_end.append(gene_end) + + # initialize HDF5 + scores_h5_file = "%s/scores.h5" % options.out_dir + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, "w") + scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) + scores_h5.create_dataset( + "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) + ) + scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) + scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) + scores_h5.create_dataset("start", data=np.array(genes_start)) + scores_h5.create_dataset("end", data=np.array(genes_end)) + scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) + + ################################################################# + # predict scores, write output + + for gi, gene_id in enumerate(gene_list): + print("Predicting %d, %s" % (gi, gene_id), flush=True) + gene = transcriptome.genes[gene_id] + + # make 
sequence + seq_1hot = make_seq_1hot( + genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len + ) + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + seq_out_len = model_stride * target_length + + # determine output positions + gene_slice = gene.output_slice( + seq_out_start, seq_out_len, model_stride, options.span + ) + if options.rc: + gene_slice_rc = target_length - gene_slice - 1 + + if len(gene_slice) == 0: + print("WARNING: %s no gene positions found." % gene_id) + grads = np.zeros((seq_len, 4, num_targets), dtype="float16") + + else: + grads_ens = [] + for shift in options.shifts: + seq_1hot_aug = dna_io.hot1_augment(seq_1hot, shift=shift) + grads_aug = seqnn_model.gradients(seq_1hot_aug, pos_slice=gene_slice) + grads_aug = unaugment_grads(grads_aug, fwdrc=True, shift=shift) + grads_ens.append(grads_aug) + + if options.rc: + seq_1hot_aug = dna_io.hot1_rc(seq_1hot_aug) + grads_aug = seqnn_model.gradients( + seq_1hot_aug, pos_slice=gene_slice_rc + ) + grads_aug = unaugment_grads(grads_aug, fwdrc=False, shift=shift) + grads_aug = grads_aug[..., targets_strand_pair] + grads_ens.append(grads_aug) + + # ensemble mean + grads = np.array(grads_ens).mean(axis=0) + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = targets_df.strand != "-" + else: + gene_strand_mask = targets_df.strand != "+" + grads = grads[..., gene_strand_mask] + + # write to HDF5 + scores_h5["seqs"][gi] = seq_1hot + scores_h5["grads"][gi] = grads + + gc.collect() + + # close files + genome_open.close() + scores_h5.close() + + +def unaugment_grads(grads, fwdrc=False, shift=0): + """Undo sequence augmentation.""" + # reverse complement + if not fwdrc: + # reverse + grads = grads[::-1, :, :] + + # swap A and T + grads[:, [0, 3], :] = grads[:, [3, 0], :] + + # swap C and G + grads[:, [1, 2], :] = grads[:, [2, 1], :] + + # undo shift + if shift < 0: + # shift sequence right + grads[-shift:, :, :] = 
grads[:shift, :, :] + + # fill in left unknowns + grads[:-shift, :, :] = 0 + + elif shift > 0: + # shift sequence left + grads[:-shift, :, :] = grads[shift:, :, :] + + # fill in right unknowns + grads[-shift:, :, :] = 0 + + return grads + + +def make_seq_1hot(genome_open, chrm, start, end, seq_len): + if start < 0: + seq_dna = "N" * (-start) + genome_open.fetch(chrm, 0, end) + else: + seq_dna = genome_open.fetch(chrm, start, end) + + # extend to full length + if len(seq_dna) < seq_len: + seq_dna += "N" * (seq_len - len(seq_dna)) + + seq_1hot = dna_io.dna_1hot(seq_dna) + return seq_1hot + + +################################################################################ +# __main__ +################################################################################ +if __name__ == "__main__": + main() diff --git a/src/scripts/borzoi_satg_gene_multi.py b/src/scripts/_archive/borzoi_satg_gene_multi.py similarity index 100% rename from src/scripts/borzoi_satg_gene_multi.py rename to src/scripts/_archive/borzoi_satg_gene_multi.py diff --git a/src/scripts/basenji_bench_classify.py b/src/scripts/borzoi_bench_classify.py old mode 100644 new mode 100755 similarity index 99% rename from src/scripts/basenji_bench_classify.py rename to src/scripts/borzoi_bench_classify.py index 5df06e9..2e8a5a4 --- a/src/scripts/basenji_bench_classify.py +++ b/src/scripts/borzoi_bench_classify.py @@ -17,7 +17,7 @@ import seaborn as sns ''' -basenji_bench_classify.py +borzoi_bench_classify.py ''' ################################################################################ diff --git a/src/scripts/basenji_bench_gtex_folds.py b/src/scripts/borzoi_bench_gtex_folds_sad.py old mode 100644 new mode 100755 similarity index 96% rename from src/scripts/basenji_bench_gtex_folds.py rename to src/scripts/borzoi_bench_gtex_folds_sad.py index 4ea5110..17211d7 --- a/src/scripts/basenji_bench_gtex_folds.py +++ b/src/scripts/borzoi_bench_gtex_folds_sad.py @@ -29,9 +29,9 @@ import slurm """ 
-basenji_bench_gtex_folds.py +borzoi_bench_gtex_folds_sad.py -Benchmark Basenji model replicates on GTEx eQTL classification task. +Benchmark Borzoi model replicates on GTEx eQTL classification task (gene-agnostic). """ ################################################################################ @@ -42,11 +42,11 @@ def main(): parser = OptionParser(usage) # sad options - sad_options = OptionGroup(parser, 'basenji_sad.py options') + sad_options = OptionGroup(parser, 'borzoi_sad.py options') sad_options.add_option( '-f', dest='genome_fasta', - default='%s/assembly/ucsc/hg38.fa' % os.environ['HG38'], + default='%s/assembly/ucsc/hg38.fa' % os.environ.get('BORZOI_HG38', 'hg38'), help='Genome FASTA for sequences [Default: %default]' ) sad_options.add_option( @@ -117,7 +117,7 @@ def main(): parser.add_option_group(sad_options) # classify - class_options = OptionGroup(parser, 'basenji_bench_classify.py options') + class_options = OptionGroup(parser, 'borzoi_bench_classify.py options') class_options.add_option( '--cn', dest='class_name', @@ -179,6 +179,11 @@ def main(): dest='gtex_vcf_dir', default='/home/drk/seqnn/data/gtex_fine/susie_pip90' ) + fold_options.add_option( + '--susie', + dest='susie_dir', + default='/home/drk/seqnn/data/gtex_fine/tissues_susie', + ) fold_options.add_option( '--name', dest='name', @@ -243,8 +248,8 @@ def main(): # SAD # SAD command base - cmd_base = '. /home/jlinder/anaconda3/etc/profile.d/conda.sh;' - cmd_base += ' conda activate %s;' % options.conda_env + cmd_base = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += 'conda activate %s;' % options.conda_env cmd_base += ' echo $HOSTNAME;' jobs = [] @@ -275,7 +280,7 @@ def main(): options_pkl.close() # create base fold command - cmd_fold = '%s time basenji_sad.py %s %s %s' % ( + cmd_fold = '%s time borzoi_sad.py %s %s %s' % ( cmd_base, options_pkl_file, params_file, model_file) for pi in range(options.processes): @@ -302,7 +307,7 @@ def main(): options_pkl.close() # create base fold command - cmd_fold = '%s time basenji_sad.py %s %s %s' % ( + cmd_fold = '%s time borzoi_sad.py %s %s %s' % ( cmd_base, options_pkl_file, params_file, model_file) for pi in range(options.processes): @@ -399,7 +404,7 @@ def main(): # fit classifiers ################################################################ - cmd_base = 'basenji_bench_classify.py -i 100 -p 2 -r 44 -s' + cmd_base = 'borzoi_bench_classify.py -i 100 -p 2 -r 44 -s' cmd_base += ' --msl %d' % options.msl if options.class_targets_file is not None: @@ -451,7 +456,7 @@ def main(): ################################################################ # coefficient analysis - cmd_base = 'basenji_gtex_coef.py -g %s' % options.gtex_vcf_dir + cmd_base = 'borzoi_gtex_coef_sad.py -g %s --susie %s' % (options.gtex_vcf_dir, options.susie_dir) jobs = [] for ci in range(options.crosses): diff --git a/src/scripts/borzoi_bench_gtex_folds.py b/src/scripts/borzoi_bench_gtex_folds_sed.py old mode 100644 new mode 100755 similarity index 97% rename from src/scripts/borzoi_bench_gtex_folds.py rename to src/scripts/borzoi_bench_gtex_folds_sed.py index a3f3c81..c762b28 --- a/src/scripts/borzoi_bench_gtex_folds.py +++ b/src/scripts/borzoi_bench_gtex_folds_sed.py @@ -29,9 +29,9 @@ import slurm """ -borzoi_bench_gtex_folds.py +borzoi_bench_gtex_folds_sed.py -Benchmark Borzoi model replicates on GTEx eQTL coefficient task. +Benchmark Borzoi model replicates on GTEx eQTL coefficient task (gene-specific). 
""" ################################################################################ @@ -53,13 +53,13 @@ def main(): sed_options.add_option( '-f', dest='genome_fasta', - default='%s/assembly/ucsc/hg38.fa' % os.environ['HG38'], + default='%s/assembly/ucsc/hg38.fa' % os.environ.get('BORZOI_HG38', 'hg38'), help='Genome FASTA for sequences [Default: %default]', ) sed_options.add_option( '-g', dest='genes_gtf', - default='%s/genes/gencode41/gencode41_basic_nort.gtf' % os.environ['HG38'], + default='%s/genes/gencode41/gencode41_basic_nort.gtf' % os.environ.get('BORZOI_HG38', 'hg38'), help='GTF for gene definition [Default %default]', ) sed_options.add_option( @@ -156,6 +156,11 @@ def main(): dest='gtex_vcf_dir', default='/home/drk/seqnn/data/gtex_fine/susie_pip90', ) + fold_options.add_option( + '--susie', + dest='susie_dir', + default='/home/drk/seqnn/data/gtex_fine/tissues_susie', + ) fold_options.add_option( '--name', dest='name', @@ -220,8 +225,8 @@ def main(): # SED # SED command base - cmd_base = '. /home/drk/anaconda3/etc/profile.d/conda.sh;' - cmd_base += ' conda activate %s;' % options.conda_env + cmd_base = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += 'conda activate %s;' % options.conda_env cmd_base += ' echo $HOSTNAME;' jobs = [] @@ -376,7 +381,7 @@ def main(): ################################################################ # coefficient analysis - cmd_base = 'borzoi_gtex_coef.py -g %s' % options.gtex_vcf_dir + cmd_base = 'borzoi_gtex_coef_sed.py -g %s --susie %s' % (options.gtex_vcf_dir, options.susie_dir) jobs = [] for ci in range(options.crosses): diff --git a/src/scripts/borzoi_bench_ipaqtl_folds.py b/src/scripts/borzoi_bench_ipaqtl_folds.py index 9ddc9af..b7efb66 100755 --- a/src/scripts/borzoi_bench_ipaqtl_folds.py +++ b/src/scripts/borzoi_bench_ipaqtl_folds.py @@ -41,19 +41,20 @@ def main(): sed_options.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) sed_options.add_option( "-g", dest="genes_gtf", - default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ.get('BORZOI_HG38', 'hg38'), help="GTF for gene definition [Default %default]", ) sed_options.add_option( "--apafile", dest="apa_file", - default="polyadb_human_v3.csv.gz" + default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ.get('BORZOI_HG38', 'hg38'), + help="Csv for polya site definition [Default %default]", ) sed_options.add_option( "-o", @@ -236,8 +237,8 @@ def main(): # SNP scores # command base - cmd_base = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd_base += " conda activate %s;" % options.conda_env + cmd_base = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += "conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" jobs = [] @@ -370,7 +371,7 @@ def main(): cmd_base += " conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" - cmd_base += " basenji_bench_classify.py -i 100 -p 2 -r 44 -s --stat COVR" + cmd_base += " borzoi_bench_classify.py -i 100 -p 2 -r 44 -s --stat COVR" cmd_base += " --msl %d" % options.msl jobs = [] diff --git a/src/scripts/borzoi_bench_paqtl_folds.py b/src/scripts/borzoi_bench_paqtl_folds.py index 9fc97c5..46697ff 100755 --- a/src/scripts/borzoi_bench_paqtl_folds.py +++ b/src/scripts/borzoi_bench_paqtl_folds.py @@ -41,19 +41,20 @@ def main(): sed_options.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) sed_options.add_option( "-g", dest="genes_gtf", - default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ.get('BORZOI_HG38', 'hg38'), help="GTF for gene definition [Default %default]", ) sed_options.add_option( "--apafile", dest="apa_file", - default="polyadb_human_v3.csv.gz" + default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ.get('BORZOI_HG38', 'hg38'), + help="Csv for polya site definition [Default %default]", ) sed_options.add_option( "-o", @@ -236,8 +237,8 @@ def main(): # SNP scores # command base - cmd_base = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd_base += " conda activate %s;" % options.conda_env + cmd_base = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += "conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" jobs = [] @@ -370,7 +371,7 @@ def main(): cmd_base += " conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" - cmd_base += " basenji_bench_classify.py -i 100 -p 2 -r 44 -s --stat COVR" + cmd_base += " borzoi_bench_classify.py -i 100 -p 2 -r 44 -s --stat COVR" cmd_base += " --msl %d" % options.msl jobs = [] diff --git a/src/scripts/borzoi_bench_sqtl_folds.py b/src/scripts/borzoi_bench_sqtl_folds.py index 6c74788..90d1237 100755 --- a/src/scripts/borzoi_bench_sqtl_folds.py +++ b/src/scripts/borzoi_bench_sqtl_folds.py @@ -41,14 +41,14 @@ def main(): sed_options.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) sed_options.add_option( "-g", dest="genes_gtf", - default="%s/genes/gencode41/gencode41_basic_nort_protein.gtf" - % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_nort.gtf" + % os.environ.get('BORZOI_HG38', 'hg38'), help="GTF for gene definition [Default %default]", ) sed_options.add_option( @@ -223,8 +223,8 @@ def main(): # SNP scores # command base - cmd_base = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd_base += " conda activate %s;" % options.conda_env + cmd_base = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += "conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" jobs = [] @@ -360,7 +360,7 @@ def main(): cmd_base += " conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" - cmd_base += " basenji_bench_classify.py -i 100 -p 2 -r 44 -s --stat nDi" + cmd_base += " borzoi_bench_classify.py -i 100 -p 2 -r 44 -s --stat nDi" cmd_base += " --msl %d" % options.msl jobs = [] diff --git a/src/scripts/borzoi_bench_trip_folds.py b/src/scripts/borzoi_bench_trip_folds.py index 7a8e3ca..bcc2dd6 100755 --- a/src/scripts/borzoi_bench_trip_folds.py +++ b/src/scripts/borzoi_bench_trip_folds.py @@ -36,7 +36,7 @@ def main(): trip_options.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) trip_options.add_option( @@ -184,8 +184,8 @@ def main(): # TRIP prediction jobs # command base - cmd_base = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd_base += " conda activate %s;" % options.conda_env + cmd_base = ('. %s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += "conda activate %s;" % options.conda_env cmd_base += " echo $HOSTNAME;" jobs = [] diff --git a/src/scripts/basenji_gtex_coef.py b/src/scripts/borzoi_gtex_coef_sad.py old mode 100644 new mode 100755 similarity index 96% rename from src/scripts/basenji_gtex_coef.py rename to src/scripts/borzoi_gtex_coef_sad.py index 264c750..64ee40a --- a/src/scripts/basenji_gtex_coef.py +++ b/src/scripts/borzoi_gtex_coef_sad.py @@ -15,7 +15,7 @@ import seaborn as sns ''' -basenji_gtex_coef.py +borzoi_gtex_coef_sad.py Evaluate concordance of variant effect prediction sign classifcation and coefficient correlations (gene-agnostic). 
@@ -37,9 +37,15 @@ def main(): parser.add_option( '-g', dest='gtex_vcf_dir', - default='/home/drk/seqnn/data/gtex_fine/susie_pip90', + default='susie_pip90', help='GTEx VCF directory' ) + parser.add_option( + '--susie', + dest='susie_dir', + default='susie_pip90', + help='SuSiE directory' + ) parser.add_option( '-m', dest='min_variants', @@ -114,7 +120,7 @@ def main(): if options.verbose: print(tissue) # read causal variants - eqtl_df = read_eqtl(tissue, options.gtex_vcf_dir) + eqtl_df = read_eqtl(tissue, options.gtex_vcf_dir, susie_dir=options.susie_dir) if eqtl_df is not None and eqtl_df.shape[0] > options.min_variants: # read model predictions gtex_scores_file = f'{gtex_dir}/{tissue}_pos/sad.h5' @@ -180,7 +186,7 @@ def main(): print('Class AUROC: %.4f' % np.mean(metrics_df.auroc_class)) -def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9): +def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9, susie_dir: str='tissues_susie'): """Reads eQTLs from SUSIE output. Args: @@ -191,7 +197,6 @@ def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9): Returns: eqtl_df (pd.DataFrame): eQTL dataframe, or None if tissue skipped. """ - susie_dir = '/home/drk/seqnn/data/gtex_fine/tissues_susie' # read causal variants eqtl_file = f'{susie_dir}/{tissue}.tsv' diff --git a/src/scripts/borzoi_gtex_coef.py b/src/scripts/borzoi_gtex_coef_sed.py old mode 100644 new mode 100755 similarity index 97% rename from src/scripts/borzoi_gtex_coef.py rename to src/scripts/borzoi_gtex_coef_sed.py index b44d548..d9a4459 --- a/src/scripts/borzoi_gtex_coef.py +++ b/src/scripts/borzoi_gtex_coef_sed.py @@ -15,7 +15,7 @@ import seaborn as sns ''' -borzoi_gtex_coef.py +borzoi_gtex_coef_sed.py Evaluate concordance of variant effect prediction sign classifcation and coefficient correlations (gene-specific). 
@@ -37,9 +37,15 @@ def main(): parser.add_option( '-g', dest='gtex_vcf_dir', - default='/home/drk/seqnn/data/gtex_fine/susie_pip90', + default='susie_pip90', help='GTEx VCF directory', ) + parser.add_option( + '--susie', + dest='susie_dir', + default='susie_pip90', + help='SuSiE directory' + ) parser.add_option( '-m', dest='min_variants', @@ -138,7 +144,7 @@ def main(): if options.verbose: print(tissue) # read causal variants - eqtl_df = read_eqtl(tissue, options.gtex_vcf_dir) + eqtl_df = read_eqtl(tissue, options.gtex_vcf_dir, susie_dir=options.susie_dir) if eqtl_df is not None and eqtl_df.shape[0] > options.min_variants: # read model predictions @@ -203,7 +209,7 @@ def main(): print('Class AUROC: %.4f' % np.mean(metrics_df.auroc_class)) -def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9): +def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9, susie_dir: str='tissues_susie'): """Reads eQTLs from SUSIE output. Args: @@ -214,7 +220,6 @@ def read_eqtl(tissue: str, gtex_vcf_dir: str, pip_t: float=0.9): Returns: eqtl_df (pd.DataFrame): eQTL dataframe, or None if tissue skipped. """ - susie_dir = '/home/drk/seqnn/data/gtex_fine/tissues_susie' # read causal variants eqtl_file = f'{susie_dir}/{tissue}.tsv' diff --git a/src/scripts/basenji_sad.py b/src/scripts/borzoi_sad.py old mode 100644 new mode 100755 similarity index 99% rename from src/scripts/basenji_sad.py rename to src/scripts/borzoi_sad.py index d832139..2c12782 --- a/src/scripts/basenji_sad.py +++ b/src/scripts/borzoi_sad.py @@ -38,7 +38,7 @@ from baskerville import vcf as bvcf ''' -basenji_sad.py +borzoi_sad.py Compute SNP Activity Difference (SAD) scores for SNPs in a VCF file. 
''' @@ -52,7 +52,7 @@ def main(): parser.add_option( '-f', dest='genome_fasta', - default='%s/assembly/ucsc/hg38.fa' % os.environ["HG38"], + default='%s/assembly/ucsc/hg38.fa' % os.environ.get('BORZOI_HG38', 'hg38'), help='Genome FASTA for sequences [Default: %default]' ) parser.add_option( diff --git a/src/scripts/borzoi_sad_folds.py b/src/scripts/borzoi_sad_folds.py new file mode 100755 index 0000000..49f7959 --- /dev/null +++ b/src/scripts/borzoi_sad_folds.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python +# Copyright 2019 Calico LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========================================================================= +from optparse import OptionParser, OptionGroup +import glob +import h5py +import json +import pdb +import os +import sys + +import numpy as np +import pandas as pd + +import slurm + +""" +borzoi_sad_folds.py + +Compute SAD scores across model folds. 
+""" + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] ' + parser = OptionParser(usage) + + # sad + sad_options = OptionGroup(parser, 'borzoi_sad.py options') + sad_options.add_option( + '-f', + dest='genome_fasta', + default='%s/assembly/ucsc/hg38.fa' % os.environ.get('BORZOI_HG38', 'hg38'), + help='Genome FASTA for sequences [Default: %default]', + ) + sad_options.add_option( + '-o', + dest='out_dir', + default='sad', + help='Output directory for tables and plots [Default: %default]' + ) + sad_options.add_option( + '-p', + dest='processes', + default=None, + type='int', + help='Number of processes, passed by multi script' + ) + sad_options.add_option( + '--rc', + dest='rc', + default=False, + action='store_true', + help='Average forward and reverse complement predictions [Default: %default]' + ) + sad_options.add_option( + '--shifts', dest='shifts', + default='0', + type='str', + help='Ensemble prediction shifts [Default: %default]' + ) + sad_options.add_option( + '--stats', + dest='sad_stats', + default='SAD', + help='Comma-separated list of stats to save. 
[Default: %default]' + ) + sad_options.add_option( + '-t', + dest='targets_file', + default=None, + type='str', + help='File specifying target indexes and labels in table format' + ) + sad_options.add_option( + '-u', + dest='untransform_old', + default=False, + action='store_true', + ) + sad_options.add_option( + '--no_untransform', + dest='no_untransform', + default=False, + action='store_true', + ) + parser.add_option_group(sad_options) + + # cross-fold + fold_options = OptionGroup(parser, 'cross-fold options') + fold_options.add_option( + '-c', + dest='crosses', + default=1, + type='int', + help='Number of cross-fold rounds [Default:%default]', + ) + fold_options.add_option( + '--folds', + dest='fold_subset', + default=1, + type='int', + help='Run a subset of folds [Default:%default]', + ) + fold_options.add_option( + '--f_list', + dest='fold_subset_list', + default=None, + help='Run a subset of folds (encoded as comma-separated string) [Default:%default]', + ) + fold_options.add_option( + '-d', + dest='data_head', + default=None, + type='int', + help='Index for dataset/head [Default: %default]' + ) + fold_options.add_option( + '-e', + dest='conda_env', + default='tf210', + help='Anaconda environment [Default: %default]' + ) + fold_options.add_option( + '--name', + dest='name', + default='sad', + help='SLURM name prefix [Default: %default]' + ) + fold_options.add_option( + '--max_proc', + dest='max_proc', + default=None, + type='int', + help='Maximum concurrent processes [Default: %default]' + ) + fold_options.add_option( + '-q', + dest='queue', + default='geforce', + help='SLURM queue on which to run the jobs [Default: %default]' + ) + fold_options.add_option( + '-r', + dest='restart', + default=False, + action='store_true', + help='Restart a partially completed job [Default: %default]' + ) + fold_options.add_option( + '--vcf', + dest='vcf_file', + default='/home/jlinder/seqnn/data/satmutmpra/satmutmpra_v1.vcf' + ) + parser.add_option_group(fold_options) + + 
(options, args) = parser.parse_args() + + if len(args) != 2: + parser.error('Must provide parameters file and cross-fold directory') + else: + params_file = args[0] + exp_dir = args[1] + + ####################################################### + # prep work + + # set folds + num_folds = 1 + if options.fold_subset is not None: + num_folds = options.fold_subset + + fold_index = [fold_i for fold_i in range(num_folds)] + + # subset folds (list) + if options.fold_subset_list is not None: + fold_index = [int(fold_str) for fold_str in options.fold_subset_list.split(",")] + + ################################################################ + # SNP scores + + # command base + cmd_base = ('. %s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += 'conda activate %s;' % options.conda_env + cmd_base += ' echo $HOSTNAME;' + + jobs = [] + + for ci in range(options.crosses): + for fi in fold_index: + it_dir = '%s/f%dc%d' % (exp_dir, fi, ci) + name = '%s-f%dc%d' % (options.name, fi, ci) + + # update output directory + it_out_dir = '%s/%s' % (it_dir, options.out_dir) + os.makedirs(it_out_dir, exist_ok=True) + + model_file = '%s/train/model_best.h5' % it_dir + if options.data_head is not None: + model_file = '%s/train/model%d_best.h5' % (it_dir, options.data_head) + + cmd_fold = '%s time borzoi_sad.py %s %s' % (cmd_base, params_file, model_file) + + # variant scoring job + job_out_dir = it_out_dir + if not options.restart or not os.path.isfile('%s/sad.h5'%job_out_dir): + cmd_job = '%s %s' % (cmd_fold, options.vcf_file) + cmd_job += ' %s' % options_string(options, sad_options, job_out_dir) + j = slurm.Job(cmd_job, '%s' % name, + '%s.out'%job_out_dir, '%s.err'%job_out_dir, '%s.sb'%job_out_dir, + queue=options.queue, gpu=1, + mem=45000, time='30-0:0:0') + jobs.append(j) + + slurm.multi_run(jobs, max_proc=options.max_proc, verbose=True, + launch_sleep=10, update_sleep=60) + +def options_string(options, group_options, rep_dir): + options_str = '' + + 
for opt in group_options.option_list: + opt_str = opt.get_opt_string() + opt_value = options.__dict__[opt.dest] + + # wrap askeriks in "" + if type(opt_value) == str and opt_value.find('*') != -1: + opt_value = '"%s"' % opt_value + + # no value for bools + elif type(opt_value) == bool: + if not opt_value: + opt_str = '' + opt_value = '' + + # skip Nones + elif opt_value is None: + opt_str = '' + opt_value = '' + + # modify + elif opt.dest == 'out_dir': + opt_value = rep_dir + + options_str += ' %s %s' % (opt_str, opt_value) + + return options_str + +################################################################################ +# __main__ +################################################################################ +if __name__ == '__main__': + main() diff --git a/src/scripts/borzoi_satg_gene.py b/src/scripts/borzoi_satg_gene.py index eb3c5c6..1c96712 100755 --- a/src/scripts/borzoi_satg_gene.py +++ b/src/scripts/borzoi_satg_gene.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2022 Calico LLC +# Copyright 2017 Calico LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,20 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ========================================================================= +from __future__ import print_function + from optparse import OptionParser import gc import json import os -import pickle +import time import h5py import numpy as np import pandas as pd import pysam +import tensorflow as tf from baskerville.dataset import targets_prep_strand -from baskerville import dna_io +from baskerville import dna as dna_io from baskerville import gene as bgene from baskerville import seqnn @@ -38,14 +41,14 @@ ################################################################################ # main -################################################################################ +# ############################################################################### def main(): usage = "usage: %prog [options] " parser = OptionParser(usage) parser.add_option( - "-f", + "--fa", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( @@ -54,13 +57,6 @@ def main(): default="satg_out", help="Output directory [Default: %default]", ) - parser.add_option( - "-p", - dest="processes", - default=None, - type="int", - help="Number of processes, passed by multi script", - ) parser.add_option( "--rc", dest="rc", @@ -68,6 +64,27 @@ def main(): action="store_true", help="Ensemble forward and reverse complement predictions [Default: %default]", ) + parser.add_option( + "-f", + dest="folds", + default="0", + type="str", + help="Model folds to use in ensemble (comma-separated list) [Default: %default]", + ) + parser.add_option( + '-c', + dest='crosses', + default="0", + type="str", + help='Model crosses (replicates) to use in ensemble (comma-separated list) [Default:%default]', + ) + parser.add_option( + "--head", + dest="head_i", + default=0, + type="int", + help="Model head index [Default: %default]", + ) 
parser.add_option( "--shifts", dest="shifts", @@ -83,11 +100,67 @@ def main(): help="Aggregate entire gene span [Default: %default]", ) parser.add_option( - "--sum", - dest="sum_targets", + "--clip_soft", + dest="clip_soft", + default=None, + type="float", + help="Model clip_soft setting [Default: %default]", + ) + parser.add_option( + "--track_scale", + dest="track_scale", + default=0.02, + type="float", + help="Target transform scale [Default: %default]", + ) + parser.add_option( + "--track_transform", + dest="track_transform", + default=0.75, + type="float", + help="Target transform exponent [Default: %default]", + ) + parser.add_option( + "--untransform_old", + dest="untransform_old", + default=False, + action="store_true", + help="Run gradients with old version of inverse transforms [Default: %default]", + ) + parser.add_option( + "--no_untransform", + dest="no_untransform", + default=False, + action="store_true", + help="Run gradients with no inverse transforms [Default: %default]", + ) + parser.add_option( + "--get_preds", + dest="get_preds", default=False, action="store_true", - help="Sum targets for single output [Default: %default]", + help="Store scalar predictions in addition to their gradients [Default: %default]", + ) + parser.add_option( + "--pseudo_qtl", + dest="pseudo_qtl", + default=None, + type="float", + help="Quantile of predicted scalars to choose as pseudo count [Default: %default]", + ) + parser.add_option( + "--pseudo_tissue", + dest="pseudo_tissue", + default=None, + type="str", + help="Tissue to filter genes on when calculating pseudo count [Default: %default]", + ) + parser.add_option( + "--gene_file", + dest="gene_file", + default=None, + type="str", + help="Csv-file of gene metadata [Default: %default]", ) parser.add_option( "-t", @@ -101,43 +174,16 @@ def main(): if len(args) == 3: # single worker params_file = args[0] - model_file = args[1] + model_folder = args[1] genes_gtf_file = args[2] - - elif len(args) == 4: - # master script - 
options_pkl_file = args[0] - params_file = args[1] - model_file = args[2] - genes_gtf_file = args[3] - - # load options - options_pkl = open(options_pkl_file, "rb") - options = pickle.load(options_pkl) - options_pkl.close() - - elif len(args) == 5: - # multi worker - options_pkl_file = args[0] - params_file = args[1] - model_file = args[2] - genes_gtf_file = args[3] - worker_index = int(args[4]) - - # load options - options_pkl = open(options_pkl_file, "rb") - options = pickle.load(options_pkl) - options_pkl.close() - - # update output directory - options.out_dir = "%s/job%d" % (options.out_dir, worker_index) - else: - parser.error("Must provide parameter and model files and BED file") + parser.error("Must provide parameter file, model folder and GTF file") if not os.path.isdir(options.out_dir): - os.mkdir(options.out_dir) + os.makedirs(options.out_dir, exist_ok=True) + options.folds = [int(fold) for fold in options.folds.split(",")] + options.crosses = [int(cross) for cross in options.crosses.split(",")] options.shifts = [int(shift) for shift in options.shifts.split(",")] ################################################################# @@ -161,18 +207,34 @@ def main(): [orig_new_index[ti] for ti in targets_df.strand_pair] ) targets_strand_df = targets_prep_strand(targets_df) - num_targets = targets_strand_df.shape[0] - if options.sum_targets: - num_targets = 1 + num_targets = 1 + + # Load gene dataframe and select tissue + tissue_genes = None + if options.gene_file is not None and options.pseudo_tissue is not None: + gene_df = pd.read_csv(options.gene_file, sep="\t") + gene_df = ( + gene_df.query("tissue == '" + str(options.pseudo_tissue) + "'") + .copy() + .reset_index(drop=True) + ) + gene_df = gene_df.drop(columns=["Unnamed: 0"]) + + # Get list of gene for tissue + tissue_genes = gene_df["gene_base"].values.tolist() - # params strand_pair unnecessary because I'm not building ensemble in graph + print("len(tissue_genes) = " + str(len(tissue_genes))) 
################################################################# - # setup model + # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore(model_file) - seqnn_model.build_slice(targets_df.index, options.sum_targets) + seqnn_model.restore( + model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) + # seqnn_model.build_ensemble(options.rc, options.shifts) model_stride = seqnn_model.model_strides[0] model_crop = seqnn_model.target_crops[0] @@ -189,15 +251,6 @@ def main(): gene_list = sorted(transcriptome.genes.keys()) num_genes = len(gene_list) - # filter for worker genes - if options.processes is not None: - # determine boundaries - worker_bounds = np.linspace(0, num_genes, options.processes + 1, dtype="int") - worker_start = worker_bounds[worker_index] - worker_end = worker_bounds[worker_index + 1] - gene_list = [gene_list[gi] for gi in range(worker_start, worker_end)] - num_genes = len(gene_list) - ################################################################# # setup output @@ -219,80 +272,369 @@ def main(): genes_start.append(gene_start) genes_end.append(gene_end) - # initialize HDF5 - scores_h5_file = "%s/scores.h5" % options.out_dir - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, "w") - scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) - scores_h5.create_dataset( - "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) - ) - scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) - scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) - scores_h5.create_dataset("start", data=np.array(genes_start)) - scores_h5.create_dataset("end", data=np.array(genes_end)) - scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) - 
################################################################# # predict scores, write output - for gi, gene_id in enumerate(gene_list): - print("Predicting %d, %s" % (gi, gene_id), flush=True) - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot( - genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len - ) - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - seq_out_len = model_stride * target_length - - # determine output positions - gene_slice = gene.output_slice( - seq_out_start, seq_out_len, model_stride, options.span - ) - if options.rc: - gene_slice_rc = target_length - gene_slice - 1 - - if len(gene_slice) == 0: - print("WARNING: %s no gene positions found." % gene_id) - grads = np.zeros((seq_len, 4, num_targets), dtype="float16") + buffer_size = 1024 + + print("clip_soft = " + str(options.clip_soft)) + + print("n genes = " + str(len(genes_chr))) + + # loop over folds + for fold_ix in options.folds: + for cross_ix in options.crosses: + + print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") + + # (re-)initialize HDF5 + scores_h5_file = "%s/scores_f%dc%d.h5" % (options.out_dir, fold_ix, cross_ix) + if os.path.isfile(scores_h5_file): + os.remove(scores_h5_file) + scores_h5 = h5py.File(scores_h5_file, "w") + scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) + scores_h5.create_dataset( + "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) + ) + if options.get_preds: + scores_h5.create_dataset( + "preds", dtype="float32", shape=(num_genes, num_targets) + ) + scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) + scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) + scores_h5.create_dataset("start", data=np.array(genes_start)) + scores_h5.create_dataset("end", data=np.array(genes_end)) + scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) + + # load model 
fold + seqnn_model = seqnn.SeqNN(params_model) + seqnn_model.restore( + model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + options.head_i + ) + seqnn_model.build_slice(targets_df.index, False) + + # optionally get (and store) scalar predictions before computing their gradients + if options.get_preds: + print(" - (prediction) - ", flush=True) + + for shift in options.shifts: + print("Processing shift %d" % shift, flush=True) + + for rev_comp in [False, True] if options.rc else [False]: + + if options.rc: + print( + "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), + flush=True, + ) + + seq_1hots = [] + gene_slices = [] + gene_targets = [] + + for gi, gene_id in enumerate(gene_list): + + if gi % 500 == 0: + print("Processing %d, %s" % (gi, gene_id), flush=True) + + gene = transcriptome.genes[gene_id] + + # make sequence + seq_1hot = make_seq_1hot( + genome_open, + genes_chr[gi], + genes_start[gi], + genes_end[gi], + seq_len, + ) + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + seq_out_len = model_stride * target_length + + # determine output positions + gene_slice = gene.output_slice( + seq_out_start, seq_out_len, model_stride, options.span + ) + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = ( + (targets_df.strand != "-") + if not rev_comp + else (targets_df.strand != "+") + ) + else: + gene_strand_mask = ( + (targets_df.strand != "+") + if not rev_comp + else (targets_df.strand != "-") + ) + + gene_target = np.array( + targets_df.index[gene_strand_mask].values + ) + + # accumulate data tensors + seq_1hots.append(seq_1hot[None, ...]) + gene_slices.append(gene_slice[None, ...]) + gene_targets.append(gene_target[None, ...]) + + if gi == len(gene_list) - 1 or 
len(seq_1hots) >= buffer_size: + + # concat sequences + seq_1hots = np.concatenate(seq_1hots, axis=0) + + # pad gene slices to same length (mark valid positions in mask tensor) + max_slice_len = int( + np.max( + [gene_slice.shape[1] for gene_slice in gene_slices] + ) + ) + + gene_masks = np.zeros( + (len(gene_slices), max_slice_len), dtype="float32" + ) + gene_slices_padded = np.zeros( + (len(gene_slices), max_slice_len), dtype="int32" + ) + for gii, gene_slice in enumerate(gene_slices): + for j in range(gene_slice.shape[1]): + gene_masks[gii, j] = 1.0 + gene_slices_padded[gii, j] = gene_slice[0, j] + + gene_slices = gene_slices_padded + + # concat gene-specific targets + gene_targets = np.concatenate(gene_targets, axis=0) + + # batch call count predictions + preds = predict_counts( + seqnn_model, + seq_1hots, + head_i=0, + target_slice=gene_targets, + pos_slice=gene_slices, + pos_mask=gene_masks, + chunk_size=buffer_size, + batch_size=1, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + untransform_old=options.untransform_old, + use_mean=False, + dtype="float32", + ) + + # save predictions + for gii, gene_slice in enumerate(gene_slices): + h5_gi = (gi // buffer_size) * buffer_size + gii + + # write to HDF5 + scores_h5["preds"][h5_gi, :] += preds[gii] / float( + len(options.shifts) + ) + + # clear sequence buffer + seq_1hots = [] + gene_slices = [] + gene_targets = [] + + # collect garbage + gc.collect() + + # optionally set pseudo count from predictions + pseudo_count = 0.0 + if options.pseudo_qtl is not None: + gene_preds = scores_h5["preds"][:] + + # filter on tissue + tissue_preds = None + + if tissue_genes is not None: + tissue_set = set(tissue_genes) + + # get subset of genes and predictions belonging to the pseudo count tissue + tissue_preds = [] + for gi, gene_id in enumerate(gene_list): + if gene_id.split(".")[0] in tissue_set: + tissue_preds.append(gene_preds[gi, 0]) + + tissue_preds = 
np.array(tissue_preds, dtype="float32") + else: + tissue_preds = np.array(gene_preds[:, 0], dtype="float32") + + print("tissue_preds.shape[0] = " + str(tissue_preds.shape[0])) + + print("np.min(tissue_preds) = " + str(np.min(tissue_preds))) + print("np.max(tissue_preds) = " + str(np.max(tissue_preds))) + + # set pseudo count based on quantile of predictions + pseudo_count = np.quantile(tissue_preds, q=options.pseudo_qtl) + + print("") + print("pseudo_count = " + str(round(pseudo_count, 6))) + + # compute gradients + print(" - (gradients) - ", flush=True) - else: - grads_ens = [] for shift in options.shifts: - seq_1hot_aug = dna_io.hot1_augment(seq_1hot, shift=shift) - grads_aug = seqnn_model.gradients(seq_1hot_aug, pos_slice=gene_slice) - grads_aug = unaugment_grads(grads_aug, fwdrc=True, shift=shift) - grads_ens.append(grads_aug) - - if options.rc: - seq_1hot_aug = dna_io.hot1_rc(seq_1hot_aug) - grads_aug = seqnn_model.gradients( - seq_1hot_aug, pos_slice=gene_slice_rc - ) - grads_aug = unaugment_grads(grads_aug, fwdrc=False, shift=shift) - grads_aug = grads_aug[..., targets_strand_pair] - grads_ens.append(grads_aug) - - # ensemble mean - grads = np.array(grads_ens).mean(axis=0) - - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = targets_df.strand != "-" - else: - gene_strand_mask = targets_df.strand != "+" - grads = grads[..., gene_strand_mask] - - # write to HDF5 - scores_h5["seqs"][gi] = seq_1hot - scores_h5["grads"][gi] = grads - - gc.collect() + print("Processing shift %d" % shift, flush=True) + + for rev_comp in [False, True] if options.rc else [False]: + + if options.rc: + print( + "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True + ) + + seq_1hots = [] + gene_slices = [] + gene_targets = [] + + for gi, gene_id in enumerate(gene_list): + + if gi % 500 == 0: + print("Processing %d, %s" % (gi, gene_id), flush=True) + + gene = transcriptome.genes[gene_id] + + # make sequence + seq_1hot = make_seq_1hot( + 
genome_open, + genes_chr[gi], + genes_start[gi], + genes_end[gi], + seq_len, + ) + seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) + + # determine output sequence start + seq_out_start = genes_start[gi] + model_stride * model_crop + seq_out_len = model_stride * target_length + + # determine output positions + gene_slice = gene.output_slice( + seq_out_start, seq_out_len, model_stride, options.span + ) + + if rev_comp: + seq_1hot = dna_io.hot1_rc(seq_1hot) + gene_slice = target_length - gene_slice - 1 + + # slice relevant strand targets + if genes_strand[gi] == "+": + gene_strand_mask = ( + (targets_df.strand != "-") + if not rev_comp + else (targets_df.strand != "+") + ) + else: + gene_strand_mask = ( + (targets_df.strand != "+") + if not rev_comp + else (targets_df.strand != "-") + ) + + gene_target = np.array(targets_df.index[gene_strand_mask].values) + + # accumulate data tensors + seq_1hots.append(seq_1hot[None, ...]) + gene_slices.append(gene_slice[None, ...]) + gene_targets.append(gene_target[None, ...]) + + if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: + + # concat sequences + seq_1hots = np.concatenate(seq_1hots, axis=0) + + # pad gene slices to same length (mark valid positions in mask tensor) + max_slice_len = int( + np.max([gene_slice.shape[1] for gene_slice in gene_slices]) + ) + + gene_masks = np.zeros( + (len(gene_slices), max_slice_len), dtype="float32" + ) + gene_slices_padded = np.zeros( + (len(gene_slices), max_slice_len), dtype="int32" + ) + for gii, gene_slice in enumerate(gene_slices): + for j in range(gene_slice.shape[1]): + gene_masks[gii, j] = 1.0 + gene_slices_padded[gii, j] = gene_slice[0, j] + + gene_slices = gene_slices_padded + + # concat gene-specific targets + gene_targets = np.concatenate(gene_targets, axis=0) + + # batch call gradient computation + grads = seqnn_model.gradients( + seq_1hots, + head_i=0, + target_slice=gene_targets, + pos_slice=gene_slices, + pos_mask=gene_masks, + chunk_size=buffer_size, + 
batch_size=1, + track_scale=options.track_scale, + track_transform=options.track_transform, + clip_soft=options.clip_soft, + pseudo_count=pseudo_count, + untransform_old=options.untransform_old, + no_untransform=options.no_untransform, + use_mean=False, + use_ratio=False, + use_logodds=False, + subtract_avg=True, + input_gate=False, + dtype="float16", + ) + + # undo augmentations and save gradients + for gii, gene_slice in enumerate(gene_slices): + grad = unaugment_grads( + grads[gii, :, :, None], + fwdrc=(not rev_comp), + shift=shift, + ) + + h5_gi = (gi // buffer_size) * buffer_size + gii + + # write to HDF5 + scores_h5["grads"][h5_gi] += grad + + # clear sequence buffer + seq_1hots = [] + gene_slices = [] + gene_targets = [] + + # collect garbage + gc.collect() + + # save sequences and normalize gradients by total size of ensemble + for gi, gene_id in enumerate(gene_list): + + # re-make original sequence + seq_1hot = make_seq_1hot( + genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len + ) + + # write to HDF5 + scores_h5["seqs"][gi] = seq_1hot + scores_h5["grads"][gi] /= float( + (len(options.shifts) * (2 if options.rc else 1)) + ) + + # collect garbage + gc.collect() # close files genome_open.close() @@ -344,8 +686,219 @@ def make_seq_1hot(genome_open, chrm, start, end, seq_len): return seq_1hot +# tf code for predicting raw sum-of-expression counts on GPU +@tf.function +def _count_func( + model, + seq_1hot, + target_slice, + pos_slice, + pos_mask=None, + track_scale=1.0, + track_transform=1.0, + clip_soft=None, + untransform_old=False, + use_mean=False, +): + + # predict + preds = tf.gather( + model(seq_1hot, training=False), target_slice, axis=-1, batch_dims=1 + ) + + if untransform_old: + # undo scale + preds = preds / track_scale + + # undo clip_soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds + ) + + # undo sqrt + preds = preds ** (1. 
/ track_transform) + else: + # undo clip_soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft + 1) ** 2 + clip_soft - 1, preds + ) + + # undo sqrt + preds = -1 + (preds + 1) ** (1. / track_transform) + + # scale + preds = preds / track_scale + + # aggregate over tracks (average) + preds = tf.reduce_mean(preds, axis=-1) + + # slice specified positions + preds_slice = tf.gather(preds, pos_slice, axis=-1, batch_dims=1) + if pos_mask is not None: + preds_slice = preds_slice * pos_mask + + # aggregate over positions + if not use_mean: + preds_agg = tf.reduce_sum(preds_slice, axis=-1) + else: + if pos_mask is not None: + preds_agg = tf.reduce_sum(preds_slice, axis=-1) / tf.reduce_sum( + pos_mask, axis=-1 + ) + else: + preds_agg = tf.reduce_mean(preds_slice, axis=-1) + + return preds_agg + + +# code for getting model predictions from a tensor of input sequence patterns +def predict_counts( + seqnn_model, + seq_1hot, + head_i=None, + target_slice=None, + pos_slice=None, + pos_mask=None, + chunk_size=None, + batch_size=1, + track_scale=1.0, + track_transform=1.0, + clip_soft=None, + untransform_old=False, + use_mean=False, + dtype="float32", +): + + # start time + t0 = time.time() + + # choose model + if seqnn_model.ensemble is not None: + model = seqnn_model.ensemble + elif head_i is not None: + model = seqnn_model.models[head_i] + else: + model = seqnn_model.model + + # verify tensor shape(s) + seq_1hot = seq_1hot.astype("float32") + target_slice = np.array(target_slice).astype("int32") + pos_slice = np.array(pos_slice).astype("int32") + + # convert constants to tf tensors + track_scale = tf.constant(track_scale, dtype=tf.float32) + track_transform = tf.constant(track_transform, dtype=tf.float32) + if clip_soft is not None: + clip_soft = tf.constant(clip_soft, dtype=tf.float32) + + if pos_mask is not None: + pos_mask = np.array(pos_mask).astype("float32") + + if len(seq_1hot.shape) < 3: + seq_1hot = seq_1hot[None, ...] 
+ + if len(target_slice.shape) < 2: + target_slice = target_slice[None, ...] + + if len(pos_slice.shape) < 2: + pos_slice = pos_slice[None, ...] + + if pos_mask is not None and len(pos_mask.shape) < 2: + pos_mask = pos_mask[None, ...] + + # chunk parameters + num_chunks = 1 + if chunk_size is None: + chunk_size = seq_1hot.shape[0] + else: + num_chunks = int(np.ceil(seq_1hot.shape[0] / chunk_size)) + + # loop over chunks + pred_chunks = [] + for ci in range(num_chunks): + + # collect chunk + seq_1hot_chunk = seq_1hot[ci * chunk_size : (ci + 1) * chunk_size, ...] + target_slice_chunk = target_slice[ci * chunk_size : (ci + 1) * chunk_size, ...] + pos_slice_chunk = pos_slice[ci * chunk_size : (ci + 1) * chunk_size, ...] + + pos_mask_chunk = None + if pos_mask is not None: + pos_mask_chunk = pos_mask[ci * chunk_size : (ci + 1) * chunk_size, ...] + + actual_chunk_size = seq_1hot_chunk.shape[0] + + # convert to tf tensors + seq_1hot_chunk = tf.convert_to_tensor(seq_1hot_chunk, dtype=tf.float32) + target_slice_chunk = tf.convert_to_tensor(target_slice_chunk, dtype=tf.int32) + pos_slice_chunk = tf.convert_to_tensor(pos_slice_chunk, dtype=tf.int32) + + if pos_mask is not None: + pos_mask_chunk = tf.convert_to_tensor(pos_mask_chunk, dtype=tf.float32) + + # batching parameters + num_batches = int(np.ceil(actual_chunk_size / batch_size)) + + # loop over batches + pred_batches = [] + for bi in range(num_batches): + + # collect batch + seq_1hot_batch = seq_1hot_chunk[ + bi * batch_size : (bi + 1) * batch_size, ... + ] + target_slice_batch = target_slice_chunk[ + bi * batch_size : (bi + 1) * batch_size, ... + ] + pos_slice_batch = pos_slice_chunk[ + bi * batch_size : (bi + 1) * batch_size, ... + ] + + pos_mask_batch = None + if pos_mask is not None: + pos_mask_batch = pos_mask_chunk[ + bi * batch_size : (bi + 1) * batch_size, ... 
+ ] + + pred_batch = ( + _count_func( + model, + seq_1hot_batch, + target_slice_batch, + pos_slice_batch, + pos_mask_batch, + track_scale, + track_transform, + clip_soft, + untransform_old, + use_mean, + ) + .numpy() + .astype(dtype) + ) + + pred_batches.append(pred_batch) + + # concat predicted batches + preds = np.concatenate(pred_batches, axis=0) + + pred_chunks.append(preds) + + # collect garbage + gc.collect() + + # concat predicted chunks + preds = np.concatenate(pred_chunks, axis=0) + + print("Made predictions in %ds" % (time.time() - t0)) + + return preds + + ################################################################################ # __main__ -################################################################################ +# ############################################################################### if __name__ == "__main__": main() diff --git a/src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py b/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py old mode 100644 new mode 100755 similarity index 98% rename from src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py rename to src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py index 6177ef0..b3fd477 --- a/src/scripts/borzoi_satg_gene_gpu_crispr_ism_shuffle.py +++ b/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py @@ -35,7 +35,7 @@ import pygene import tensorflow as tf -from baskerville import dna_io +from baskerville import dna as dna_io from baskerville import gene as bgene from baskerville import seqnn from baskerville.dataset import targets_prep_strand @@ -43,7 +43,7 @@ from scipy.ndimage import gaussian_filter1d ''' -borzoi_satg_gene_gpu_crispr_ism_shuffle.py +borzoi_satg_gene_crispr_ism_shuffle.py Perform a windowed shuffle analysis for genes specified in a GTF file, targeting regions specified in a separate csv. 
''' @@ -58,7 +58,7 @@ def main(): parser.add_option( "--fa", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( @@ -84,9 +84,9 @@ def main(): parser.add_option( '-c', dest='crosses', - default=1, - type='int', - help='Number of cross-fold rounds [Default:%default]', + default="0", + type="str", + help='Model crosses (replicates) to use in ensemble (comma-separated list) [Default:%default]', ) parser.add_option( "--head", @@ -218,9 +218,10 @@ def main(): parser.error('Must provide parameter file, model folder and BED file') if not os.path.isdir(options.out_dir): - os.mkdir(options.out_dir) + os.makedirs(options.out_dir, exist_ok=True) options.folds = [int(fold) for fold in options.folds.split(',')] + options.crosses = [int(cross) for cross in options.crosses.split(",")] options.shifts = [int(shift) for shift in options.shifts.split(',')] ################################################################# diff --git a/src/scripts/borzoi_satg_gene_gpu_focused_ism.py b/src/scripts/borzoi_satg_gene_focused_ism.py similarity index 98% rename from src/scripts/borzoi_satg_gene_gpu_focused_ism.py rename to src/scripts/borzoi_satg_gene_focused_ism.py index 0f6cd08..f095be8 100755 --- a/src/scripts/borzoi_satg_gene_gpu_focused_ism.py +++ b/src/scripts/borzoi_satg_gene_focused_ism.py @@ -27,12 +27,12 @@ import tensorflow as tf from baskerville.dataset import targets_prep_strand -from baskerville import dna_io +from baskerville import dna as dna_io from baskerville import gene as bgene from baskerville import seqnn """ -borzoi_satg_gene_gpu_focused_ism.py +borzoi_satg_gene_focused_ism.py Perform an ISM analysis for genes specified in a GTF file, targeting high-saliency regions based on gradient scores. 
""" @@ -46,7 +46,7 @@ def main(): parser.add_option( "--fa", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( @@ -72,9 +72,9 @@ def main(): parser.add_option( '-c', dest='crosses', - default=1, - type='int', - help='Number of cross-fold rounds [Default:%default]', + default="0", + type="str", + help='Model crosses (replicates) to use in ensemble (comma-separated list) [Default:%default]', ) parser.add_option( "--head", @@ -228,9 +228,10 @@ def main(): parser.error("Must provide parameter file, model folder and GTF file") if not os.path.isdir(options.out_dir): - os.mkdir(options.out_dir) + os.makedirs(options.out_dir, exist_ok=True) options.folds = [int(fold) for fold in options.folds.split(",")] + options.crosses = [int(cross) for cross in options.crosses.split(",")] options.shifts = [int(shift) for shift in options.shifts.split(",")] options.tissue_files = [tissue for tissue in options.tissue_files.split(",")] diff --git a/src/scripts/borzoi_satg_gene_gpu.py b/src/scripts/borzoi_satg_gene_gpu.py deleted file mode 100755 index fd22b45..0000000 --- a/src/scripts/borzoi_satg_gene_gpu.py +++ /dev/null @@ -1,903 +0,0 @@ -#!/usr/bin/env python -# Copyright 2017 Calico LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ========================================================================= -from __future__ import print_function - -from optparse import OptionParser - -import gc -import json -import os -import time - -import h5py -import numpy as np -import pandas as pd -import pysam -import tensorflow as tf - -from baskerville.dataset import targets_prep_strand -from baskerville import dna_io -from baskerville import gene as bgene -from baskerville import seqnn - -""" -borzoi_satg_gene_gpu.py - -Perform a gradient saliency analysis for genes specified in a GTF file (GPU-friendly). -""" - -################################################################################ -# main -# ############################################################################### -def main(): - usage = "usage: %prog [options] " - parser = OptionParser(usage) - parser.add_option( - "--fa", - dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], - help="Genome FASTA for sequences [Default: %default]", - ) - parser.add_option( - "-o", - dest="out_dir", - default="satg_out", - help="Output directory [Default: %default]", - ) - parser.add_option( - "--rc", - dest="rc", - default=False, - action="store_true", - help="Ensemble forward and reverse complement predictions [Default: %default]", - ) - parser.add_option( - "-f", - dest="folds", - default="0", - type="str", - help="Model folds to use in ensemble (comma-separated list) [Default: %default]", - ) - parser.add_option( - '-c', - dest='crosses', - default=1, - type='int', - help='Number of cross-fold rounds [Default:%default]', - ) - parser.add_option( - "--head", - dest="head_i", - default=0, - type="int", - help="Model head index [Default: %default]", - ) - parser.add_option( - "--shifts", - dest="shifts", - default="0", - type="str", - help="Ensemble prediction shifts [Default: %default]", - ) - parser.add_option( - "--span", - dest="span", - default=False, - action="store_true", - help="Aggregate entire gene span 
[Default: %default]", - ) - parser.add_option( - "--clip_soft", - dest="clip_soft", - default=None, - type="float", - help="Model clip_soft setting [Default: %default]", - ) - parser.add_option( - "--track_scale", - dest="track_scale", - default=0.02, - type="float", - help="Target transform scale [Default: %default]", - ) - parser.add_option( - "--track_transform", - dest="track_transform", - default=0.75, - type="float", - help="Target transform exponent [Default: %default]", - ) - parser.add_option( - "--untransform_old", - dest="untransform_old", - default=False, - action="store_true", - help="Run gradients with old version of inverse transforms [Default: %default]", - ) - parser.add_option( - "--no_untransform", - dest="no_untransform", - default=False, - action="store_true", - help="Run gradients with no inverse transforms [Default: %default]", - ) - parser.add_option( - "--get_preds", - dest="get_preds", - default=False, - action="store_true", - help="Store scalar predictions in addition to their gradients [Default: %default]", - ) - parser.add_option( - "--pseudo_qtl", - dest="pseudo_qtl", - default=None, - type="float", - help="Quantile of predicted scalars to choose as pseudo count [Default: %default]", - ) - parser.add_option( - "--pseudo_tissue", - dest="pseudo_tissue", - default=None, - type="str", - help="Tissue to filter genes on when calculating pseudo count [Default: %default]", - ) - parser.add_option( - "--gene_file", - dest="gene_file", - default=None, - type="str", - help="Csv-file of gene metadata [Default: %default]", - ) - parser.add_option( - "-t", - dest="targets_file", - default=None, - type="str", - help="File specifying target indexes and labels in table format", - ) - (options, args) = parser.parse_args() - - if len(args) == 3: - # single worker - params_file = args[0] - model_folder = args[1] - genes_gtf_file = args[2] - else: - parser.error("Must provide parameter file, model folder and GTF file") - - if not 
os.path.isdir(options.out_dir): - os.mkdir(options.out_dir) - - options.folds = [int(fold) for fold in options.folds.split(",")] - options.shifts = [int(shift) for shift in options.shifts.split(",")] - - ################################################################# - # read parameters and targets - - # read model parameters - with open(params_file) as params_open: - params = json.load(params_open) - params_model = params["model"] - params_train = params["train"] - seq_len = params_model["seq_length"] - - if options.targets_file is None: - parser.error("Must provide targets table to properly handle strands.") - else: - targets_df = pd.read_csv(options.targets_file, sep="\t", index_col=0) - - # prep strand - orig_new_index = dict(zip(targets_df.index, np.arange(targets_df.shape[0]))) - targets_strand_pair = np.array( - [orig_new_index[ti] for ti in targets_df.strand_pair] - ) - targets_strand_df = targets_prep_strand(targets_df) - num_targets = 1 - - # Load gene dataframe and select tissue - tissue_genes = None - if options.gene_file is not None and options.pseudo_tissue is not None: - gene_df = pd.read_csv(options.gene_file, sep="\t") - gene_df = ( - gene_df.query("tissue == '" + str(options.pseudo_tissue) + "'") - .copy() - .reset_index(drop=True) - ) - gene_df = gene_df.drop(columns=["Unnamed: 0"]) - - # Get list of gene for tissue - tissue_genes = gene_df["gene_base"].values.tolist() - - print("len(tissue_genes) = " + str(len(tissue_genes))) - - ################################################################# - # load first model fold to get parameters - - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", - options.head_i - ) - seqnn_model.build_slice(targets_df.index, False) - # seqnn_model.build_ensemble(options.rc, options.shifts) - - model_stride = seqnn_model.model_strides[0] - model_crop = seqnn_model.target_crops[0] - target_length = 
seqnn_model.target_lengths[0] - - ################################################################# - # read genes - - # parse GTF - transcriptome = bgene.Transcriptome(genes_gtf_file) - - # order valid genes - genome_open = pysam.Fastafile(options.genome_fasta) - gene_list = sorted(transcriptome.genes.keys()) - num_genes = len(gene_list) - - ################################################################# - # setup output - - min_start = -model_stride * model_crop - - # choose gene sequences - genes_chr = [] - genes_start = [] - genes_end = [] - genes_strand = [] - for gene_id in gene_list: - gene = transcriptome.genes[gene_id] - genes_chr.append(gene.chrom) - genes_strand.append(gene.strand) - - gene_midpoint = gene.midpoint() - gene_start = max(min_start, gene_midpoint - seq_len // 2) - gene_end = gene_start + seq_len - genes_start.append(gene_start) - genes_end.append(gene_end) - - ################################################################# - # predict scores, write output - - buffer_size = 1024 - - print("clip_soft = " + str(options.clip_soft)) - - print("n genes = " + str(len(genes_chr))) - - # loop over folds - for fold_ix in options.folds: - for cross_ix in options.crosses: - - print("-- fold = f" + str(fold_ix) + "c" + str(cross_ix) + " --") - - # (re-)initialize HDF5 - scores_h5_file = "%s/scores_f%dc%d.h5" % (options.out_dir, fold_ix, cross_ix) - if os.path.isfile(scores_h5_file): - os.remove(scores_h5_file) - scores_h5 = h5py.File(scores_h5_file, "w") - scores_h5.create_dataset("seqs", dtype="bool", shape=(num_genes, seq_len, 4)) - scores_h5.create_dataset( - "grads", dtype="float16", shape=(num_genes, seq_len, 4, num_targets) - ) - if options.get_preds: - scores_h5.create_dataset( - "preds", dtype="float32", shape=(num_genes, num_targets) - ) - scores_h5.create_dataset("gene", data=np.array(gene_list, dtype="S")) - scores_h5.create_dataset("chr", data=np.array(genes_chr, dtype="S")) - scores_h5.create_dataset("start", data=np.array(genes_start)) 
- scores_h5.create_dataset("end", data=np.array(genes_end)) - scores_h5.create_dataset("strand", data=np.array(genes_strand, dtype="S")) - - # load model fold - seqnn_model = seqnn.SeqNN(params_model) - seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", - options.head_i - ) - seqnn_model.build_slice(targets_df.index, False) - - # optionally get (and store) scalar predictions before computing their gradients - if options.get_preds: - print(" - (prediction) - ", flush=True) - - for shift in options.shifts: - print("Processing shift %d" % shift, flush=True) - - for rev_comp in [False, True] if options.rc else [False]: - - if options.rc: - print( - "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), - flush=True, - ) - - seq_1hots = [] - gene_slices = [] - gene_targets = [] - - for gi, gene_id in enumerate(gene_list): - - if gi % 500 == 0: - print("Processing %d, %s" % (gi, gene_id), flush=True) - - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot( - genome_open, - genes_chr[gi], - genes_start[gi], - genes_end[gi], - seq_len, - ) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - seq_out_len = model_stride * target_length - - # determine output positions - gene_slice = gene.output_slice( - seq_out_start, seq_out_len, model_stride, options.span - ) - - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 - - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = ( - (targets_df.strand != "-") - if not rev_comp - else (targets_df.strand != "+") - ) - else: - gene_strand_mask = ( - (targets_df.strand != "+") - if not rev_comp - else (targets_df.strand != "-") - ) - - gene_target = np.array( - targets_df.index[gene_strand_mask].values - ) - - # accumulate data tensors - 
seq_1hots.append(seq_1hot[None, ...]) - gene_slices.append(gene_slice[None, ...]) - gene_targets.append(gene_target[None, ...]) - - if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: - - # concat sequences - seq_1hots = np.concatenate(seq_1hots, axis=0) - - # pad gene slices to same length (mark valid positions in mask tensor) - max_slice_len = int( - np.max( - [gene_slice.shape[1] for gene_slice in gene_slices] - ) - ) - - gene_masks = np.zeros( - (len(gene_slices), max_slice_len), dtype="float32" - ) - gene_slices_padded = np.zeros( - (len(gene_slices), max_slice_len), dtype="int32" - ) - for gii, gene_slice in enumerate(gene_slices): - for j in range(gene_slice.shape[1]): - gene_masks[gii, j] = 1.0 - gene_slices_padded[gii, j] = gene_slice[0, j] - - gene_slices = gene_slices_padded - - # concat gene-specific targets - gene_targets = np.concatenate(gene_targets, axis=0) - - # batch call count predictions - preds = predict_counts( - seqnn_model, - seq_1hots, - head_i=0, - target_slice=gene_targets, - pos_slice=gene_slices, - pos_mask=gene_masks, - chunk_size=buffer_size, - batch_size=1, - track_scale=options.track_scale, - track_transform=options.track_transform, - clip_soft=options.clip_soft, - untransform_old=options.untransform_old, - use_mean=False, - dtype="float32", - ) - - # save predictions - for gii, gene_slice in enumerate(gene_slices): - h5_gi = (gi // buffer_size) * buffer_size + gii - - # write to HDF5 - scores_h5["preds"][h5_gi, :] += preds[gii] / float( - len(options.shifts) - ) - - # clear sequence buffer - seq_1hots = [] - gene_slices = [] - gene_targets = [] - - # collect garbage - gc.collect() - - # optionally set pseudo count from predictions - pseudo_count = 0.0 - if options.pseudo_qtl is not None: - gene_preds = scores_h5["preds"][:] - - # filter on tissue - tissue_preds = None - - if tissue_genes is not None: - tissue_set = set(tissue_genes) - - # get subset of genes and predictions belonging to the pseudo count tissue - 
tissue_preds = [] - for gi, gene_id in enumerate(gene_list): - if gene_id.split(".")[0] in tissue_set: - tissue_preds.append(gene_preds[gi, 0]) - - tissue_preds = np.array(tissue_preds, dtype="float32") - else: - tissue_preds = np.array(gene_preds[:, 0], dtype="float32") - - print("tissue_preds.shape[0] = " + str(tissue_preds.shape[0])) - - print("np.min(tissue_preds) = " + str(np.min(tissue_preds))) - print("np.max(tissue_preds) = " + str(np.max(tissue_preds))) - - # set pseudo count based on quantile of predictions - pseudo_count = np.quantile(tissue_preds, q=options.pseudo_qtl) - - print("") - print("pseudo_count = " + str(round(pseudo_count, 6))) - - # compute gradients - print(" - (gradients) - ", flush=True) - - for shift in options.shifts: - print("Processing shift %d" % shift, flush=True) - - for rev_comp in [False, True] if options.rc else [False]: - - if options.rc: - print( - "Fwd/rev = %s" % ("fwd" if not rev_comp else "rev"), flush=True - ) - - seq_1hots = [] - gene_slices = [] - gene_targets = [] - - for gi, gene_id in enumerate(gene_list): - - if gi % 500 == 0: - print("Processing %d, %s" % (gi, gene_id), flush=True) - - gene = transcriptome.genes[gene_id] - - # make sequence - seq_1hot = make_seq_1hot( - genome_open, - genes_chr[gi], - genes_start[gi], - genes_end[gi], - seq_len, - ) - seq_1hot = dna_io.hot1_augment(seq_1hot, shift=shift) - - # determine output sequence start - seq_out_start = genes_start[gi] + model_stride * model_crop - seq_out_len = model_stride * target_length - - # determine output positions - gene_slice = gene.output_slice( - seq_out_start, seq_out_len, model_stride, options.span - ) - - if rev_comp: - seq_1hot = dna_io.hot1_rc(seq_1hot) - gene_slice = target_length - gene_slice - 1 - - # slice relevant strand targets - if genes_strand[gi] == "+": - gene_strand_mask = ( - (targets_df.strand != "-") - if not rev_comp - else (targets_df.strand != "+") - ) - else: - gene_strand_mask = ( - (targets_df.strand != "+") - if not 
rev_comp - else (targets_df.strand != "-") - ) - - gene_target = np.array(targets_df.index[gene_strand_mask].values) - - # accumulate data tensors - seq_1hots.append(seq_1hot[None, ...]) - gene_slices.append(gene_slice[None, ...]) - gene_targets.append(gene_target[None, ...]) - - if gi == len(gene_list) - 1 or len(seq_1hots) >= buffer_size: - - # concat sequences - seq_1hots = np.concatenate(seq_1hots, axis=0) - - # pad gene slices to same length (mark valid positions in mask tensor) - max_slice_len = int( - np.max([gene_slice.shape[1] for gene_slice in gene_slices]) - ) - - gene_masks = np.zeros( - (len(gene_slices), max_slice_len), dtype="float32" - ) - gene_slices_padded = np.zeros( - (len(gene_slices), max_slice_len), dtype="int32" - ) - for gii, gene_slice in enumerate(gene_slices): - for j in range(gene_slice.shape[1]): - gene_masks[gii, j] = 1.0 - gene_slices_padded[gii, j] = gene_slice[0, j] - - gene_slices = gene_slices_padded - - # concat gene-specific targets - gene_targets = np.concatenate(gene_targets, axis=0) - - # batch call gradient computation - grads = seqnn_model.gradients( - seq_1hots, - head_i=0, - target_slice=gene_targets, - pos_slice=gene_slices, - pos_mask=gene_masks, - chunk_size=buffer_size, - batch_size=1, - track_scale=options.track_scale, - track_transform=options.track_transform, - clip_soft=options.clip_soft, - pseudo_count=pseudo_count, - untransform_old=options.untransform_old, - no_untransform=options.no_untransform, - use_mean=False, - use_ratio=False, - use_logodds=False, - subtract_avg=True, - input_gate=False, - dtype="float16", - ) - - # undo augmentations and save gradients - for gii, gene_slice in enumerate(gene_slices): - grad = unaugment_grads( - grads[gii, :, :, None], - fwdrc=(not rev_comp), - shift=shift, - ) - - h5_gi = (gi // buffer_size) * buffer_size + gii - - # write to HDF5 - scores_h5["grads"][h5_gi] += grad - - # clear sequence buffer - seq_1hots = [] - gene_slices = [] - gene_targets = [] - - # collect garbage 
- gc.collect() - - # save sequences and normalize gradients by total size of ensemble - for gi, gene_id in enumerate(gene_list): - - # re-make original sequence - seq_1hot = make_seq_1hot( - genome_open, genes_chr[gi], genes_start[gi], genes_end[gi], seq_len - ) - - # write to HDF5 - scores_h5["seqs"][gi] = seq_1hot - scores_h5["grads"][gi] /= float( - (len(options.shifts) * (2 if options.rc else 1)) - ) - - # collect garbage - gc.collect() - - # close files - genome_open.close() - scores_h5.close() - - -def unaugment_grads(grads, fwdrc=False, shift=0): - """Undo sequence augmentation.""" - # reverse complement - if not fwdrc: - # reverse - grads = grads[::-1, :, :] - - # swap A and T - grads[:, [0, 3], :] = grads[:, [3, 0], :] - - # swap C and G - grads[:, [1, 2], :] = grads[:, [2, 1], :] - - # undo shift - if shift < 0: - # shift sequence right - grads[-shift:, :, :] = grads[:shift, :, :] - - # fill in left unknowns - grads[:-shift, :, :] = 0 - - elif shift > 0: - # shift sequence left - grads[:-shift, :, :] = grads[shift:, :, :] - - # fill in right unknowns - grads[-shift:, :, :] = 0 - - return grads - - -def make_seq_1hot(genome_open, chrm, start, end, seq_len): - if start < 0: - seq_dna = "N" * (-start) + genome_open.fetch(chrm, 0, end) - else: - seq_dna = genome_open.fetch(chrm, start, end) - - # extend to full length - if len(seq_dna) < seq_len: - seq_dna += "N" * (seq_len - len(seq_dna)) - - seq_1hot = dna_io.dna_1hot(seq_dna) - return seq_1hot - - -# tf code for predicting raw sum-of-expression counts on GPU -@tf.function -def _count_func( - model, - seq_1hot, - target_slice, - pos_slice, - pos_mask=None, - track_scale=1.0, - track_transform=1.0, - clip_soft=None, - untransform_old=False, - use_mean=False, -): - - # predict - preds = tf.gather( - model(seq_1hot, training=False), target_slice, axis=-1, batch_dims=1 - ) - - if untransform_old: - # undo scale - preds = preds / track_scale - - # undo clip_soft - if clip_soft is not None: - preds = tf.where( - 
preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds - ) - - # undo sqrt - preds = preds ** (1. / track_transform) - else: - # undo clip_soft - if clip_soft is not None: - preds = tf.where( - preds > clip_soft, (preds - clip_soft + 1) ** 2 + clip_soft - 1, preds - ) - - # undo sqrt - preds = -1 + (preds + 1) ** (1. / track_transform) - - # scale - preds = preds / track_scale - - # aggregate over tracks (average) - preds = tf.reduce_mean(preds, axis=-1) - - # slice specified positions - preds_slice = tf.gather(preds, pos_slice, axis=-1, batch_dims=1) - if pos_mask is not None: - preds_slice = preds_slice * pos_mask - - # aggregate over positions - if not use_mean: - preds_agg = tf.reduce_sum(preds_slice, axis=-1) - else: - if pos_mask is not None: - preds_agg = tf.reduce_sum(preds_slice, axis=-1) / tf.reduce_sum( - pos_mask, axis=-1 - ) - else: - preds_agg = tf.reduce_mean(preds_slice, axis=-1) - - return preds_agg - - -# code for getting model predictions from a tensor of input sequence patterns -def predict_counts( - seqnn_model, - seq_1hot, - head_i=None, - target_slice=None, - pos_slice=None, - pos_mask=None, - chunk_size=None, - batch_size=1, - track_scale=1.0, - track_transform=1.0, - clip_soft=None, - untransform_old=False, - use_mean=False, - dtype="float32", -): - - # start time - t0 = time.time() - - # choose model - if seqnn_model.ensemble is not None: - model = seqnn_model.ensemble - elif head_i is not None: - model = seqnn_model.models[head_i] - else: - model = seqnn_model.model - - # verify tensor shape(s) - seq_1hot = seq_1hot.astype("float32") - target_slice = np.array(target_slice).astype("int32") - pos_slice = np.array(pos_slice).astype("int32") - - # convert constants to tf tensors - track_scale = tf.constant(track_scale, dtype=tf.float32) - track_transform = tf.constant(track_transform, dtype=tf.float32) - if clip_soft is not None: - clip_soft = tf.constant(clip_soft, dtype=tf.float32) - - if pos_mask is not None: - pos_mask = 
np.array(pos_mask).astype("float32") - - if len(seq_1hot.shape) < 3: - seq_1hot = seq_1hot[None, ...] - - if len(target_slice.shape) < 2: - target_slice = target_slice[None, ...] - - if len(pos_slice.shape) < 2: - pos_slice = pos_slice[None, ...] - - if pos_mask is not None and len(pos_mask.shape) < 2: - pos_mask = pos_mask[None, ...] - - # chunk parameters - num_chunks = 1 - if chunk_size is None: - chunk_size = seq_1hot.shape[0] - else: - num_chunks = int(np.ceil(seq_1hot.shape[0] / chunk_size)) - - # loop over chunks - pred_chunks = [] - for ci in range(num_chunks): - - # collect chunk - seq_1hot_chunk = seq_1hot[ci * chunk_size : (ci + 1) * chunk_size, ...] - target_slice_chunk = target_slice[ci * chunk_size : (ci + 1) * chunk_size, ...] - pos_slice_chunk = pos_slice[ci * chunk_size : (ci + 1) * chunk_size, ...] - - pos_mask_chunk = None - if pos_mask is not None: - pos_mask_chunk = pos_mask[ci * chunk_size : (ci + 1) * chunk_size, ...] - - actual_chunk_size = seq_1hot_chunk.shape[0] - - # convert to tf tensors - seq_1hot_chunk = tf.convert_to_tensor(seq_1hot_chunk, dtype=tf.float32) - target_slice_chunk = tf.convert_to_tensor(target_slice_chunk, dtype=tf.int32) - pos_slice_chunk = tf.convert_to_tensor(pos_slice_chunk, dtype=tf.int32) - - if pos_mask is not None: - pos_mask_chunk = tf.convert_to_tensor(pos_mask_chunk, dtype=tf.float32) - - # batching parameters - num_batches = int(np.ceil(actual_chunk_size / batch_size)) - - # loop over batches - pred_batches = [] - for bi in range(num_batches): - - # collect batch - seq_1hot_batch = seq_1hot_chunk[ - bi * batch_size : (bi + 1) * batch_size, ... - ] - target_slice_batch = target_slice_chunk[ - bi * batch_size : (bi + 1) * batch_size, ... - ] - pos_slice_batch = pos_slice_chunk[ - bi * batch_size : (bi + 1) * batch_size, ... - ] - - pos_mask_batch = None - if pos_mask is not None: - pos_mask_batch = pos_mask_chunk[ - bi * batch_size : (bi + 1) * batch_size, ... 
- ] - - pred_batch = ( - _count_func( - model, - seq_1hot_batch, - target_slice_batch, - pos_slice_batch, - pos_mask_batch, - track_scale, - track_transform, - clip_soft, - untransform_old, - use_mean, - ) - .numpy() - .astype(dtype) - ) - - pred_batches.append(pred_batch) - - # concat predicted batches - preds = np.concatenate(pred_batches, axis=0) - - pred_chunks.append(preds) - - # collect garbage - gc.collect() - - # concat predicted chunks - preds = np.concatenate(pred_chunks, axis=0) - - print("Made predictions in %ds" % (time.time() - t0)) - - return preds - - -################################################################################ -# __main__ -# ############################################################################### -if __name__ == "__main__": - main() diff --git a/src/scripts/borzoi_satg_polya_gpu.py b/src/scripts/borzoi_satg_polya.py similarity index 98% rename from src/scripts/borzoi_satg_polya_gpu.py rename to src/scripts/borzoi_satg_polya.py index 2b4b8e0..9f26eba 100755 --- a/src/scripts/borzoi_satg_polya_gpu.py +++ b/src/scripts/borzoi_satg_polya.py @@ -25,12 +25,12 @@ import pysam from baskerville.dataset import targets_prep_strand -from baskerville import dna_io +from baskerville import dna as dna_io from baskerville import gene as bgene from baskerville import seqnn """ -borzoi_satg_polya_gpu.py +borzoi_satg_polya.py Perform a gradient saliency analysis for genes specified in a GTF file (polyadenylation-centric). 
""" @@ -44,7 +44,7 @@ def main(): parser.add_option( "--fa", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( @@ -70,9 +70,9 @@ def main(): parser.add_option( '-c', dest='crosses', - default=1, - type='int', - help='Number of cross-fold rounds [Default:%default]', + default="0", + type="str", + help='Model crosses (replicates) to use in ensemble (comma-separated list) [Default:%default]', ) parser.add_option( "--head", @@ -133,7 +133,7 @@ def main(): parser.add_option( "-a", dest="apa_file", - default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ["HG38"], + default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ.get('BORZOI_HG38', 'hg38'), help="Polyadenylation site annotation [Default: %default]", ) (options, args) = parser.parse_args() @@ -147,9 +147,10 @@ def main(): parser.error("Must provide parameter file, model folder and GTF file") if not os.path.isdir(options.out_dir): - os.mkdir(options.out_dir) + os.makedirs(options.out_dir, exist_ok=True) options.folds = [int(fold) for fold in options.folds.split(",")] + options.crosses = [int(cross) for cross in options.crosses.split(",")] options.shifts = [int(shift) for shift in options.shifts.split(",")] ################################################################# diff --git a/src/scripts/borzoi_satg_splice_gpu.py b/src/scripts/borzoi_satg_splice.py similarity index 98% rename from src/scripts/borzoi_satg_splice_gpu.py rename to src/scripts/borzoi_satg_splice.py index 8d01451..473192f 100755 --- a/src/scripts/borzoi_satg_splice_gpu.py +++ b/src/scripts/borzoi_satg_splice.py @@ -25,12 +25,12 @@ import pysam from baskerville.dataset import targets_prep_strand -from baskerville import dna_io +from baskerville import dna as dna_io from baskerville import gene as bgene from baskerville import seqnn """ 
-borzoi_satg_splice_gpu.py +borzoi_satg_splice.py Perform a gradient saliency analysis for genes specified in a GTF file (splice-centric). """ @@ -44,7 +44,7 @@ def main(): parser.add_option( "--fa", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( @@ -70,9 +70,9 @@ def main(): parser.add_option( '-c', dest='crosses', - default=1, - type='int', - help='Number of cross-fold rounds [Default:%default]', + default="0", + type="str", + help='Model crosses (replicates) to use in ensemble (comma-separated list) [Default:%default]', ) parser.add_option( "--head", @@ -134,7 +134,7 @@ def main(): "-s", dest="splice_gff", default="%s/genes/gencode41/gencode41_basic_protein_splice.gff" - % os.environ["HG38"], + % os.environ.get('BORZOI_HG38', 'hg38'), help="Splice site annotation [Default: %default]", ) (options, args) = parser.parse_args() @@ -148,9 +148,10 @@ def main(): parser.error("Must provide parameter file, model folder and GTF file") if not os.path.isdir(options.out_dir): - os.mkdir(options.out_dir) + os.makedirs(options.out_dir, exist_ok=True) options.folds = [int(fold) for fold in options.folds.split(",")] + options.crosses = [int(cross) for cross in options.crosses.split(",")] options.shifts = [int(shift) for shift in options.shifts.split(",")] ################################################################# diff --git a/src/scripts/borzoi_sed.py b/src/scripts/borzoi_sed.py index dc0a736..353382b 100755 --- a/src/scripts/borzoi_sed.py +++ b/src/scripts/borzoi_sed.py @@ -55,13 +55,13 @@ def main(): parser.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( "-g", dest="genes_gtf", - 
default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ.get('BORZOI_HG38', 'hg38'), help="GTF for gene definition [Default %default]", ) parser.add_option( diff --git a/src/scripts/borzoi_sed_folds.py b/src/scripts/borzoi_sed_folds.py old mode 100644 new mode 100755 index e7f92a9..02c5ccd --- a/src/scripts/borzoi_sed_folds.py +++ b/src/scripts/borzoi_sed_folds.py @@ -44,13 +44,13 @@ def main(): sed_options.add_option( '-f', dest='genome_fasta', - default='%s/assembly/ucsc/hg38.fa' % os.environ['HG38'], + default='%s/assembly/ucsc/hg38.fa' % os.environ.get('BORZOI_HG38', 'hg38'), help='Genome FASTA for sequences [Default: %default]', ) sed_options.add_option( '-g', dest='genes_gtf', - default='%s/genes/gencode41/gencode41_basic_nort.gtf' % os.environ['HG38'], + default='%s/genes/gencode41/gencode41_basic_nort.gtf' % os.environ.get('BORZOI_HG38', 'hg38'), help='GTF for gene definition [Default %default]', ) sed_options.add_option( @@ -208,8 +208,8 @@ def main(): # SNP scores # command base - cmd_base = '. /home/drk/anaconda3/etc/profile.d/conda.sh;' - cmd_base += ' conda activate %s;' % options.conda_env + cmd_base = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd_base += 'conda activate %s;' % options.conda_env cmd_base += ' echo $HOSTNAME;' jobs = [] diff --git a/src/scripts/borzoi_sed_ipaqtl_cov.py b/src/scripts/borzoi_sed_ipaqtl_cov.py index 9e08f94..74000bc 100755 --- a/src/scripts/borzoi_sed_ipaqtl_cov.py +++ b/src/scripts/borzoi_sed_ipaqtl_cov.py @@ -48,19 +48,20 @@ def main(): parser.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( "-g", dest="genes_gtf", - default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ.get('BORZOI_HG38', 'hg38'), help="GTF for gene definition [Default %default]", ) parser.add_option( "--apafile", dest="apa_file", - default="polyadb_human_v3.csv.gz" + default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ.get('BORZOI_HG38', 'hg38'), + help="Csv for polya site definition [Default %default]", ) parser.add_option( "-o", diff --git a/src/scripts/borzoi_sed_paqtl_cov.py b/src/scripts/borzoi_sed_paqtl_cov.py index 84b84cc..7027a92 100755 --- a/src/scripts/borzoi_sed_paqtl_cov.py +++ b/src/scripts/borzoi_sed_paqtl_cov.py @@ -48,19 +48,20 @@ def main(): parser.add_option( "-f", dest="genome_fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: %default]", ) parser.add_option( "-g", dest="genes_gtf", - default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_nort.gtf" % os.environ.get('BORZOI_HG38', 'hg38'), help="GTF for gene definition [Default %default]", ) parser.add_option( "--apafile", dest="apa_file", - 
default="polyadb_human_v3.csv.gz" + default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ.get('BORZOI_HG38', 'hg38'), + help="Csv for polya site definition [Default %default]", ) parser.add_option( "-o", diff --git a/src/scripts/borzoi_test_apa_polaydb.py b/src/scripts/borzoi_test_apa.py similarity index 97% rename from src/scripts/borzoi_test_apa_polaydb.py rename to src/scripts/borzoi_test_apa.py index eecf01c..21e4179 100755 --- a/src/scripts/borzoi_test_apa_polaydb.py +++ b/src/scripts/borzoi_test_apa.py @@ -28,7 +28,7 @@ from baskerville import seqnn """ -borzoi_test_apa_polaydb.py +borzoi_test_apa.py Measure accuracy at polyadenylation-level. """ @@ -322,16 +322,16 @@ def main(): apa_preds = np.array(apa_preds) # save numpy arrays with values - np.save("%s/apa_targets_polyadb.npy" % options.out_dir, apa_targets) - np.save("%s/apa_preds_polyadb.npy" % options.out_dir, apa_preds) + np.save("%s/apa_targets.npy" % options.out_dir, apa_targets) + np.save("%s/apa_preds.npy" % options.out_dir, apa_preds) # save values apa_targets_df = pd.DataFrame(apa_targets, index=pas_ids) apa_targets_df.to_csv( - "%s/apa_targets_polyadb.tsv.gz" % options.out_dir, sep="\t" + "%s/apa_targets.tsv.gz" % options.out_dir, sep="\t" ) apa_preds_df = pd.DataFrame(apa_preds, index=pas_ids) - apa_preds_df.to_csv("%s/apa_preds_polyadb.tsv.gz" % options.out_dir, sep="\t") + apa_preds_df.to_csv("%s/apa_preds.tsv.gz" % options.out_dir, sep="\t") ################################################################################ diff --git a/src/scripts/borzoi_test_apa_folds_polaydb.py b/src/scripts/borzoi_test_apa_folds.py similarity index 93% rename from src/scripts/borzoi_test_apa_folds_polaydb.py rename to src/scripts/borzoi_test_apa_folds.py index 423bb41..ba4fbbb 100755 --- a/src/scripts/borzoi_test_apa_folds_polaydb.py +++ b/src/scripts/borzoi_test_apa_folds.py @@ -20,7 +20,7 @@ import slurm """ -borzoi_test_apa_folds_polaydb.py +borzoi_test_apa_folds.py Measure accuracy at 
polyadenylation-level for multiple model replicates. """ @@ -67,7 +67,8 @@ def main(): parser.add_option( "-g", dest="apa_file", - default="polyadb_human_v3.csv.gz" + default="%s/genes/polyadb/polyadb_human_v3.csv.gz" % os.environ.get('BORZOI_HG38', 'hg38'), + help="Csv for polya site definition [Default %default]", ) parser.add_option( "--name", @@ -172,15 +173,15 @@ def main(): model_file = "%s/train/model%d_best.h5" % (it_dir, options.dataset_i) # check if done - acc_file = "%s/apa_preds_polyadb.tsv.gz" % out_dir + acc_file = "%s/apa_preds.tsv.gz" % out_dir if os.path.isfile(acc_file): # print('%s already generated.' % acc_file) pass else: # evaluate - cmd = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd += " conda activate %s;" % options.conda_env - cmd += " time borzoi_test_apa_polaydb.py" + cmd = ('. %s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd += "conda activate %s;" % options.conda_env + cmd += " time borzoi_test_apa.py" cmd += " --head %d" % head_i cmd += " -o %s" % out_dir if options.rc: diff --git a/src/scripts/borzoi_test_exons.py b/src/scripts/borzoi_test_exons.py index fe63e62..a599059 100755 --- a/src/scripts/borzoi_test_exons.py +++ b/src/scripts/borzoi_test_exons.py @@ -99,6 +99,13 @@ def main(): default=None, help="TFR pattern string appended to data_dir/tfrecords for subsetting [Default: %default]", ) + parser.add_option( + "-u", + dest="untransform_old", + default=False, + action="store_true", + help="Untransform old models [Default: %default]", + ) (options, args) = parser.parse_args() if len(args) != 4: @@ -235,7 +242,12 @@ def main(): # predict only if gene overlaps yh = None y = y.numpy()[..., targets_df.index] - y = dataset.untransform_preds1(y, targets_df, unscale=True) + + # untransform + if options.untransform_old: + y = dataset.untransform_preds1(y, targets_df, unscale=True) + else: + y = dataset.untransform_preds(y, targets_df, unscale=True) t0 = time.time() print("Sequence %d..." 
% si, end="") @@ -268,7 +280,13 @@ def main(): if yh is None: yh = seqnn_model(x) print(yh.max(), " untransformed to ", end="") - yh = dataset.untransform_preds1(yh, targets_df, unscale=True) + + # untransform + if options.untransform_old: + yh = dataset.untransform_preds1(yh, targets_df, unscale=True) + else: + yh = dataset.untransform_preds(yh, targets_df, unscale=True) + print(yh.max()) # slice gene region diff --git a/src/scripts/borzoi_test_exons_folds.py b/src/scripts/borzoi_test_exons_folds.py index 7091465..93aaf40 100755 --- a/src/scripts/borzoi_test_exons_folds.py +++ b/src/scripts/borzoi_test_exons_folds.py @@ -104,7 +104,7 @@ def main(): "-g", dest="exons_gff", default="%s/genes/gencode41/gencode41_basic_nort_protein_exons.gff" - % os.environ["HG38"], + % os.environ.get('BORZOI_HG38', 'hg38'), ) parser.add_option( "--label_exp", @@ -137,9 +137,16 @@ def main(): help="Output experiment directory [Default: %default]", ) parser.add_option( - "-p", dest="out_stem", default=None, help="Output plot stem [Default: %default]" + "-p", + dest="out_stem", + default=None, + help="Output plot stem [Default: %default]" + ) + parser.add_option( + "-q", + dest="queue", + default="geforce" ) - parser.add_option("-q", dest="queue", default="geforce") parser.add_option( "-r", dest="ref_dir", @@ -160,13 +167,6 @@ def main(): type="str", help="Ensemble prediction shifts [Default: %default]", ) - parser.add_option( - "--status", - dest="status", - default=False, - action="store_true", - help="Update metric status; do not run jobs [Default: %default]", - ) parser.add_option( "-t", dest="targets_file", @@ -174,6 +174,13 @@ def main(): type="str", help="File specifying target indexes and labels in table format", ) + parser.add_option( + '-u', + dest='untransform_old', + default=False, + action='store_true', + help='Untransform old models [Default: %default]', + ) (options, args) = parser.parse_args() if len(args) < 2: @@ -238,8 +245,8 @@ def main(): pass else: # evaluate - cmd 
= ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd += " conda activate %s;" % options.conda_env + cmd = ('. %s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd += "conda activate %s;" % options.conda_env cmd += " time borzoi_test_exons.py" cmd += " --head %d" % head_i cmd += " -o %s" % out_dir @@ -249,6 +256,11 @@ def main(): cmd += " --shifts %s" % options.shifts if options.targets_file is not None: cmd += " -t %s" % options.targets_file + if options.exons_bed is not None: + cmd += " -b %s" % options.exons_bed + if options.untransform_old: + cmd += " -u" + cmd += " -e %d" % options.exon_end cmd += " %s" % params_file cmd += " %s" % model_file cmd += " %s/data%d" % (it_dir, head_i) diff --git a/src/scripts/borzoi_test_genes.py b/src/scripts/borzoi_test_genes.py index bfdb2aa..8a7c4b9 100755 --- a/src/scripts/borzoi_test_genes.py +++ b/src/scripts/borzoi_test_genes.py @@ -425,7 +425,7 @@ def main(): ) acc_nr2.append(nr2_ti) var_mask = gene_wvar[:, ti] > wvar_t[ti] - wr_ti = gene_within[var_mask].mean() + wr_ti = gene_within[:, ti][var_mask].mean() acc_wpearsonr.append(wr_ti) acc_df = pd.DataFrame( diff --git a/src/scripts/borzoi_test_genes_folds.py b/src/scripts/borzoi_test_genes_folds.py index b558205..332a0fe 100755 --- a/src/scripts/borzoi_test_genes_folds.py +++ b/src/scripts/borzoi_test_genes_folds.py @@ -148,7 +148,7 @@ def main(): parser.add_option( "-g", dest="genes_gtf", - default="%s/genes/gencode41/gencode41_basic_protein.gtf" % os.environ["HG38"], + default="%s/genes/gencode41/gencode41_basic_protein.gtf" % os.environ.get('BORZOI_HG38', 'hg38'), ) parser.add_option( "--label_exp", @@ -268,8 +268,8 @@ def main(): pass else: # evaluate - cmd = ". /home/drk/anaconda3/etc/profile.d/conda.sh;" - cmd += " conda activate %s;" % options.conda_env + cmd = ('. 
%s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd += "conda activate %s;" % options.conda_env cmd += " time borzoi_test_genes.py" cmd += ' --head %d' % head_i cmd += " -o %s" % out_dir diff --git a/src/scripts/borzoi_test_tss_gencode.py b/src/scripts/borzoi_test_tss.py old mode 100644 new mode 100755 similarity index 96% rename from src/scripts/borzoi_test_tss_gencode.py rename to src/scripts/borzoi_test_tss.py index 3e88a02..ac1eb3d --- a/src/scripts/borzoi_test_tss_gencode.py +++ b/src/scripts/borzoi_test_tss.py @@ -28,7 +28,7 @@ from baskerville import seqnn ''' -borzoi_test_tss_gencode.py +borzoi_test_tss.py Measure accuracy at TSS-level. ''' @@ -309,14 +309,14 @@ def main(): tss_preds = np.array(tss_preds) # save numpy arrays with values - np.save('%s/tss_targets_gencode.npy' % options.out_dir, tss_targets) - np.save('%s/tss_preds_gencode.npy' % options.out_dir, tss_preds) + np.save('%s/tss_targets.npy' % options.out_dir, tss_targets) + np.save('%s/tss_preds.npy' % options.out_dir, tss_preds) # save values tss_targets_df = pd.DataFrame(tss_targets, index=tss_ids) - tss_targets_df.to_csv('%s/tss_targets_gencode.tsv.gz' % options.out_dir, sep='\t') + tss_targets_df.to_csv('%s/tss_targets.tsv.gz' % options.out_dir, sep='\t') tss_preds_df = pd.DataFrame(tss_preds, index=tss_ids) - tss_preds_df.to_csv('%s/tss_preds_gencode.tsv.gz' % options.out_dir, sep='\t') + tss_preds_df.to_csv('%s/tss_preds.tsv.gz' % options.out_dir, sep='\t') ################################################################################ # __main__ diff --git a/src/scripts/borzoi_test_tss_folds_gencode.py b/src/scripts/borzoi_test_tss_folds.py old mode 100644 new mode 100755 similarity index 94% rename from src/scripts/borzoi_test_tss_folds_gencode.py rename to src/scripts/borzoi_test_tss_folds.py index 1b65130..3fc4fed --- a/src/scripts/borzoi_test_tss_folds_gencode.py +++ b/src/scripts/borzoi_test_tss_folds.py @@ -20,7 +20,7 @@ import slurm """ 
-borzoi_test_tss_folds_gencode.py +borzoi_test_tss_folds.py Measure accuracy at TSS-level for multiple model replicates. """ @@ -67,7 +67,8 @@ def main(): parser.add_option( '-g', dest='tss_file', - default='/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_tss2.bed', + default='%s/genes/gencode41/gencode41_basic_tss2.bed' % os.environ.get('BORZOI_HG38', 'hg38'), + help='Bed for tss definition [Default %default]', ) parser.add_option( '--name', @@ -194,15 +195,15 @@ def main(): model_file = '%s/train/model%d_best.h5' % (it_dir, options.dataset_i) # check if done - acc_file = '%s/tss_preds_gencode.tsv.gz' % out_dir + acc_file = '%s/tss_preds.tsv.gz' % out_dir if os.path.isfile(acc_file): # print('%s already generated.' % acc_file) pass else: # evaluate - cmd = '. /home/drk/anaconda3/etc/profile.d/conda.sh;' - cmd += ' conda activate %s;' % options.conda_env - cmd += ' time borzoi_test_tss_gencode.py' + cmd = ('. %s; ' % os.environ['BORZOI_CONDA']) if 'BORZOI_CONDA' in os.environ else '' + cmd += 'conda activate %s;' % options.conda_env + cmd += ' time borzoi_test_tss.py' cmd += ' --head %d' % head_i cmd += ' -o %s' % out_dir if options.rc: diff --git a/src/scripts/borzoi_tfmodisco.py b/src/scripts/borzoi_tfmodisco.py old mode 100644 new mode 100755 index 4caa384..6433461 --- a/src/scripts/borzoi_tfmodisco.py +++ b/src/scripts/borzoi_tfmodisco.py @@ -43,13 +43,13 @@ def main(): parser.add_option( '-d', dest='meme_db', - default='/homde/drk/code/meme-5.4.1/motif_databases/CIS-BP_2.00/Homo_sapiens.meme', + default='meme-5.4.1/motif_databases/CIS-BP_2.00/Homo_sapiens.meme', help='Meme database [Default: %default]', ) parser.add_option( '-g', dest='genes_gtf_file', - default='/home/drk/common/data/genomes/hg38/genes/gencode38/gencode38_basic_protein.gtf', + default='%s/genes/gencode38/gencode38_basic_protein.gtf' % os.environ.get('BORZOI_HG38', 'hg38'), help='Gencode GTF [Default: %default]', ) parser.add_option( @@ -361,7 +361,7 @@ def main(): 
modisco_meme_open.close() # run tomtom - tomtom_cmd = '/home/drk/bin/tomtom -dist pearson -thresh 0.1 -oc %s %s %s' % \ + tomtom_cmd = 'tomtom -dist pearson -thresh 0.1 -oc %s %s %s' % \ (options.out_dir, modisco_meme_file, options.meme_db) subprocess.call(tomtom_cmd, shell=True) diff --git a/src/scripts/borzoi_tfmodisco_diff.py b/src/scripts/borzoi_tfmodisco_diff.py old mode 100644 new mode 100755 index a0119f9..b97fe20 --- a/src/scripts/borzoi_tfmodisco_diff.py +++ b/src/scripts/borzoi_tfmodisco_diff.py @@ -45,13 +45,13 @@ def main(): parser.add_option( '-d', dest='meme_db', - default='/homde/drk/code/meme-5.4.1/motif_databases/CIS-BP_2.00/Homo_sapiens.meme', + default='meme-5.4.1/motif_databases/CIS-BP_2.00/Homo_sapiens.meme', help='Meme database [Default: %default]', ) parser.add_option( '-g', dest='genes_gtf_file', - default='/home/drk/common/data/genomes/hg38/genes/gencode38/gencode38_basic_protein.gtf', + default='%s/genes/gencode38/gencode38_basic_protein.gtf' % os.environ.get('BORZOI_HG38', 'hg38'), help='Gencode GTF [Default: %default]', ) parser.add_option( @@ -401,7 +401,7 @@ def main(): modisco_meme_open.close() # run tomtom - tomtom_cmd = '/home/drk/bin/tomtom -dist pearson -thresh 0.1 -oc %s %s %s' % \ + tomtom_cmd = 'tomtom -dist pearson -thresh 0.1 -oc %s %s %s' % \ (options.out_dir, modisco_meme_file, options.meme_db) subprocess.call(tomtom_cmd, shell=True) diff --git a/src/scripts/borzoi_trip.py b/src/scripts/borzoi_trip.py index 0d84ac5..64fccd9 100755 --- a/src/scripts/borzoi_trip.py +++ b/src/scripts/borzoi_trip.py @@ -24,7 +24,7 @@ import pandas as pd import pysam -from baskerville import dna_io +from baskerville import dna as dna_io from baskerville import seqnn from baskerville import stream @@ -48,7 +48,7 @@ def main(): parser.add_option( "-f", dest="fasta", - default="%s/assembly/ucsc/hg38.fa" % os.environ["HG38"], + default="%s/assembly/ucsc/hg38.fa" % os.environ.get('BORZOI_HG38', 'hg38'), help="Genome FASTA for sequences [Default: 
%default]", ) parser.add_option( diff --git a/src/scripts/bw_h5.py b/src/scripts/bw_h5.py new file mode 100755 index 0000000..2780016 --- /dev/null +++ b/src/scripts/bw_h5.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +from optparse import OptionParser +import sys + +import h5py +import numpy as np +import pyBigWig +import scipy.interpolate + +''' +bw_h5.py + +Convert a BigWig to HDF5. +''' + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] ' + parser = OptionParser(usage) + parser.add_option('-c', '--chr_strip', dest='chr_strip', + default=False, action='store_true') + parser.add_option('-i', dest='interp_nan', + default=False, action='store_true', + help='Interpolate NaNs [Default: %default]') + parser.add_option('-m', dest='min_norm', + default=False, action='store_true', + help='Normalize the minimum nonzero value to 1 [Default: %default]') + # parser.add_option('--mode_max', dest='mode_norm_max', + # default=10, type='float', + # help='Maximum norm scale value determined by mode [Default: %default]') + parser.add_option('-s', dest='scale', + default=1.0, type='float', + help='Scale all values (e.g. 
to undo normalization) [Default: %default]') + parser.add_option('-v', dest='verbose', + default=False, action='store_true') + parser.add_option('-z', dest='clip_zero', + default=False, action='store_true', + help='Clip negative values at zero [Default: %default]') + (options,args) = parser.parse_args() + + if len(args) != 2: + parser.error('Must provide input BigWig and output HDF5.') + else: + bw_file = args[0] + hdf5_file = args[1] + + # open files + bw_in = pyBigWig.open(bw_file) + h5_out = h5py.File(hdf5_file, 'w') + + # process chromosomes in length order + chrom_lengths = bw_in.chroms() + chroms = sorted(chrom_lengths.keys()) + length_chroms = [(chrom_lengths[chrm],chrm) for chrm in chroms] + length_chroms = sorted(length_chroms)[::-1] + min_factor = None + + # for each chromosome + for clength, chrom in length_chroms: + if options.verbose: + print(chrom) + + # read values + x = bw_in.values(chrom, 0, chrom_lengths[chrom], numpy=True) + + # scale + if options.scale != 1: + x = x*options.scale + + if options.min_norm: + if min_factor is None: + min_factor = x[x>0].min() + # vals, counts = np.unique(x[x>0], return_counts=True) + # mode_factor = vals[0] + # mode_factor = np.clip(vals[0], 1/options.mode_norm_max, options.mode_norm_max) + print('Min normalization factor: %f' % min_factor, file=sys.stderr) + x /= min_factor + + # interpolate NaN + if options.interp_nan: + x = interp_nan(x) + else: + x = np.nan_to_num(x) + + # clip negative values + if options.clip_zero: + x = np.clip(x, 0, np.inf) + + # clip float16 min/max + x = np.clip(x, np.finfo(np.float16).min, np.finfo(np.float16).max) + + # strip "chr" + if options.chr_strip: + chrom = chrom.replace('chr','') + + # write gzipped into HDF5 + x = x.astype('float16') + h5_out.create_dataset(chrom, data=x, dtype='float16', compression='gzip', shuffle=True) + + # close files + h5_out.close() + bw_in.close() + + +def interp_nan(x, kind='linear'): + '''Linearly interpolate to fill NaN.''' + + # pad zeroes + xp = 
np.zeros(len(x)+2) + xp[1:-1] = x + + # find NaN + x_nan = np.isnan(xp) + + if np.sum(x_nan) == 0: + # unnecessary + return x + + else: + # interpolate + inds = np.arange(len(xp)) + interpolator = scipy.interpolate.interp1d( + inds[~x_nan], + xp[~x_nan], + kind=kind, + bounds_error=False) + + loc = np.where(x_nan) + xp[loc] = interpolator(loc) + + # slice off pad + return xp[1:-1] + +################################################################################ +# __main__ +################################################################################ +if __name__ == '__main__': + main() diff --git a/src/scripts/data/qtl_data/README.md b/src/scripts/data/qtl_data/README.md deleted file mode 100644 index 8831abf..0000000 --- a/src/scripts/data/qtl_data/README.md +++ /dev/null @@ -1,33 +0,0 @@ -## QTL data processing - -The scripts in this folder are used to extract fine-mapped causal sQTLs, paQTLs and ipaQTLs from the results of the eQTL Catalogue, as well as construct distance- and expression-matched negative SNPs.
- -*Notes*: -- The pipeline requires the GTEx v8 (median) TPM matrix, which can be downloaded [here](https://storage.googleapis.com/adult-gtex/bulk-gex/v8/rna-seq/GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_median_tpm.gct.gz). -
- -As a prerequisite to generating any of the QTL datasets, run the following scripts (in order): -1. download_finemap.py -2. download_sumstat.py -3. merge_finemapping_tables.py -4. make_expression_tables.py -
- -To prepare the sQTL dataset, run these scripts: -1. sqtl_make_positive_sets.py -2. sqtl_make_negative_sets.py -
- -To prepare the paQTL dataset, run these scripts: -1. paqtl_make_positive_sets.py -2. paqtl_make_negative_sets.py -
- -To prepare the ipaQTL dataset, run these scripts: -1. ipaqtl_make_positive_sets.py -2. ipaqtl_make_negative_sets.py -
- -Finally, to generate the QTL VCF files, run this script: -1. make_vcfs.py -
diff --git a/src/scripts/data/qtl_data/download_finemap.py b/src/scripts/data/qtl_data/download_finemap.py deleted file mode 100644 index 558e3ef..0000000 --- a/src/scripts/data/qtl_data/download_finemap.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import pandas as pd - -import util - -''' -download_finemap.py - -Download QTL Catalogue fine-mapping results. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - # read remote table - samples_df = pd.read_csv('https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths.tsv', sep='\t') - - # filter GTEx (for now) - samples_df = samples_df[samples_df.study == 'GTEx'] - - - ################################################ - # txrevise for splicing / polyA / TSS QTLs - - os.makedirs('txrev', exist_ok=True) - txrev_df = samples_df[samples_df.quant_method == 'txrev'] - - jobs = [] - for all_ftp_path in txrev_df.ftp_path: - # ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/sumstats/Alasoo_2018/txrev/Alasoo_2018_txrev_macrophage_IFNg+Salmonella.all.tsv.gz - # ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/credible_sets//Alasoo_2018_txrev_macrophage_IFNg+Salmonella.purity_filtered.txt.gz - - all_ftp_file = all_ftp_path.split('/')[-1] - fine_ftp_file = all_ftp_file.replace('all.tsv', 'purity_filtered.txt') - - fine_ftp_path = 'ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/credible_sets/' - fine_ftp_path += fine_ftp_file - - local_path = 'txrev/%s' % fine_ftp_file - if not os.path.isfile(local_path): - cmd = 'curl -o %s %s' % (local_path, fine_ftp_path) - jobs.append(cmd) - - util.exec_par(jobs, 4, verbose=True) - # print('\n'.join(jobs)) - - 
-################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/download_sumstat.py b/src/scripts/data/qtl_data/download_sumstat.py deleted file mode 100644 index ca402df..0000000 --- a/src/scripts/data/qtl_data/download_sumstat.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import pandas as pd - -import util - -''' -download_sumstat.py - -Download QTL Catalogue sumstats. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - # read remote table - samples_df = pd.read_csv('https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths.tsv', sep='\t') - - # filter GTEx (for now) - samples_df = samples_df[samples_df.study == 'GTEx'] - - - ################################################ - # ge for sumstat (we want SNPs and possibly also base expression) - - os.makedirs('ge', exist_ok=True) - txrev_df = samples_df[samples_df.quant_method == 'ge'] - - jobs = [] - for all_ftp_path in txrev_df.ftp_path: - # ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/sumstats/Alasoo_2018/txrev/Alasoo_2018_txrev_macrophage_IFNg+Salmonella.all.tsv.gz - - local_path = 'ge/%s' % all_ftp_path.split("/")[-1] - - if not os.path.isfile(local_path): - cmd = 'curl -o %s %s' % (local_path, all_ftp_path) - jobs.append(cmd) - - util.exec_par(jobs, 4, verbose=True) - # print('\n'.join(jobs)) - - -################################################################################ -# __main__ 
-################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/ipaqtl_make_negative_sets.py b/src/scripts/data/qtl_data/ipaqtl_make_negative_sets.py deleted file mode 100644 index 3f4d49d..0000000 --- a/src/scripts/data/qtl_data/ipaqtl_make_negative_sets.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -''' -paqtl_make_negative_sets.py - -Build tables with negative (non-causal) SNPs for paQTLs. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Parameters - pip_cutoff = 0.01 - max_distance = 10000 - gene_pad = 50 - apa_file = 'polyadb_intron.bed' - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort.gtf' - finemap_file = 'txrev/GTEx_txrev_finemapped_merged.csv.gz' - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 
'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Compile negative SNP set for each tissue - for tissue_name in tissue_names : - - print("-- " + str(tissue_name) + " --") - - #Load summary stats and extract unique set of SNPs - vcf_df = pd.read_csv("ge/GTEx_ge_" + tissue_name + ".all.tsv.gz", sep='\t', compression='gzip', usecols=['chromosome', 'position', 'ref', 'alt']).drop_duplicates(subset=['chromosome', 'position', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - - #Only keep SNPs (no indels) - vcf_df = vcf_df.loc[(vcf_df['ref'].str.len() == vcf_df['alt'].str.len()) & (vcf_df['ref'].str.len() == 1)].copy().reset_index(drop=True) - - vcf_df['chromosome'] = 'chr' + vcf_df['chromosome'].astype(str) - vcf_df['start'] = vcf_df['position'].astype(int) - vcf_df['end'] = vcf_df['start'] + 1 - vcf_df['strand'] = "." 
- - vcf_df = vcf_df[['chromosome', 'start', 'end', 'ref', 'alt', 'strand']] - vcf_df = vcf_df.rename(columns={'chromosome' : 'Chromosome', 'start' : 'Start', 'end' : 'End', 'strand' : 'Strand'}) - - print("len(vcf_df) = " + str(len(vcf_df))) - - #Store intermediate SNPs - #vcf_df.to_csv("ge/GTEx_snps_" + tissue_name + ".bed.gz", sep='\t', index=False, header=False) - - #Load polyadenylation site annotation - apa_df = pd.read_csv(apa_file, sep='\t', names=['Chromosome', 'Start', 'End', 'pas_id', 'feat1', 'Strand']) - apa_df['Start'] += 1 - - #Load gene span annotation - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str']) - gtf_df = gtf_df.query("feature == 'gene'").copy().reset_index(drop=True) - - gtf_df['gene_id'] = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]) - - gtf_df = gtf_df[['Chromosome', 'Start', 'End', 'gene_id', 'feat1', 'Strand']].drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - gtf_df['Start'] = gtf_df['Start'].astype(int) - gene_pad - gtf_df['End'] = gtf_df['End'].astype(int) + gene_pad - - #Join dataframes against gtf annotation - apa_pr = pr.PyRanges(apa_df) - gtf_pr = pr.PyRanges(gtf_df) - vcf_pr = pr.PyRanges(vcf_df) - - apa_gtf_pr = apa_pr.join(gtf_pr, strandedness='same') - vcf_gtf_pr = vcf_pr.join(gtf_pr, strandedness=False) - - apa_gtf_df = apa_gtf_pr.df[['Chromosome', 'Start', 'End', 'pas_id', 'gene_id', 'Strand']].copy().reset_index(drop=True) - vcf_gtf_df = vcf_gtf_pr.df[['Chromosome', 'Start', 'End', 'ref', 'alt', 'Strand', 'gene_id']].copy().reset_index(drop=True) - - apa_gtf_df['Start'] -= max_distance - apa_gtf_df['End'] += max_distance - - #Join vcf against polyadenylation annotation - apa_gtf_pr = pr.PyRanges(apa_gtf_df) - vcf_gtf_pr = pr.PyRanges(vcf_gtf_df) - - vcf_apa_pr = vcf_gtf_pr.join(apa_gtf_pr, strandedness=False) - - #Force gene_id of SNP to be 
same as the gene_id of the polyA site - vcf_apa_df = vcf_apa_pr.df.query("gene_id == gene_id_b").copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df[['Chromosome', 'Start', 'ref', 'alt', 'gene_id', 'pas_id', 'Strand_b', 'Start_b']] - - #PolyA site position - vcf_apa_df['Start_b'] += max_distance - vcf_apa_df = vcf_apa_df.rename(columns={'Start' : 'Pos', 'Start_b' : 'pas_pos', 'Strand_b' : 'Strand'}) - - #Distance to polyA site - vcf_apa_df['distance'] = np.abs(vcf_apa_df['Pos'] - vcf_apa_df['pas_pos']) - - #Choose unique SNPs by shortest distance to polyA site - vcf_apa_df = vcf_apa_df.sort_values(by='distance', ascending=True).drop_duplicates(subset=['Chromosome', 'Pos', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df.sort_values(['Chromosome', 'Pos', 'alt'], ascending=True).copy().reset_index(drop=True) - - vcf_df_filtered = vcf_apa_df.rename(columns={'Chromosome' : 'chrom', 'Pos' : 'pos', 'Strand' : 'strand'}) - vcf_df_filtered = vcf_df_filtered[['chrom', 'pos', 'ref', 'alt', 'gene_id', 'pas_id', 'strand', 'pas_pos', 'distance']] - - print("len(vcf_df_filtered) = " + str(len(vcf_df_filtered))) - - #Store intermediate SNPs (filtered) - vcf_df_filtered.to_csv("ge/GTEx_snps_" + tissue_name + "_intronic_polya_filtered.bed.gz", sep='\t', index=False) - - #Reload filtered SNP file - vcf_df_filtered = pd.read_csv("ge/GTEx_snps_" + tissue_name + "_intronic_polya_filtered.bed.gz", sep='\t', compression='gzip') - - #Create variant identifier - vcf_df_filtered['variant'] = vcf_df_filtered['chrom'] + "_" + vcf_df_filtered['pos'].astype(str) + "_" + vcf_df_filtered['ref'] + "_" + vcf_df_filtered['alt'] - - #Load merged fine-mapping dataframe - finemap_df = pd.read_csv(finemap_file, sep='\t')[['variant', 'pip']] - - #Join against fine-mapping dataframe - neg_df = vcf_df_filtered.join(finemap_df.set_index('variant'), on='variant', how='left') - neg_df.loc[neg_df['pip'].isnull(), 'pip'] = 0. 
- - #Only keep SNPs with PIP < cutoff - neg_df = neg_df.query("pip < " + str(pip_cutoff)).copy().reset_index(drop=True) - - #Store final table of negative SNPs - neg_df.to_csv("ge/GTEx_snps_" + tissue_name + "_intronic_polya_negatives.bed.gz", sep='\t', index=False) - - print("len(neg_df) = " + str(len(neg_df))) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/ipaqtl_make_positive_sets.py b/src/scripts/data/qtl_data/ipaqtl_make_positive_sets.py deleted file mode 100644 index f1afb7b..0000000 --- a/src/scripts/data/qtl_data/ipaqtl_make_positive_sets.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -''' -paqtl_make_positive_sets.py - -Build tables with positive (causal) SNPs for paQTLs. 
-''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Parameters - pip_cutoff = 0.01 - max_distance = 10000 - gene_pad = 50 - apa_file = 'polyadb_intron.bed' - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort.gtf' - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Compile positive SNP set for each tissue - for tissue_name in tissue_names : - - print("-- " + str(tissue_name) + " --") - - #Load fine-mapping table - vcf_df = pd.read_csv("txrev/GTEx_txrev_" + tissue_name + ".purity_filtered.txt.gz", sep='\t', usecols=['chromosome', 'position', 'ref', 'alt', 'variant', 'pip', 'molecular_trait_id'], low_memory=False) - - #Only keep SNPs (no indels) - vcf_df = vcf_df.loc[(vcf_df['ref'].str.len() == vcf_df['alt'].str.len()) & 
(vcf_df['ref'].str.len() == 1)].copy().reset_index(drop=True) - - #Only keep SNPs associated with polyadenylation events - vcf_df = vcf_df.loc[vcf_df['molecular_trait_id'].str.contains(".downstream.")].copy().reset_index(drop=True) - - vcf_df['chromosome'] = 'chr' + vcf_df['chromosome'].astype(str) - vcf_df['start'] = vcf_df['position'].astype(int) - vcf_df['end'] = vcf_df['start'] + 1 - vcf_df['strand'] = "." - - vcf_df = vcf_df[['chromosome', 'start', 'end', 'ref', 'alt', 'strand', 'variant', 'pip', 'molecular_trait_id']] - vcf_df = vcf_df.rename(columns={'chromosome' : 'Chromosome', 'start' : 'Start', 'end' : 'End', 'strand' : 'Strand'}) - - print("len(vcf_df) = " + str(len(vcf_df))) - - #Load polyadenylation site annotation - apa_df = pd.read_csv(apa_file, sep='\t', names=['Chromosome', 'Start', 'End', 'pas_id', 'feat1', 'Strand']) - apa_df['Start'] += 1 - - #Load gene span annotation - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str']) - gtf_df = gtf_df.query("feature == 'gene'").copy().reset_index(drop=True) - - gtf_df['gene_id'] = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]) - - gtf_df = gtf_df[['Chromosome', 'Start', 'End', 'gene_id', 'feat1', 'Strand']].drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - gtf_df['Start'] = gtf_df['Start'].astype(int) - gene_pad - gtf_df['End'] = gtf_df['End'].astype(int) + gene_pad - - #Join dataframes against gtf annotation - apa_pr = pr.PyRanges(apa_df) - gtf_pr = pr.PyRanges(gtf_df) - vcf_pr = pr.PyRanges(vcf_df) - - apa_gtf_pr = apa_pr.join(gtf_pr, strandedness='same') - vcf_gtf_pr = vcf_pr.join(gtf_pr, strandedness=False) - - apa_gtf_df = apa_gtf_pr.df[['Chromosome', 'Start', 'End', 'pas_id', 'gene_id', 'Strand']].copy().reset_index(drop=True) - vcf_gtf_df = vcf_gtf_pr.df[['Chromosome', 'Start', 'End', 'ref', 'alt', 'Strand', 'gene_id', 
'variant', 'pip', 'molecular_trait_id']].copy().reset_index(drop=True) - - apa_gtf_df['Start'] -= max_distance - apa_gtf_df['End'] += max_distance - - #Join vcf against polyadenylation annotation - apa_gtf_pr = pr.PyRanges(apa_gtf_df) - vcf_gtf_pr = pr.PyRanges(vcf_gtf_df) - - vcf_apa_pr = vcf_gtf_pr.join(apa_gtf_pr, strandedness=False) - - #Force gene_id of SNP to be same as the gene_id of the polyA site - vcf_apa_df = vcf_apa_pr.df.query("gene_id == gene_id_b").copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df[['Chromosome', 'Start', 'ref', 'alt', 'gene_id', 'pas_id', 'Strand_b', 'Start_b', 'variant', 'pip', 'molecular_trait_id']] - - #Force gene_id of SNP to be same as the gene_id of the finemapped molecular trait - vcf_apa_df['molecular_trait_gene_id'] = vcf_apa_df['molecular_trait_id'].apply(lambda x: x.split(".")[0]) - vcf_apa_df = vcf_apa_df.query("gene_id == molecular_trait_gene_id").copy().reset_index(drop=True) - - #PolyA site position - vcf_apa_df['Start_b'] += max_distance - vcf_apa_df = vcf_apa_df.rename(columns={'Start' : 'Pos', 'Start_b' : 'pas_pos', 'Strand_b' : 'Strand'}) - - #Distance to polyA site - vcf_apa_df['distance'] = np.abs(vcf_apa_df['Pos'] - vcf_apa_df['pas_pos']) - - #Choose unique SNPs by shortest distance to polyA site (and inverse PIP for tie-breaking) - vcf_apa_df['pip_inv'] = 1. 
- vcf_apa_df['pip'] - - vcf_apa_df = vcf_apa_df.sort_values(by=['distance', 'pip_inv'], ascending=True).drop_duplicates(subset=['Chromosome', 'Pos', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df.sort_values(['Chromosome', 'Pos', 'alt'], ascending=True).copy().reset_index(drop=True) - - vcf_df_filtered = vcf_apa_df.rename(columns={'Chromosome' : 'chrom', 'Pos' : 'pos', 'Strand' : 'strand'}) - vcf_df_filtered = vcf_df_filtered[['chrom', 'pos', 'ref', 'alt', 'gene_id', 'pas_id', 'strand', 'pas_pos', 'distance', 'variant', 'pip', 'molecular_trait_id']] - - print("len(vcf_df_filtered) = " + str(len(vcf_df_filtered))) - - #Store intermediate SNPs (filtered) - vcf_df_filtered.to_csv("txrev/GTEx_snps_" + tissue_name + "_intronic_polya_finemapped_filtered.bed.gz", sep='\t', index=False) - - #Reload filtered SNP file - vcf_df_filtered = pd.read_csv("txrev/GTEx_snps_" + tissue_name + "_intronic_polya_finemapped_filtered.bed.gz", sep='\t', compression='gzip') - - #Only keep SNPs with PIP > cutoff - pos_df = vcf_df_filtered.query("pip > " + str(pip_cutoff)).copy().reset_index(drop=True) - - #Store final table of positive SNPs - pos_df.to_csv("txrev/GTEx_snps_" + tissue_name + "_intronic_polya_positives.bed.gz", sep='\t', index=False) - - print("len(pos_df) = " + str(len(pos_df))) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/ipaqtl_vcfs.py b/src/scripts/data/qtl_data/ipaqtl_vcfs.py deleted file mode 100644 index 773c45e..0000000 --- a/src/scripts/data/qtl_data/ipaqtl_vcfs.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser -import os -import pdb -import time - -import numpy as np -import pandas as pd -import pyranges as pr -from tqdm import tqdm - -''' -ipaqtl_vcfs.py - -Generate positive and 
negative intronic paQTL sets from the QTL catalog txrevise. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options]' - parser = OptionParser(usage) - parser.add_option('--neg_pip', dest='neg_pip', - default=0.01, type='float', - help='PIP upper limit for negative examples. [Default: %default]') - parser.add_option('--pos_pip', dest='pos_pip', - default=0.9, type='float', - help='PIP lower limit for positive examples. [Default: %default]') - parser.add_option('--match_gene', dest='match_gene', - default=0, type='int', - help='Try finding negative in same gene as positive. [Default: %default]') - parser.add_option('--match_allele', dest='match_allele', - default=0, type='int', - help='Try finding negative with same ref and alt alleles. [Default: %default]') - parser.add_option('-o', dest='out_prefix', - default='qtlcat_ipaqtl') - (options,args) = parser.parse_args() - - tissue_name = options.out_prefix.split('txrev_')[1] - - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort_protein.gtf' - - # read variant table - qtlcat_df_neg = pd.read_csv("ge/GTEx_snps_" + tissue_name + "_intronic_polya_negatives.bed.gz", sep='\t') - qtlcat_df_pos = pd.read_csv("txrev/GTEx_snps_" + tissue_name + "_intronic_polya_positives.bed.gz", sep='\t') - - # read TPM bin table and construct lookup dictionaries - tpm_df = pd.read_csv('ge/GTEx_ge_' + tissue_name + "_tpms.csv", sep='\t')[['gene_id', 'tpm', 'bin_index', 'bin_index_l', 'bin_index_r']] - gene_to_tpm_dict = tpm_df.set_index('gene_id').to_dict(orient='index') - - # filter on SNPs with genes in TPM bin dict - qtlcat_df_neg = qtlcat_df_neg.loc[qtlcat_df_neg['gene_id'].isin(tpm_df['gene_id'].values.tolist())].copy().reset_index(drop=True) - qtlcat_df_pos = 
qtlcat_df_pos.loc[qtlcat_df_pos['gene_id'].isin(tpm_df['gene_id'].values.tolist())].copy().reset_index(drop=True) - - #Load gene span annotation (protein-coding/categorized only) - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['id_str']) - gtf_genes = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]).unique().tolist() - - # filter on SNPs with genes in GTF file - qtlcat_df_neg = qtlcat_df_neg.loc[qtlcat_df_neg['gene_id'].isin(gtf_genes)].copy().reset_index(drop=True) - qtlcat_df_pos = qtlcat_df_pos.loc[qtlcat_df_pos['gene_id'].isin(gtf_genes)].copy().reset_index(drop=True) - - bin_to_genes_dict = {} - for _, row in tpm_df.iterrows() : - - if row['bin_index'] not in bin_to_genes_dict : - bin_to_genes_dict[row['bin_index']] = [] - - bin_to_genes_dict[row['bin_index']].append(row['gene_id']) - - for sample_bin in bin_to_genes_dict : - bin_to_genes_dict[sample_bin] = set(bin_to_genes_dict[sample_bin]) - - # split molecular trait id and filter for polyadenylation (for positives) - qtlcat_df_pos['gene'] = [mti.split('.')[0] for mti in qtlcat_df_pos.molecular_trait_id] - qtlcat_df_pos['event'] = [mti.split('.')[2] for mti in qtlcat_df_pos.molecular_trait_id] - - qtlcat_df_pos = qtlcat_df_pos[qtlcat_df_pos.event == 'downstream'] - qtlcat_df_pos = qtlcat_df_pos.rename(columns={'distance' : 'pas_dist'}) - - qtlcat_df_neg['molecular_trait_id'] = qtlcat_df_neg['gene_id'] + "." 
+ "grp_0.downstream.negative" - qtlcat_df_neg['gene'] = qtlcat_df_neg['gene_id'] - qtlcat_df_neg['event'] = 'downstream' - qtlcat_df_neg = qtlcat_df_neg.rename(columns={'distance' : 'pas_dist'}) - - paqtl_df = pd.concat([qtlcat_df_neg, qtlcat_df_pos]).copy().reset_index(drop=True) - - # determine positive variants - paqtl_pos_df = paqtl_df[paqtl_df.pip >= options.pos_pip] - paqtl_neg_df = paqtl_df[paqtl_df.pip < options.neg_pip] - pos_variants = set(paqtl_pos_df.variant) - - neg_gene_and_allele_variants = 0 - neg_gene_variants = 0 - - neg_expr_and_allele_variants = 0 - neg_expr_variants = 0 - - unmatched_variants = 0 - - # choose negative variants - neg_variants = set() - neg_dict = {} - for pvariant in tqdm(pos_variants): - paqtl_this_df = paqtl_pos_df[paqtl_pos_df.variant == pvariant] - - neg_found = False - - # optionally prefer negative from positive's gene set - if options.match_gene == 1 and options.match_allele == 1 : - pgenes = set(paqtl_this_df.gene) - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if neg_found : - neg_gene_and_allele_variants += 1 - - if not neg_found and options.match_gene == 1 : - pgenes = set(paqtl_this_df.gene) - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if neg_found : - neg_gene_variants += 1 - - if not neg_found and options.match_allele == 1 : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if not neg_found and gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if not neg_found and 
gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if neg_found : - neg_expr_and_allele_variants += 1 - - if not neg_found : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if not neg_found and gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if not neg_found and gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if neg_found : - neg_expr_variants += 1 - - if not neg_found : - print("[Warning] Could not find a matching negative for '" + pvariant + "'") - unmatched_variants += 1 - - print('%d positive variants' % len(pos_variants)) - print('%d negative variants' % len(neg_variants)) - print(' - %d gene-matched negatives with same alleles' % neg_gene_and_allele_variants) - print(' - %d gene-matched negatives ' % neg_gene_variants) - print(' - %d expr-matched negatives with same alleles' % neg_expr_and_allele_variants) - print(' - %d expr-matched negatives ' % neg_expr_variants) - print(' - %d unmatched negatives ' % unmatched_variants) - - pos_dict = {pv: pv for pv in pos_variants} - - # write VCFs - 
write_vcf('%s_pos.vcf' % options.out_prefix, paqtl_df, pos_variants, pos_dict) - write_vcf('%s_neg.vcf' % options.out_prefix, paqtl_df, neg_variants, neg_dict) - -def find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, match_allele) : - - gene_mask = np.array([gene in pgenes for gene in paqtl_neg_df.gene]) - paqtl_neg_gene_df = paqtl_neg_df[gene_mask] - - # match PAS distance - this_dist = paqtl_this_df.iloc[0].pas_dist - dist_cmp = np.abs(paqtl_neg_gene_df.pas_dist - this_dist) - dist_cmp_unique = np.sort(np.unique(dist_cmp.values)) - - this_ref = paqtl_this_df.iloc[0].ref - this_alt = paqtl_this_df.iloc[0].alt - - for ni_unique in dist_cmp_unique: - - paqtl_neg_gene_dist_df = paqtl_neg_gene_df.loc[dist_cmp == ni_unique] - - shuffle_index = np.arange(len(paqtl_neg_gene_dist_df), dtype='int32') - np.random.shuffle(shuffle_index) - - for npaqtl_i in range(len(paqtl_neg_gene_dist_df)) : - npaqtl = paqtl_neg_gene_dist_df.iloc[shuffle_index[npaqtl_i]] - - if not match_allele or (npaqtl.ref == this_ref and npaqtl.alt == this_alt): - if npaqtl.variant not in neg_variants and npaqtl.variant not in pos_variants: - - neg_variants.add(npaqtl.variant) - neg_dict[npaqtl.variant] = paqtl_this_df.iloc[0].variant - - return True - - return False - -def write_vcf(vcf_file, df, variants_write, variants_dict): - vcf_open = open(vcf_file, 'w') - print('##fileformat=VCFv4.2', file=vcf_open) - print('##INFO=', - file=vcf_open) - print('##INFO=', - file=vcf_open) - print('##INFO=', - file=vcf_open) - cols = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO'] - print('\t'.join(cols), file=vcf_open) - - variants_written = set() - - for v in df.itertuples(): - if v.variant in variants_write and v.variant not in variants_written: - cols = [v.chrom, str(v.pos), v.variant, v.ref, v.alt, '.', '.'] - cols += ['MT=%s;PD=%d;PI=%s' % (v.molecular_trait_id, v.pas_dist, variants_dict[v.variant])] - print('\t'.join(cols), file=vcf_open) - 
variants_written.add(v.variant) - - vcf_open.close() - - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/make_expression_tables.py b/src/scripts/data/qtl_data/make_expression_tables.py deleted file mode 100644 index ddc2a63..0000000 --- a/src/scripts/data/qtl_data/make_expression_tables.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -import matplotlib.pyplot as plt - -''' -make_expression_tables.py - -Contruct TPM bucket to sample genes from. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Define tissue column-to-file mapping - tissue_dict = { - 'Adipose - Subcutaneous' : 'adipose_subcutaneous', - 'Adipose - Visceral (Omentum)' : 'adipose_visceral', - 'Adrenal Gland' : 'adrenal_gland', - 'Artery - Aorta' : 'artery_aorta', - 'Artery - Coronary' : 'artery_coronary', - 'Artery - Tibial' : 'artery_tibial', - 'Whole Blood' : 'blood', - 'Brain - Amygdala' : 'brain_amygdala', - 'Brain - Anterior cingulate cortex (BA24)' : 'brain_anterior_cingulate_cortex', - 'Brain - Caudate (basal ganglia)' : 'brain_caudate', - 'Brain - Cerebellar Hemisphere' : 'brain_cerebellar_hemisphere', - 'Brain - Cerebellum' : 'brain_cerebellum', - 'Brain - Cortex' : 'brain_cortex', - 'Brain - Frontal Cortex (BA9)' : 'brain_frontal_cortex', - 'Brain - Hippocampus' : 'brain_hippocampus', - 'Brain - Hypothalamus' : 'brain_hypothalamus', - 'Brain - Nucleus accumbens (basal ganglia)' : 
'brain_nucleus_accumbens', - 'Brain - Putamen (basal ganglia)' : 'brain_putamen', - 'Brain - Spinal cord (cervical c-1)' : 'brain_spinal_cord', - 'Brain - Substantia nigra' : 'brain_substantia_nigra', - 'Breast - Mammary Tissue' : 'breast', - 'Colon - Sigmoid' : 'colon_sigmoid', - 'Colon - Transverse' : 'colon_transverse', - 'Esophagus - Gastroesophageal Junction' : 'esophagus_gej', - 'Esophagus - Mucosa' : 'esophagus_mucosa', - 'Esophagus - Muscularis' : 'esophagus_muscularis', - 'Cells - Cultured fibroblasts' : 'fibroblast', - 'Heart - Atrial Appendage' : 'heart_atrial_appendage', - 'Heart - Left Ventricle' : 'heart_left_ventricle', - 'Kidney - Cortex' : 'kidney_cortex', - 'Cells - EBV-transformed lymphocytes' : 'LCL', - 'Liver' : 'liver', - 'Lung' : 'lung', - 'Minor Salivary Gland' : 'minor_salivary_gland', - 'Muscle - Skeletal' : 'muscle', - 'Nerve - Tibial' : 'nerve_tibial', - 'Ovary' : 'ovary', - 'Pancreas' : 'pancreas', - 'Pituitary' : 'pituitary', - 'Prostate' : 'prostate', - 'Skin - Not Sun Exposed (Suprapubic)' : 'skin_not_sun_exposed', - 'Skin - Sun Exposed (Lower leg)' : 'skin_sun_exposed', - 'Small Intestine - Terminal Ileum' : 'small_intestine', - 'Spleen' : 'spleen', - 'Stomach' : 'stomach', - 'Testis' : 'testis', - 'Thyroid' : 'thyroid', - 'Uterus' : 'uterus', - 'Vagina' : 'vagina', - } - - for tissue_name in tissue_dict : - - #Load TPM matrix - tpm_df = pd.read_csv("GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_median_tpm.gct.gz", sep='\t', compression='gzip', skiprows=2) - - save_name = tissue_dict[tissue_name] - - print("-- " + save_name + " --") - - #Clean dataframe - tpm_df['gene_id'] = tpm_df['Name'].apply(lambda x: x.split(".")[0]) - - tpm_df = tpm_df.drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - tpm_df['tpm'] = tpm_df[tissue_name] - tpm_df = tpm_df[['gene_id', 'tpm']] - - #Get non-zero TPM entries - tpm_df_zero = tpm_df.loc[tpm_df['tpm'] == 0].copy().reset_index(drop=True) - tpm_df_nonzero = 
tpm_df.loc[tpm_df['tpm'] > 0].copy().reset_index(drop=True) - - tpm_df_zero['tpm_log2'] = 0. - tpm_df_nonzero['tpm_log2'] = np.log2(tpm_df_nonzero['tpm']) - - #Clip at extremes - min_q = 0.0075 - max_q = 0.9925 - - #Log2 fold change bin sizes - bin_size = 0.4 - bin_offset = 0.15 - - min_tpm_log2 = np.quantile(tpm_df_nonzero['tpm_log2'], q=min_q) - max_tpm_log2 = np.quantile(tpm_df_nonzero['tpm_log2'], q=max_q) - - tpm_df_nonzero.loc[tpm_df_nonzero['tpm_log2'] < min_tpm_log2, 'tpm_log2'] = min_tpm_log2 - tpm_df_nonzero.loc[tpm_df_nonzero['tpm_log2'] > max_tpm_log2, 'tpm_log2'] = max_tpm_log2 - - tpm_log2 = tpm_df_nonzero['tpm_log2'].values - - n_bins = int((max_tpm_log2 - min_tpm_log2) / bin_size) - - #Get sample bins - sample_bins = np.linspace(min_tpm_log2, max_tpm_log2, n_bins+1) - - #Map values to bins - bin_index = np.digitize(tpm_log2, sample_bins[1:], right=True) - bin_index_l = np.digitize(tpm_log2 - bin_offset, sample_bins[1:], right=True) - bin_index_r = np.digitize(tpm_log2 + bin_offset, sample_bins[1:], right=True) - - tpm_df_zero['bin_index_l'] = -1 * np.ones(len(tpm_df_zero), dtype='int32') - tpm_df_zero['bin_index'] = -1 * np.ones(len(tpm_df_zero), dtype='int32') - tpm_df_zero['bin_index_r'] = -1 * np.ones(len(tpm_df_zero), dtype='int32') - - tpm_df_nonzero['bin_index_l'] = bin_index_l - tpm_df_nonzero['bin_index'] = bin_index - tpm_df_nonzero['bin_index_r'] = bin_index_r - - tpm_df = pd.concat([tpm_df_zero, tpm_df_nonzero]).copy().reset_index(drop=True) - - tpm_df = tpm_df.sort_values(by='gene_id', ascending=True).copy().reset_index(drop=True) - - #Save dataframe - tpm_df.to_csv('ge/GTEx_ge_' + save_name + "_tpms.csv", sep='\t', index=False) - - #Visualize TPM sample bins - tpm_df_filtered = tpm_df.loc[tpm_df['tpm'] > 0.] 
- - f = plt.figure(figsize=(4, 3)) - - plt.hist(tpm_df_filtered['bin_index'].values, bins=np.unique(tpm_df_filtered['bin_index'].values)) - - plt.xlim(0, np.max(tpm_df_filtered['bin_index'].values)) - - plt.xticks(fontsize=8) - plt.yticks(fontsize=8) - - plt.xlabel("Sample bin (FC < " + str(round(2**(bin_size+2*bin_offset), 2)) + ")", fontsize=8) - plt.ylabel("# of genes", fontsize=8) - - plt.title("TPM sample bins (" + save_name + ")", fontsize=8) - - plt.tight_layout() - - plt.savefig('ge/GTEx_ge_' + save_name + "_tpms.png", transparent=False, dpi=300) - - plt.close() - - #Check and warn in case of low-support bins - _, bin_support = np.unique(tpm_df_filtered['bin_index'].values, return_counts=True) - - if np.any(bin_support < 100) : - print("[Warning] Less than 100 genes in some of the TPM sample bins (min = " + str(int(np.min(bin_support))) + ").") - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/make_vcfs.py b/src/scripts/data/qtl_data/make_vcfs.py deleted file mode 100644 index aa251d0..0000000 --- a/src/scripts/data/qtl_data/make_vcfs.py +++ /dev/null @@ -1,112 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import glob -import os - -import pandas as pd - -import util - -''' -make_vcfs.py - -Download QTL Catalogue fine-mapping results. 
-''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - pip = 0.2 - match_gene = 0 - match_allele = 1 - - ################################################ - # intronic polyA QTLs - - out_dir = 'ipaqtl_pip%d%s%s' % (pip*100, 'g' if match_gene == 1 else 'e', 'a' if match_allele else '') - os.makedirs(out_dir, exist_ok=True) - - jobs = [] - for table_file in glob.glob('txrev/*.txt.gz'): - out_prefix = table_file.replace('txrev/', '%s/' % out_dir) - out_prefix = out_prefix.replace('.purity_filtered.txt.gz', '') - cmd = './ipaqtl_vcfs.py --neg_pip 0.01 --pos_pip %f --match_gene %d --match_allele %d -o %s' % (pip, match_gene, match_allele, out_prefix) - jobs.append(cmd) - util.exec_par(jobs, 6, verbose=True) - - # merge study/tissue variants - mpos_vcf_file = '%s/pos_merge.vcf' % out_dir - mneg_vcf_file = '%s/neg_merge.vcf' % out_dir - merge_variants(mpos_vcf_file, '%s/*_pos.vcf' % out_dir) - merge_variants(mneg_vcf_file, '%s/*_neg.vcf' % out_dir) - - - ################################################ - # polyA QTLs - - out_dir = 'paqtl_pip%d%s%s' % (pip*100, 'g' if match_gene == 1 else 'e', 'a' if match_allele else '') - os.makedirs(out_dir, exist_ok=True) - - jobs = [] - for table_file in glob.glob('txrev/*.txt.gz'): - out_prefix = table_file.replace('txrev/', '%s/' % out_dir) - out_prefix = out_prefix.replace('.purity_filtered.txt.gz', '') - cmd = './paqtl_vcfs.py --neg_pip 0.01 --pos_pip %f --match_gene %d --match_allele %d -o %s' % (pip, match_gene, match_allele, out_prefix) - jobs.append(cmd) - util.exec_par(jobs, 6, verbose=True) - - # merge study/tissue variants - mpos_vcf_file = '%s/pos_merge.vcf' % out_dir - mneg_vcf_file = '%s/neg_merge.vcf' % out_dir - merge_variants(mpos_vcf_file, 
'%s/*_pos.vcf' % out_dir) - merge_variants(mneg_vcf_file, '%s/*_neg.vcf' % out_dir) - - ################################################ - # splicing QTLs - - out_dir = 'sqtl_pip%d%s%s' % (pip*100, 'g' if match_gene == 1 else 'e', 'a' if match_allele else '') - os.makedirs(out_dir, exist_ok=True) - - jobs = [] - for table_file in glob.glob('txrev/*.txt.gz'): - out_prefix = table_file.replace('txrev/', '%s/' % out_dir) - out_prefix = out_prefix.replace('.purity_filtered.txt.gz', '') - cmd = './sqtl_vcfs.py --neg_pip 0.01 --pos_pip %f --match_gene %d --match_allele %d -o %s' % (pip, match_gene, match_allele, out_prefix) - jobs.append(cmd) - util.exec_par(jobs, 6, verbose=True) - - # merge study/tissue variants - mpos_vcf_file = '%s/pos_merge.vcf' % out_dir - mneg_vcf_file = '%s/neg_merge.vcf' % out_dir - merge_variants(mpos_vcf_file, '%s/*_pos.vcf' % out_dir) - merge_variants(mneg_vcf_file, '%s/*_neg.vcf' % out_dir) - - -def merge_variants(merge_vcf_file, vcf_glob): - with open(merge_vcf_file, 'w') as merge_vcf_open: - vcf0_file = list(glob.glob(vcf_glob))[0] - for line in open(vcf0_file): - if line[0] == '#': - print(line, end='', file=merge_vcf_open) - - merged_variants = set() - for vcf_file in glob.glob(vcf_glob): - for line in open(vcf_file): - if not line.startswith('#'): - variant = line.split()[2] - if variant not in merged_variants: - print(line, file=merge_vcf_open, end='') - merged_variants.add(variant) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/merge_finemapping_tables.py b/src/scripts/data/qtl_data/merge_finemapping_tables.py deleted file mode 100644 index ac4fa7d..0000000 --- a/src/scripts/data/qtl_data/merge_finemapping_tables.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util 
- -import numpy as np -import pandas as pd - -''' -merge_finemapping_tables.py - -Merge fine-mapping tables of QTL credible sets. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Load and merge fine-mapping results - dfs = [] - for tissue_name in tissue_names : - - print("-- " + tissue_name + " --") - - df = pd.read_csv("txrev/GTEx_txrev_" + tissue_name + ".purity_filtered.txt.gz", sep='\t', usecols=['chromosome', 'position', 'ref', 'alt', 'variant', 'pip'], low_memory=False) - dfs.append(df.sort_values(by='pip', ascending=False).drop_duplicates(subset=['variant'], keep='first').copy().reset_index(drop=True)) - - df = pd.concat(dfs).sort_values(by='pip', ascending=False).drop_duplicates(subset=['variant'], 
keep='first').copy().reset_index(drop=True) - - df['chromosome'] = "chr" + df['chromosome'].astype(str) - df = df.rename(columns={'chromosome' : 'chrom', 'position' : 'pos'}) - - print("len(df) = " + str(len(df))) - - #Save union of dataframes - df.to_csv("txrev/GTEx_txrev_finemapped_merged.csv.gz", sep='\t', index=False) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/paqtl_make_negative_sets.py b/src/scripts/data/qtl_data/paqtl_make_negative_sets.py deleted file mode 100644 index a5da60d..0000000 --- a/src/scripts/data/qtl_data/paqtl_make_negative_sets.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -''' -paqtl_make_negative_sets.py - -Build tables with negative (non-causal) SNPs for paQTLs. 
-''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Parameters - pip_cutoff = 0.01 - max_distance = 10000 - gene_pad = 50 - apa_file = '/home/drk/common/data/genomes/hg38/genes/polyadb/polyadb_exon3.bed' - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort.gtf' - finemap_file = 'txrev/GTEx_txrev_finemapped_merged.csv.gz' - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Compile negative SNP set for each tissue - for tissue_name in tissue_names : - - print("-- " + str(tissue_name) + " --") - - #Load summary stats and extract unique set of SNPs - vcf_df = pd.read_csv("ge/GTEx_ge_" + tissue_name + ".all.tsv.gz", sep='\t', compression='gzip', usecols=['chromosome', 'position', 'ref', 'alt']).drop_duplicates(subset=['chromosome', 
'position', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - - #Only keep SNPs (no indels) - vcf_df = vcf_df.loc[(vcf_df['ref'].str.len() == vcf_df['alt'].str.len()) & (vcf_df['ref'].str.len() == 1)].copy().reset_index(drop=True) - - vcf_df['chromosome'] = 'chr' + vcf_df['chromosome'].astype(str) - vcf_df['start'] = vcf_df['position'].astype(int) - vcf_df['end'] = vcf_df['start'] + 1 - vcf_df['strand'] = "." - - vcf_df = vcf_df[['chromosome', 'start', 'end', 'ref', 'alt', 'strand']] - vcf_df = vcf_df.rename(columns={'chromosome' : 'Chromosome', 'start' : 'Start', 'end' : 'End', 'strand' : 'Strand'}) - - print("len(vcf_df) = " + str(len(vcf_df))) - - #Store intermediate SNPs - #vcf_df.to_csv("ge/GTEx_snps_" + tissue_name + ".bed.gz", sep='\t', index=False, header=False) - - #Load polyadenylation site annotation - apa_df = pd.read_csv(apa_file, sep='\t', names=['Chromosome', 'Start', 'End', 'pas_id', 'feat1', 'Strand']) - apa_df['Start'] += 1 - - #Load gene span annotation - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str']) - gtf_df = gtf_df.query("feature == 'gene'").copy().reset_index(drop=True) - - gtf_df['gene_id'] = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]) - - gtf_df = gtf_df[['Chromosome', 'Start', 'End', 'gene_id', 'feat1', 'Strand']].drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - gtf_df['Start'] = gtf_df['Start'].astype(int) - gene_pad - gtf_df['End'] = gtf_df['End'].astype(int) + gene_pad - - #Join dataframes against gtf annotation - apa_pr = pr.PyRanges(apa_df) - gtf_pr = pr.PyRanges(gtf_df) - vcf_pr = pr.PyRanges(vcf_df) - - apa_gtf_pr = apa_pr.join(gtf_pr, strandedness='same') - vcf_gtf_pr = vcf_pr.join(gtf_pr, strandedness=False) - - apa_gtf_df = apa_gtf_pr.df[['Chromosome', 'Start', 'End', 'pas_id', 'gene_id', 'Strand']].copy().reset_index(drop=True) - 
vcf_gtf_df = vcf_gtf_pr.df[['Chromosome', 'Start', 'End', 'ref', 'alt', 'Strand', 'gene_id']].copy().reset_index(drop=True) - - apa_gtf_df['Start'] -= max_distance - apa_gtf_df['End'] += max_distance - - #Join vcf against polyadenylation annotation - apa_gtf_pr = pr.PyRanges(apa_gtf_df) - vcf_gtf_pr = pr.PyRanges(vcf_gtf_df) - - vcf_apa_pr = vcf_gtf_pr.join(apa_gtf_pr, strandedness=False) - - #Force gene_id of SNP to be same as the gene_id of the polyA site - vcf_apa_df = vcf_apa_pr.df.query("gene_id == gene_id_b").copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df[['Chromosome', 'Start', 'ref', 'alt', 'gene_id', 'pas_id', 'Strand_b', 'Start_b']] - - #PolyA site position - vcf_apa_df['Start_b'] += max_distance - vcf_apa_df = vcf_apa_df.rename(columns={'Start' : 'Pos', 'Start_b' : 'pas_pos', 'Strand_b' : 'Strand'}) - - #Distance to polyA site - vcf_apa_df['distance'] = np.abs(vcf_apa_df['Pos'] - vcf_apa_df['pas_pos']) - - #Choose unique SNPs by shortest distance to polyA site - vcf_apa_df = vcf_apa_df.sort_values(by='distance', ascending=True).drop_duplicates(subset=['Chromosome', 'Pos', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df.sort_values(['Chromosome', 'Pos', 'alt'], ascending=True).copy().reset_index(drop=True) - - vcf_df_filtered = vcf_apa_df.rename(columns={'Chromosome' : 'chrom', 'Pos' : 'pos', 'Strand' : 'strand'}) - vcf_df_filtered = vcf_df_filtered[['chrom', 'pos', 'ref', 'alt', 'gene_id', 'pas_id', 'strand', 'pas_pos', 'distance']] - - print("len(vcf_df_filtered) = " + str(len(vcf_df_filtered))) - - #Store intermediate SNPs (filtered) - vcf_df_filtered.to_csv("ge/GTEx_snps_" + tissue_name + "_polya_filtered.bed.gz", sep='\t', index=False) - - #Reload filtered SNP file - vcf_df_filtered = pd.read_csv("ge/GTEx_snps_" + tissue_name + "_polya_filtered.bed.gz", sep='\t', compression='gzip') - - #Create variant identifier - vcf_df_filtered['variant'] = vcf_df_filtered['chrom'] + "_" + 
vcf_df_filtered['pos'].astype(str) + "_" + vcf_df_filtered['ref'] + "_" + vcf_df_filtered['alt'] - - #Load merged fine-mapping dataframe - finemap_df = pd.read_csv(finemap_file, sep='\t')[['variant', 'pip']] - - #Join against fine-mapping dataframe - neg_df = vcf_df_filtered.join(finemap_df.set_index('variant'), on='variant', how='left') - neg_df.loc[neg_df['pip'].isnull(), 'pip'] = 0. - - #Only keep SNPs with PIP < cutoff - neg_df = neg_df.query("pip < " + str(pip_cutoff)).copy().reset_index(drop=True) - - #Store final table of negative SNPs - neg_df.to_csv("ge/GTEx_snps_" + tissue_name + "_polya_negatives.bed.gz", sep='\t', index=False) - - print("len(neg_df) = " + str(len(neg_df))) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/paqtl_make_positive_sets.py b/src/scripts/data/qtl_data/paqtl_make_positive_sets.py deleted file mode 100644 index 3d07fa3..0000000 --- a/src/scripts/data/qtl_data/paqtl_make_positive_sets.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -''' -paqtl_make_positive_sets.py - -Build tables with positive (causal) SNPs for paQTLs. 
-''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Parameters - pip_cutoff = 0.01 - max_distance = 10000 - gene_pad = 50 - apa_file = '/home/drk/common/data/genomes/hg38/genes/polyadb/polyadb_exon3.bed' - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort.gtf' - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Compile positive SNP set for each tissue - for tissue_name in tissue_names : - - print("-- " + str(tissue_name) + " --") - - #Load fine-mapping table - vcf_df = pd.read_csv("txrev/GTEx_txrev_" + tissue_name + ".purity_filtered.txt.gz", sep='\t', usecols=['chromosome', 'position', 'ref', 'alt', 'variant', 'pip', 'molecular_trait_id'], low_memory=False) - - #Only keep SNPs (no indels) - vcf_df = 
vcf_df.loc[(vcf_df['ref'].str.len() == vcf_df['alt'].str.len()) & (vcf_df['ref'].str.len() == 1)].copy().reset_index(drop=True) - - #Only keep SNPs associated with polyadenylation events - vcf_df = vcf_df.loc[vcf_df['molecular_trait_id'].str.contains(".downstream.")].copy().reset_index(drop=True) - - vcf_df['chromosome'] = 'chr' + vcf_df['chromosome'].astype(str) - vcf_df['start'] = vcf_df['position'].astype(int) - vcf_df['end'] = vcf_df['start'] + 1 - vcf_df['strand'] = "." - - vcf_df = vcf_df[['chromosome', 'start', 'end', 'ref', 'alt', 'strand', 'variant', 'pip', 'molecular_trait_id']] - vcf_df = vcf_df.rename(columns={'chromosome' : 'Chromosome', 'start' : 'Start', 'end' : 'End', 'strand' : 'Strand'}) - - print("len(vcf_df) = " + str(len(vcf_df))) - - #Load polyadenylation site annotation - apa_df = pd.read_csv(apa_file, sep='\t', names=['Chromosome', 'Start', 'End', 'pas_id', 'feat1', 'Strand']) - apa_df['Start'] += 1 - - #Load gene span annotation - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str']) - gtf_df = gtf_df.query("feature == 'gene'").copy().reset_index(drop=True) - - gtf_df['gene_id'] = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]) - - gtf_df = gtf_df[['Chromosome', 'Start', 'End', 'gene_id', 'feat1', 'Strand']].drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - gtf_df['Start'] = gtf_df['Start'].astype(int) - gene_pad - gtf_df['End'] = gtf_df['End'].astype(int) + gene_pad - - #Join dataframes against gtf annotation - apa_pr = pr.PyRanges(apa_df) - gtf_pr = pr.PyRanges(gtf_df) - vcf_pr = pr.PyRanges(vcf_df) - - apa_gtf_pr = apa_pr.join(gtf_pr, strandedness='same') - vcf_gtf_pr = vcf_pr.join(gtf_pr, strandedness=False) - - apa_gtf_df = apa_gtf_pr.df[['Chromosome', 'Start', 'End', 'pas_id', 'gene_id', 'Strand']].copy().reset_index(drop=True) - vcf_gtf_df = 
vcf_gtf_pr.df[['Chromosome', 'Start', 'End', 'ref', 'alt', 'Strand', 'gene_id', 'variant', 'pip', 'molecular_trait_id']].copy().reset_index(drop=True) - - apa_gtf_df['Start'] -= max_distance - apa_gtf_df['End'] += max_distance - - #Join vcf against polyadenylation annotation - apa_gtf_pr = pr.PyRanges(apa_gtf_df) - vcf_gtf_pr = pr.PyRanges(vcf_gtf_df) - - vcf_apa_pr = vcf_gtf_pr.join(apa_gtf_pr, strandedness=False) - - #Force gene_id of SNP to be same as the gene_id of the polyA site - vcf_apa_df = vcf_apa_pr.df.query("gene_id == gene_id_b").copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df[['Chromosome', 'Start', 'ref', 'alt', 'gene_id', 'pas_id', 'Strand_b', 'Start_b', 'variant', 'pip', 'molecular_trait_id']] - - #Force gene_id of SNP to be same as the gene_id of the finemapped molecular trait - vcf_apa_df['molecular_trait_gene_id'] = vcf_apa_df['molecular_trait_id'].apply(lambda x: x.split(".")[0]) - vcf_apa_df = vcf_apa_df.query("gene_id == molecular_trait_gene_id").copy().reset_index(drop=True) - - #PolyA site position - vcf_apa_df['Start_b'] += max_distance - vcf_apa_df = vcf_apa_df.rename(columns={'Start' : 'Pos', 'Start_b' : 'pas_pos', 'Strand_b' : 'Strand'}) - - #Distance to polyA site - vcf_apa_df['distance'] = np.abs(vcf_apa_df['Pos'] - vcf_apa_df['pas_pos']) - - #Choose unique SNPs by shortest distance to polyA site (and inverse PIP for tie-breaking) - vcf_apa_df['pip_inv'] = 1. 
- vcf_apa_df['pip'] - - vcf_apa_df = vcf_apa_df.sort_values(by=['distance', 'pip_inv'], ascending=True).drop_duplicates(subset=['Chromosome', 'Pos', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - vcf_apa_df = vcf_apa_df.sort_values(['Chromosome', 'Pos', 'alt'], ascending=True).copy().reset_index(drop=True) - - vcf_df_filtered = vcf_apa_df.rename(columns={'Chromosome' : 'chrom', 'Pos' : 'pos', 'Strand' : 'strand'}) - vcf_df_filtered = vcf_df_filtered[['chrom', 'pos', 'ref', 'alt', 'gene_id', 'pas_id', 'strand', 'pas_pos', 'distance', 'variant', 'pip', 'molecular_trait_id']] - - print("len(vcf_df_filtered) = " + str(len(vcf_df_filtered))) - - #Store intermediate SNPs (filtered) - vcf_df_filtered.to_csv("txrev/GTEx_snps_" + tissue_name + "_polya_finemapped_filtered.bed.gz", sep='\t', index=False) - - #Reload filtered SNP file - vcf_df_filtered = pd.read_csv("txrev/GTEx_snps_" + tissue_name + "_polya_finemapped_filtered.bed.gz", sep='\t', compression='gzip') - - #Only keep SNPs with PIP > cutoff - pos_df = vcf_df_filtered.query("pip > " + str(pip_cutoff)).copy().reset_index(drop=True) - - #Store final table of positive SNPs - pos_df.to_csv("txrev/GTEx_snps_" + tissue_name + "_polya_positives.bed.gz", sep='\t', index=False) - - print("len(pos_df) = " + str(len(pos_df))) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/paqtl_vcfs.py b/src/scripts/data/qtl_data/paqtl_vcfs.py deleted file mode 100644 index f0884b1..0000000 --- a/src/scripts/data/qtl_data/paqtl_vcfs.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser -import os -import pdb -import time - -import numpy as np -import pandas as pd -import pyranges as pr -from tqdm import tqdm - -''' -paqtl_vcfs.py - -Generate positive and negative paQTL sets from the QTL 
catalog txrevise. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options]' - parser = OptionParser(usage) - parser.add_option('--neg_pip', dest='neg_pip', - default=0.01, type='float', - help='PIP upper limit for negative examples. [Default: %default]') - parser.add_option('--pos_pip', dest='pos_pip', - default=0.9, type='float', - help='PIP lower limit for positive examples. [Default: %default]') - parser.add_option('--match_gene', dest='match_gene', - default=0, type='int', - help='Try finding negative in same gene as positive. [Default: %default]') - parser.add_option('--match_allele', dest='match_allele', - default=0, type='int', - help='Try finding negative with same ref and alt alleles. [Default: %default]') - parser.add_option('-o', dest='out_prefix', - default='qtlcat_paqtl') - (options,args) = parser.parse_args() - - tissue_name = options.out_prefix.split('txrev_')[1] - - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort_protein.gtf' - - # read variant table - qtlcat_df_neg = pd.read_csv("ge/GTEx_snps_" + tissue_name + "_polya_negatives.bed.gz", sep='\t') - qtlcat_df_pos = pd.read_csv("txrev/GTEx_snps_" + tissue_name + "_polya_positives.bed.gz", sep='\t') - - # read TPM bin table and construct lookup dictionaries - tpm_df = pd.read_csv('ge/GTEx_ge_' + tissue_name + "_tpms.csv", sep='\t')[['gene_id', 'tpm', 'bin_index', 'bin_index_l', 'bin_index_r']] - gene_to_tpm_dict = tpm_df.set_index('gene_id').to_dict(orient='index') - - # filter on SNPs with genes in TPM bin dict - qtlcat_df_neg = qtlcat_df_neg.loc[qtlcat_df_neg['gene_id'].isin(tpm_df['gene_id'].values.tolist())].copy().reset_index(drop=True) - qtlcat_df_pos = qtlcat_df_pos.loc[qtlcat_df_pos['gene_id'].isin(tpm_df['gene_id'].values.tolist())].copy().reset_index(drop=True) - - #Load gene span 
annotation (protein-coding/categorized only) - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['id_str']) - gtf_genes = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]).unique().tolist() - - # filter on SNPs with genes in GTF file - qtlcat_df_neg = qtlcat_df_neg.loc[qtlcat_df_neg['gene_id'].isin(gtf_genes)].copy().reset_index(drop=True) - qtlcat_df_pos = qtlcat_df_pos.loc[qtlcat_df_pos['gene_id'].isin(gtf_genes)].copy().reset_index(drop=True) - - bin_to_genes_dict = {} - for _, row in tpm_df.iterrows() : - - if row['bin_index'] not in bin_to_genes_dict : - bin_to_genes_dict[row['bin_index']] = [] - - bin_to_genes_dict[row['bin_index']].append(row['gene_id']) - - for sample_bin in bin_to_genes_dict : - bin_to_genes_dict[sample_bin] = set(bin_to_genes_dict[sample_bin]) - - # split molecular trait id and filter for polyadenylation (for positives) - qtlcat_df_pos['gene'] = [mti.split('.')[0] for mti in qtlcat_df_pos.molecular_trait_id] - qtlcat_df_pos['event'] = [mti.split('.')[2] for mti in qtlcat_df_pos.molecular_trait_id] - - qtlcat_df_pos = qtlcat_df_pos[qtlcat_df_pos.event == 'downstream'] - qtlcat_df_pos = qtlcat_df_pos.rename(columns={'distance' : 'pas_dist'}) - - qtlcat_df_neg['molecular_trait_id'] = qtlcat_df_neg['gene_id'] + "." 
+ "grp_0.downstream.negative" - qtlcat_df_neg['gene'] = qtlcat_df_neg['gene_id'] - qtlcat_df_neg['event'] = 'downstream' - qtlcat_df_neg = qtlcat_df_neg.rename(columns={'distance' : 'pas_dist'}) - - paqtl_df = pd.concat([qtlcat_df_neg, qtlcat_df_pos]).copy().reset_index(drop=True) - - # determine positive variants - paqtl_pos_df = paqtl_df[paqtl_df.pip >= options.pos_pip] - paqtl_neg_df = paqtl_df[paqtl_df.pip < options.neg_pip] - pos_variants = set(paqtl_pos_df.variant) - - neg_gene_and_allele_variants = 0 - neg_gene_variants = 0 - - neg_expr_and_allele_variants = 0 - neg_expr_variants = 0 - - unmatched_variants = 0 - - # choose negative variants - neg_variants = set() - neg_dict = {} - for pvariant in tqdm(pos_variants): - paqtl_this_df = paqtl_pos_df[paqtl_pos_df.variant == pvariant] - - neg_found = False - - # optionally prefer negative from positive's gene set - if options.match_gene == 1 and options.match_allele == 1 : - pgenes = set(paqtl_this_df.gene) - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if neg_found : - neg_gene_and_allele_variants += 1 - - if not neg_found and options.match_gene == 1 : - pgenes = set(paqtl_this_df.gene) - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if neg_found : - neg_gene_variants += 1 - - if not neg_found and options.match_allele == 1 : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if not neg_found and gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if not neg_found and 
gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, True) - - if neg_found : - neg_expr_and_allele_variants += 1 - - if not neg_found : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if not neg_found and gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_l']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if not neg_found and gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[paqtl_this_df.iloc[0].gene]['bin_index_r']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, False) - - if neg_found : - neg_expr_variants += 1 - - if not neg_found : - print("[Warning] Could not find a matching negative for '" + pvariant + "'") - unmatched_variants += 1 - - print('%d positive variants' % len(pos_variants)) - print('%d negative variants' % len(neg_variants)) - print(' - %d gene-matched negatives with same alleles' % neg_gene_and_allele_variants) - print(' - %d gene-matched negatives ' % neg_gene_variants) - print(' - %d expr-matched negatives with same alleles' % neg_expr_and_allele_variants) - print(' - %d expr-matched negatives ' % neg_expr_variants) - print(' - %d unmatched negatives ' % unmatched_variants) - - pos_dict = {pv: pv for pv in pos_variants} - - # write VCFs - 
write_vcf('%s_pos.vcf' % options.out_prefix, paqtl_df, pos_variants, pos_dict) - write_vcf('%s_neg.vcf' % options.out_prefix, paqtl_df, neg_variants, neg_dict) - -def find_negative(neg_variants, neg_dict, pos_variants, paqtl_this_df, paqtl_neg_df, pgenes, match_allele) : - - gene_mask = np.array([gene in pgenes for gene in paqtl_neg_df.gene]) - paqtl_neg_gene_df = paqtl_neg_df[gene_mask] - - # match PAS distance - this_dist = paqtl_this_df.iloc[0].pas_dist - dist_cmp = np.abs(paqtl_neg_gene_df.pas_dist - this_dist) - dist_cmp_unique = np.sort(np.unique(dist_cmp.values)) - - this_ref = paqtl_this_df.iloc[0].ref - this_alt = paqtl_this_df.iloc[0].alt - - for ni_unique in dist_cmp_unique: - - paqtl_neg_gene_dist_df = paqtl_neg_gene_df.loc[dist_cmp == ni_unique] - - shuffle_index = np.arange(len(paqtl_neg_gene_dist_df), dtype='int32') - np.random.shuffle(shuffle_index) - - for npaqtl_i in range(len(paqtl_neg_gene_dist_df)) : - npaqtl = paqtl_neg_gene_dist_df.iloc[shuffle_index[npaqtl_i]] - - if not match_allele or (npaqtl.ref == this_ref and npaqtl.alt == this_alt): - if npaqtl.variant not in neg_variants and npaqtl.variant not in pos_variants: - - neg_variants.add(npaqtl.variant) - neg_dict[npaqtl.variant] = paqtl_this_df.iloc[0].variant - - return True - - return False - -def write_vcf(vcf_file, df, variants_write, variants_dict): - vcf_open = open(vcf_file, 'w') - print('##fileformat=VCFv4.2', file=vcf_open) - print('##INFO=', - file=vcf_open) - print('##INFO=', - file=vcf_open) - print('##INFO=', - file=vcf_open) - cols = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO'] - print('\t'.join(cols), file=vcf_open) - - variants_written = set() - - for v in df.itertuples(): - if v.variant in variants_write and v.variant not in variants_written: - cols = [v.chrom, str(v.pos), v.variant, v.ref, v.alt, '.', '.'] - cols += ['MT=%s;PD=%d;PI=%s' % (v.molecular_trait_id, v.pas_dist, variants_dict[v.variant])] - print('\t'.join(cols), file=vcf_open) - 
variants_written.add(v.variant) - - vcf_open.close() - - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/sqtl_make_negative_sets.py b/src/scripts/data/qtl_data/sqtl_make_negative_sets.py deleted file mode 100644 index 7518ca4..0000000 --- a/src/scripts/data/qtl_data/sqtl_make_negative_sets.py +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -''' -sqtl_make_negative_sets.py - -Build tables with negative (non-causal) SNPs for sQTLs. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Parameters - pip_cutoff = 0.01 - max_distance = 10000 - gene_pad = 50 - splice_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_protein_splice.gff' - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort.gtf' - finemap_file = 'txrev/GTEx_txrev_finemapped_merged.csv.gz' - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 
'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Compile negative SNP set for each tissue - for tissue_name in tissue_names : - - print("-- " + str(tissue_name) + " --") - - #Load summary stats and extract unique set of SNPs - vcf_df = pd.read_csv("ge/GTEx_ge_" + tissue_name + ".all.tsv.gz", sep='\t', compression='gzip', usecols=['chromosome', 'position', 'ref', 'alt']).drop_duplicates(subset=['chromosome', 'position', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - - #Only keep SNPs (no indels) - vcf_df = vcf_df.loc[(vcf_df['ref'].str.len() == vcf_df['alt'].str.len()) & (vcf_df['ref'].str.len() == 1)].copy().reset_index(drop=True) - - vcf_df['chromosome'] = 'chr' + vcf_df['chromosome'].astype(str) - vcf_df['start'] = vcf_df['position'].astype(int) - vcf_df['end'] = vcf_df['start'] + 1 - vcf_df['strand'] = "." 
- - vcf_df = vcf_df[['chromosome', 'start', 'end', 'ref', 'alt', 'strand']] - vcf_df = vcf_df.rename(columns={'chromosome' : 'Chromosome', 'start' : 'Start', 'end' : 'End', 'strand' : 'Strand'}) - - print("len(vcf_df) = " + str(len(vcf_df))) - - #Store intermediate SNPs - #vcf_df.to_csv("ge/GTEx_snps_" + tissue_name + ".bed.gz", sep='\t', index=False, header=False) - - #Load splice site annotation - splice_df = pd.read_csv(splice_file, sep='\t', names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str'], usecols=['Chromosome', 'Start', 'End', 'feature', 'feat1', 'Strand'])[['Chromosome', 'Start', 'End', 'feature', 'feat1', 'Strand']] - - #Load gene span annotation - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str']) - gtf_df = gtf_df.query("feature == 'gene'").copy().reset_index(drop=True) - - gtf_df['gene_id'] = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]) - - gtf_df = gtf_df[['Chromosome', 'Start', 'End', 'gene_id', 'feat1', 'Strand']].drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - gtf_df['Start'] = gtf_df['Start'].astype(int) - gene_pad - gtf_df['End'] = gtf_df['End'].astype(int) + gene_pad - - #Join dataframes against gtf annotation - splice_pr = pr.PyRanges(splice_df) - gtf_pr = pr.PyRanges(gtf_df) - vcf_pr = pr.PyRanges(vcf_df) - - splice_gtf_pr = splice_pr.join(gtf_pr, strandedness='same') - vcf_gtf_pr = vcf_pr.join(gtf_pr, strandedness=False) - - splice_gtf_df = splice_gtf_pr.df[['Chromosome', 'Start', 'End', 'feature', 'gene_id', 'Strand']].copy().reset_index(drop=True) - vcf_gtf_df = vcf_gtf_pr.df[['Chromosome', 'Start', 'End', 'ref', 'alt', 'Strand', 'gene_id']].copy().reset_index(drop=True) - - splice_gtf_df['Start'] -= max_distance - splice_gtf_df['End'] += max_distance - - #Join vcf against splice annotation - splice_gtf_pr = 
pr.PyRanges(splice_gtf_df) - vcf_gtf_pr = pr.PyRanges(vcf_gtf_df) - - vcf_splice_pr = vcf_gtf_pr.join(splice_gtf_pr, strandedness=False) - - #Force gene_id of SNP to be same as the gene_id of the splice site - vcf_splice_df = vcf_splice_pr.df.query("gene_id == gene_id_b").copy().reset_index(drop=True) - vcf_splice_df = vcf_splice_df[['Chromosome', 'Start', 'ref', 'alt', 'gene_id', 'feature', 'Strand_b', 'Start_b']] - - #Splice site position - vcf_splice_df['Start_b'] += max_distance - vcf_splice_df = vcf_splice_df.rename(columns={'Start' : 'Pos', 'Start_b' : 'splice_pos', 'Strand_b' : 'Strand'}) - - #Distance to splice site - vcf_splice_df['distance'] = np.abs(vcf_splice_df['Pos'] - vcf_splice_df['splice_pos']) - - #Choose unique SNPs by shortest distance to splice site - vcf_splice_df = vcf_splice_df.sort_values(by='distance', ascending=True).drop_duplicates(subset=['Chromosome', 'Pos', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - vcf_splice_df = vcf_splice_df.sort_values(['Chromosome', 'Pos', 'alt'], ascending=True).copy().reset_index(drop=True) - - vcf_df_filtered = vcf_splice_df.rename(columns={'Chromosome' : 'chrom', 'Pos' : 'pos', 'Strand' : 'strand'}) - vcf_df_filtered = vcf_df_filtered[['chrom', 'pos', 'ref', 'alt', 'gene_id', 'feature', 'strand', 'splice_pos', 'distance']] - - print("len(vcf_df_filtered) = " + str(len(vcf_df_filtered))) - - #Store intermediate SNPs (filtered) - vcf_df_filtered.to_csv("ge/GTEx_snps_" + tissue_name + "_splice_filtered.bed.gz", sep='\t', index=False) - - #Reload filtered SNP file - vcf_df_filtered = pd.read_csv("ge/GTEx_snps_" + tissue_name + "_splice_filtered.bed.gz", sep='\t', compression='gzip') - - #Create variant identifier - vcf_df_filtered['variant'] = vcf_df_filtered['chrom'] + "_" + vcf_df_filtered['pos'].astype(str) + "_" + vcf_df_filtered['ref'] + "_" + vcf_df_filtered['alt'] - - #Load merged fine-mapping dataframe - finemap_df = pd.read_csv(finemap_file, sep='\t')[['variant', 'pip']] - - #Join 
against fine-mapping dataframe - neg_df = vcf_df_filtered.join(finemap_df.set_index('variant'), on='variant', how='left') - neg_df.loc[neg_df['pip'].isnull(), 'pip'] = 0. - - #Only keep SNPs with PIP < cutoff - neg_df = neg_df.query("pip < " + str(pip_cutoff)).copy().reset_index(drop=True) - - #Store final table of negative SNPs - neg_df.to_csv("ge/GTEx_snps_" + tissue_name + "_splice_negatives.bed.gz", sep='\t', index=False) - - print("len(neg_df) = " + str(len(neg_df))) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/sqtl_make_positive_sets.py b/src/scripts/data/qtl_data/sqtl_make_positive_sets.py deleted file mode 100644 index 954ab7e..0000000 --- a/src/scripts/data/qtl_data/sqtl_make_positive_sets.py +++ /dev/null @@ -1,190 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser - -import os - -import util - -import numpy as np -import pandas as pd - -import pyranges as pr - -''' -sqtl_make_positive_sets.py - -Build tables with positive (causal) SNPs for sQTLs. 
-''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] arg' - parser = OptionParser(usage) - #parser.add_option() - (options,args) = parser.parse_args() - - #Parameters - pip_cutoff = 0.01 - max_distance = 10000 - gene_pad = 50 - splice_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_protein_splice.gff' - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort.gtf' - - #Define tissues - tissue_names = [ - 'adipose_subcutaneous', - 'adipose_visceral', - 'adrenal_gland', - 'artery_aorta', - 'artery_coronary', - 'artery_tibial', - 'blood', - 'brain_amygdala', - 'brain_anterior_cingulate_cortex', - 'brain_caudate', - 'brain_cerebellar_hemisphere', - 'brain_cerebellum', - 'brain_cortex', - 'brain_frontal_cortex', - 'brain_hippocampus', - 'brain_hypothalamus', - 'brain_nucleus_accumbens', - 'brain_putamen', - 'brain_spinal_cord', - 'brain_substantia_nigra', - 'breast', - 'colon_sigmoid', - 'colon_transverse', - 'esophagus_gej', - 'esophagus_mucosa', - 'esophagus_muscularis', - 'fibroblast', - 'heart_atrial_appendage', - 'heart_left_ventricle', - 'kidney_cortex', - 'LCL', - 'liver', - 'lung', - 'minor_salivary_gland', - 'muscle', - 'nerve_tibial', - 'ovary', - 'pancreas', - 'pituitary', - 'prostate', - 'skin_not_sun_exposed', - 'skin_sun_exposed', - 'small_intestine', - 'spleen', - 'stomach', - 'testis', - 'thyroid', - 'uterus', - 'vagina', - ] - - #Compile positive SNP set for each tissue - for tissue_name in tissue_names : - - print("-- " + str(tissue_name) + " --") - - #Load fine-mapping table - vcf_df = pd.read_csv("txrev/GTEx_txrev_" + tissue_name + ".purity_filtered.txt.gz", sep='\t', usecols=['chromosome', 'position', 'ref', 'alt', 'variant', 'pip', 'molecular_trait_id'], low_memory=False) - - #Only keep SNPs (no indels) - vcf_df = 
vcf_df.loc[(vcf_df['ref'].str.len() == vcf_df['alt'].str.len()) & (vcf_df['ref'].str.len() == 1)].copy().reset_index(drop=True) - - #Only keep SNPs associated with splice events - vcf_df = vcf_df.loc[vcf_df['molecular_trait_id'].str.contains(".contained.")].copy().reset_index(drop=True) - - vcf_df['chromosome'] = 'chr' + vcf_df['chromosome'].astype(str) - vcf_df['start'] = vcf_df['position'].astype(int) - vcf_df['end'] = vcf_df['start'] + 1 - vcf_df['strand'] = "." - - vcf_df = vcf_df[['chromosome', 'start', 'end', 'ref', 'alt', 'strand', 'variant', 'pip', 'molecular_trait_id']] - vcf_df = vcf_df.rename(columns={'chromosome' : 'Chromosome', 'start' : 'Start', 'end' : 'End', 'strand' : 'Strand'}) - - print("len(vcf_df) = " + str(len(vcf_df))) - - #Load splice site annotation - splice_df = pd.read_csv(splice_file, sep='\t', names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str'], usecols=['Chromosome', 'Start', 'End', 'feature', 'feat1', 'Strand'])[['Chromosome', 'Start', 'End', 'feature', 'feat1', 'Strand']] - - #Load gene span annotation - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['Chromosome', 'havana_str', 'feature', 'Start', 'End', 'feat1', 'Strand', 'feat2', 'id_str']) - gtf_df = gtf_df.query("feature == 'gene'").copy().reset_index(drop=True) - - gtf_df['gene_id'] = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]) - - gtf_df = gtf_df[['Chromosome', 'Start', 'End', 'gene_id', 'feat1', 'Strand']].drop_duplicates(subset=['gene_id'], keep='first').copy().reset_index(drop=True) - - gtf_df['Start'] = gtf_df['Start'].astype(int) - gene_pad - gtf_df['End'] = gtf_df['End'].astype(int) + gene_pad - - #Join dataframes against gtf annotation - splice_pr = pr.PyRanges(splice_df) - gtf_pr = pr.PyRanges(gtf_df) - vcf_pr = pr.PyRanges(vcf_df) - - splice_gtf_pr = splice_pr.join(gtf_pr, strandedness='same') - vcf_gtf_pr = vcf_pr.join(gtf_pr, strandedness=False) - - splice_gtf_df 
= splice_gtf_pr.df[['Chromosome', 'Start', 'End', 'feature', 'gene_id', 'Strand']].copy().reset_index(drop=True) - vcf_gtf_df = vcf_gtf_pr.df[['Chromosome', 'Start', 'End', 'ref', 'alt', 'Strand', 'gene_id', 'variant', 'pip', 'molecular_trait_id']].copy().reset_index(drop=True) - - splice_gtf_df['Start'] -= max_distance - splice_gtf_df['End'] += max_distance - - #Join vcf against splice annotation - splice_gtf_pr = pr.PyRanges(splice_gtf_df) - vcf_gtf_pr = pr.PyRanges(vcf_gtf_df) - - vcf_splice_pr = vcf_gtf_pr.join(splice_gtf_pr, strandedness=False) - - #Force gene_id of SNP to be same as the gene_id of the splice site - vcf_splice_df = vcf_splice_pr.df.query("gene_id == gene_id_b").copy().reset_index(drop=True) - vcf_splice_df = vcf_splice_df[['Chromosome', 'Start', 'ref', 'alt', 'gene_id', 'feature', 'Strand_b', 'Start_b', 'variant', 'pip', 'molecular_trait_id']] - - #Force gene_id of SNP to be same as the gene_id of the finemapped molecular trait - vcf_splice_df['molecular_trait_gene_id'] = vcf_splice_df['molecular_trait_id'].apply(lambda x: x.split(".")[0]) - vcf_splice_df = vcf_splice_df.query("gene_id == molecular_trait_gene_id").copy().reset_index(drop=True) - - #Splice site position - vcf_splice_df['Start_b'] += max_distance - vcf_splice_df = vcf_splice_df.rename(columns={'Start' : 'Pos', 'Start_b' : 'splice_pos', 'Strand_b' : 'Strand'}) - - #Distance to splice site - vcf_splice_df['distance'] = np.abs(vcf_splice_df['Pos'] - vcf_splice_df['splice_pos']) - - #Choose unique SNPs by shortest distance to splice site (and inverse PIP for tie-breaking) - vcf_splice_df['pip_inv'] = 1. 
- vcf_splice_df['pip'] - - vcf_splice_df = vcf_splice_df.sort_values(by=['distance', 'pip_inv'], ascending=True).drop_duplicates(subset=['Chromosome', 'Pos', 'ref', 'alt'], keep='first').copy().reset_index(drop=True) - vcf_splice_df = vcf_splice_df.sort_values(['Chromosome', 'Pos', 'alt'], ascending=True).copy().reset_index(drop=True) - - vcf_df_filtered = vcf_splice_df.rename(columns={'Chromosome' : 'chrom', 'Pos' : 'pos', 'Strand' : 'strand'}) - vcf_df_filtered = vcf_df_filtered[['chrom', 'pos', 'ref', 'alt', 'gene_id', 'feature', 'strand', 'splice_pos', 'distance', 'variant', 'pip', 'molecular_trait_id']] - - print("len(vcf_df_filtered) = " + str(len(vcf_df_filtered))) - - #Store intermediate SNPs (filtered) - vcf_df_filtered.to_csv("txrev/GTEx_snps_" + tissue_name + "_splice_finemapped_filtered.bed.gz", sep='\t', index=False) - - #Reload filtered SNP file - vcf_df_filtered = pd.read_csv("txrev/GTEx_snps_" + tissue_name + "_splice_finemapped_filtered.bed.gz", sep='\t', compression='gzip') - - #Only keep SNPs with PIP > cutoff - pos_df = vcf_df_filtered.query("pip > " + str(pip_cutoff)).copy().reset_index(drop=True) - - #Store final table of positive SNPs - pos_df.to_csv("txrev/GTEx_snps_" + tissue_name + "_splice_positives.bed.gz", sep='\t', index=False) - - print("len(pos_df) = " + str(len(pos_df))) - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/qtl_data/sqtl_vcfs.py b/src/scripts/data/qtl_data/sqtl_vcfs.py deleted file mode 100644 index d275a76..0000000 --- a/src/scripts/data/qtl_data/sqtl_vcfs.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python -from optparse import OptionParser -import os -import pdb -import time - -import numpy as np -import pandas as pd -import pyranges as pr -from tqdm import tqdm - -''' -sqtl_vcfs.py - -Generate positive and negative 
sQTL sets from the QTL catalog txrevise. -''' - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options]' - parser = OptionParser(usage) - parser.add_option('--neg_pip', dest='neg_pip', - default=0.01, type='float', - help='PIP upper limit for negative examples. [Default: %default]') - parser.add_option('--pos_pip', dest='pos_pip', - default=0.9, type='float', - help='PIP lower limit for positive examples. [Default: %default]') - parser.add_option('--match_gene', dest='match_gene', - default=0, type='int', - help='Try finding negative in same gene as positive. [Default: %default]') - parser.add_option('--match_allele', dest='match_allele', - default=0, type='int', - help='Try finding negative with same ref and alt alleles. [Default: %default]') - parser.add_option('-o', dest='out_prefix', - default='qtlcat_sqtl') - (options,args) = parser.parse_args() - - tissue_name = options.out_prefix.split('txrev_')[1] - - gtf_file = '/home/drk/common/data/genomes/hg38/genes/gencode41/gencode41_basic_nort_protein.gtf' - - # read variant table - qtlcat_df_neg = pd.read_csv("ge/GTEx_snps_" + tissue_name + "_splice_negatives.bed.gz", sep='\t') - qtlcat_df_pos = pd.read_csv("txrev/GTEx_snps_" + tissue_name + "_splice_positives.bed.gz", sep='\t') - - # read TPM bin table and construct lookup dictionaries - tpm_df = pd.read_csv('ge/GTEx_ge_' + tissue_name + "_tpms.csv", sep='\t')[['gene_id', 'tpm', 'bin_index', 'bin_index_l', 'bin_index_r']] - gene_to_tpm_dict = tpm_df.set_index('gene_id').to_dict(orient='index') - - # filter on SNPs with genes in TPM bin dict - qtlcat_df_neg = qtlcat_df_neg.loc[qtlcat_df_neg['gene_id'].isin(tpm_df['gene_id'].values.tolist())].copy().reset_index(drop=True) - qtlcat_df_pos = qtlcat_df_pos.loc[qtlcat_df_pos['gene_id'].isin(tpm_df['gene_id'].values.tolist())].copy().reset_index(drop=True) - - 
#Load gene span annotation (protein-coding/categorized only) - gtf_df = pd.read_csv(gtf_file, sep='\t', skiprows=5, names=['id_str']) - gtf_genes = gtf_df['id_str'].apply(lambda x: x.split("gene_id \"")[1].split("\";")[0].split(".")[0]).unique().tolist() - - # filter on SNPs with genes in GTF file - qtlcat_df_neg = qtlcat_df_neg.loc[qtlcat_df_neg['gene_id'].isin(gtf_genes)].copy().reset_index(drop=True) - qtlcat_df_pos = qtlcat_df_pos.loc[qtlcat_df_pos['gene_id'].isin(gtf_genes)].copy().reset_index(drop=True) - - bin_to_genes_dict = {} - for _, row in tpm_df.iterrows() : - - if row['bin_index'] not in bin_to_genes_dict : - bin_to_genes_dict[row['bin_index']] = [] - - bin_to_genes_dict[row['bin_index']].append(row['gene_id']) - - for sample_bin in bin_to_genes_dict : - bin_to_genes_dict[sample_bin] = set(bin_to_genes_dict[sample_bin]) - - # split molecular trait id and filter for polyadenylation (for positives) - qtlcat_df_pos['gene'] = [mti.split('.')[0] for mti in qtlcat_df_pos.molecular_trait_id] - qtlcat_df_pos['event'] = [mti.split('.')[2] for mti in qtlcat_df_pos.molecular_trait_id] - - qtlcat_df_pos = qtlcat_df_pos[qtlcat_df_pos.event == 'contained'] - qtlcat_df_pos = qtlcat_df_pos.rename(columns={'distance' : 'splice_dist'}) - - qtlcat_df_neg['molecular_trait_id'] = qtlcat_df_neg['gene_id'] + "." 
+ "grp_0.contained.negative" - qtlcat_df_neg['gene'] = qtlcat_df_neg['gene_id'] - qtlcat_df_neg['event'] = 'contained' - qtlcat_df_neg = qtlcat_df_neg.rename(columns={'distance' : 'splice_dist'}) - - sqtl_df = pd.concat([qtlcat_df_neg, qtlcat_df_pos]).copy().reset_index(drop=True) - - # determine positive variants - sqtl_pos_df = sqtl_df[sqtl_df.pip >= options.pos_pip] - sqtl_neg_df = sqtl_df[sqtl_df.pip < options.neg_pip] - pos_variants = set(sqtl_pos_df.variant) - - neg_gene_and_allele_variants = 0 - neg_gene_variants = 0 - - neg_expr_and_allele_variants = 0 - neg_expr_variants = 0 - - unmatched_variants = 0 - - # choose negative variants - neg_variants = set() - neg_dict = {} - for pvariant in tqdm(pos_variants): - sqtl_this_df = sqtl_pos_df[sqtl_pos_df.variant == pvariant] - - neg_found = False - - # optionally prefer negative from positive's gene set - if options.match_gene == 1 and options.match_allele == 1 : - pgenes = set(sqtl_this_df.gene) - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, True) - - if neg_found : - neg_gene_and_allele_variants += 1 - - if not neg_found and options.match_gene == 1 : - pgenes = set(sqtl_this_df.gene) - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, False) - - if neg_found : - neg_gene_variants += 1 - - if not neg_found and options.match_allele == 1 : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, True) - - if not neg_found and gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_l'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_l']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, True) - - if not neg_found and 
gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_r'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_r']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, True) - - if neg_found : - neg_expr_and_allele_variants += 1 - - if not neg_found : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, False) - - if not neg_found and gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_l'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_l']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, False) - - if not neg_found and gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index'] != gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_r'] : - pgenes = bin_to_genes_dict[gene_to_tpm_dict[sqtl_this_df.iloc[0].gene]['bin_index_r']] - neg_found = find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, False) - - if neg_found : - neg_expr_variants += 1 - - if not neg_found : - print("[Warning] Could not find a matching negative for '" + pvariant + "'") - unmatched_variants += 1 - - print('%d positive variants' % len(pos_variants)) - print('%d negative variants' % len(neg_variants)) - print(' - %d gene-matched negatives with same alleles' % neg_gene_and_allele_variants) - print(' - %d gene-matched negatives ' % neg_gene_variants) - print(' - %d expr-matched negatives with same alleles' % neg_expr_and_allele_variants) - print(' - %d expr-matched negatives ' % neg_expr_variants) - print(' - %d unmatched negatives ' % unmatched_variants) - - pos_dict = {pv: pv for pv in pos_variants} - - # write VCFs - 
write_vcf('%s_pos.vcf' % options.out_prefix, sqtl_df, pos_variants, pos_dict) - write_vcf('%s_neg.vcf' % options.out_prefix, sqtl_df, neg_variants, neg_dict) - -def find_negative(neg_variants, neg_dict, pos_variants, sqtl_this_df, sqtl_neg_df, pgenes, match_allele) : - - gene_mask = np.array([gene in pgenes for gene in sqtl_neg_df.gene]) - sqtl_neg_gene_df = sqtl_neg_df[gene_mask] - - # match PAS distance - this_dist = sqtl_this_df.iloc[0].splice_dist - dist_cmp = np.abs(sqtl_neg_gene_df.splice_dist - this_dist) - dist_cmp_unique = np.sort(np.unique(dist_cmp.values)) - - this_ref = sqtl_this_df.iloc[0].ref - this_alt = sqtl_this_df.iloc[0].alt - - for ni_unique in dist_cmp_unique: - - sqtl_neg_gene_dist_df = sqtl_neg_gene_df.loc[dist_cmp == ni_unique] - - shuffle_index = np.arange(len(sqtl_neg_gene_dist_df), dtype='int32') - np.random.shuffle(shuffle_index) - - for nsqtl_i in range(len(sqtl_neg_gene_dist_df)) : - nsqtl = sqtl_neg_gene_dist_df.iloc[shuffle_index[nsqtl_i]] - - if not match_allele or (nsqtl.ref == this_ref and nsqtl.alt == this_alt): - if nsqtl.variant not in neg_variants and nsqtl.variant not in pos_variants: - - neg_variants.add(nsqtl.variant) - neg_dict[nsqtl.variant] = sqtl_this_df.iloc[0].variant - - return True - - return False - -def write_vcf(vcf_file, df, variants_write, variants_dict): - vcf_open = open(vcf_file, 'w') - print('##fileformat=VCFv4.2', file=vcf_open) - print('##INFO=', - file=vcf_open) - print('##INFO=', - file=vcf_open) - print('##INFO=', - file=vcf_open) - cols = ['#CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO'] - print('\t'.join(cols), file=vcf_open) - - variants_written = set() - - for v in df.itertuples(): - if v.variant in variants_write and v.variant not in variants_written: - cols = [v.chrom, str(v.pos), v.variant, v.ref, v.alt, '.', '.'] - cols += ['MT=%s;SD=%d;PI=%s' % (v.molecular_trait_id, v.splice_dist, variants_dict[v.variant])] - print('\t'.join(cols), file=vcf_open) - 
variants_written.add(v.variant) - - vcf_open.close() - - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() diff --git a/src/scripts/data/training_data/Makefile b/src/scripts/data/training_data/Makefile deleted file mode 100644 index 170222b..0000000 --- a/src/scripts/data/training_data/Makefile +++ /dev/null @@ -1,47 +0,0 @@ -FASTA_HUMAN=$$HG38/assembly/ucsc/hg38.ml.fa -GAPS_HUMAN=$$HG38/assembly/ucsc/hg38_gaps.bed -UMAP_HUMAN=$$HG38//mappability/umap_k36_t10_l32.bed -BLACK_HUMAN=$$HG38/blacklist/blacklist_hg38_all.bed - -FASTA_MOUSE=$$MM10/assembly/ucsc/mm10.ml.fa -GAPS_MOUSE=$$MM10/assembly/ucsc/mm10_gaps.bed -UMAP_MOUSE=$$MM10//mappability/umap_k36_t10_l32.bed -BLACK_MOUSE=$$MM10/blacklist/blacklist_mm10_all.bed - -ALIGN=$$HG38/align/hg38.mm10.syn.net.gz - -OUT=/scratch3/drk/seqnn/data/v9 - -# LENGTH=393216 -# TSTRIDE=43691 # (393216-2*131072)/3 -# CROP=131072 - -LENGTH=524288 -TSTRIDE=49173 # (524288-2*163840)/4 + 21 -CROP=163840 -WIDTH=32 -FOLDS=8 - -AOPTS=--break 2097152 -c $(CROP) --nf 524288 --no 393216 -l $(LENGTH) --stride $(TSTRIDE) -f $(FOLDS) --umap_t 0.5 -w $(WIDTH) -DOPTS=-c $(CROP) -d 2 -f $(FOLDS) -l $(LENGTH) -p 64 -r 16 --umap_clip 0.5 -w $(WIDTH) - - -all: $(OUT)/hg38/tfrecords/train-0.tfr $(OUT)/mm10/tfrecords/train-0.tfr - -umap_human.bed: - cat $(UMAP_HUMAN) $(BLACK_HUMAN) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_human.bed - -umap_mouse.bed: - cat $(UMAP_MOUSE) $(BLACK_MOUSE) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_mouse.bed - -targets_human.txt targets_mouse.txt: - ./make_targets.py - -$(OUT)/hg38/sequences.bed $(OUT)/mm10/sequences.bed: umap_human.bed umap_mouse.bed - basenji_data_align.py -a hg38,mm10 -g $(GAPS_HUMAN),$(GAPS_MOUSE) -u umap_human.bed,umap_mouse.bed 
$(AOPTS) -o $(OUT) $(ALIGN) $(FASTA_HUMAN),$(FASTA_MOUSE) - -$(OUT)/hg38/tfrecords/train-0.tfr: $(OUT)/hg38/sequences.bed targets_human.txt - basenji_data.py --restart $(DOPTS) -b $(BLACK_HUMAN) -o $(OUT)/hg38 $(FASTA_HUMAN) -u umap_human.bed targets_human.txt - -$(OUT)/mm10/tfrecords/train-0.tfr: $(OUT)/mm10/sequences.bed targets_mouse.txt - basenji_data.py --restart $(DOPTS) -b $(BLACK_MOUSE) -o $(OUT)/mm10 $(FASTA_MOUSE) -u umap_mouse.bed targets_mouse.txt diff --git a/src/scripts/data/training_data/README.md b/src/scripts/data/training_data/README.md deleted file mode 100644 index 7c2751e..0000000 --- a/src/scripts/data/training_data/README.md +++ /dev/null @@ -1,11 +0,0 @@ -## Data processing & Training - -Processing of ENCODE, GTEx, FANTOM5, and CATlas training data is done through a Makefile. It requires a number of auxiliary files (e.g. genome alignments), which can be downloaded from the Borzoi training data bucket [here](https://storage.googleapis.com/borzoi-paper/data/) (GCP).
- -The Makefile relies on the script 'basenji_data.py' from the [basenji repository](https://github.com/calico/basenji/blob/master/bin/basenji_data.py), which in turn calls the scripts 'basenji_data_read.py' and 'basenji_data_write.py' from the same repo, in order to (1) read coverage data (from bigwig-like files) along with a matched segment from a fasta genome file, and (2) write the (one-hot coded) sequence along with coverage values into compressed TF records.
- -*Notes*: -- The attached Makefile shows the exact commands used to call basenji_data.py and other related scripts to create the specific training data for the published model. -- The script(s) take as input a fasta genome file, optional blacklist+unmappable region files, as well as a .txt file where each row points to a bigwig coverage file location (see for [this file](https://raw.githubusercontent.com/calico/borzoi/main/examples/targets_human.txt)).
- -The model training is done through the script 'hound_train.py' from the [baskerville repository](https://github.com/calico/baskerville/blob/main/src/baskerville/scripts/hound_train.py). Most of the training parameters are set through a .json file that is supplied to the script. The published model's parameter file can be found [here](https://storage.googleapis.com/seqnn-share/borzoi/params.json).
diff --git a/src/scripts/idx_genome.py b/src/scripts/idx_genome.py new file mode 100755 index 0000000..92127b0 --- /dev/null +++ b/src/scripts/idx_genome.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +from optparse import OptionParser +import os +import sys +import pyfaidx + +''' +idx_genome.py + +Create .fai index file for input .fa. +''' + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] ' + parser = OptionParser(usage) + (options, args) = parser.parse_args() + + if len(args) != 1: + parser.error('Must provide input fasta file') + else: + genome_fa = args[0] + + pyfaidx.Faidx(genome_fa) + +################################################################################ +# __main__ +################################################################################ +if __name__ == '__main__': + main() diff --git a/src/scripts/pygene.py b/src/scripts/pygene.py new file mode 100755 index 0000000..86cae4f --- /dev/null +++ b/src/scripts/pygene.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python +from optparse import OptionParser + +import gzip +import pdb + +''' +pygene + +Classes and methods to manage genes in GTF format. 
+''' + +################################################################################ +# Classes +################################################################################ +class GenomicInterval: + def __init__(self, start, end, chrom=None, strand=None): + self.start = start + self.end = end + self.chrom = chrom + self.strand = strand + + def __eq__(self, other): + return self.start == other.start + + def __lt__(self, other): + return self.start < other.start + + def __cmp__(self, x): + if self.start < x.start: + return -1 + elif self.start > x.start: + return 1 + else: + return 0 + + def __str__(self): + if self.chrom is None: + label = '[%d-%d]' % (self.start, self.end) + else: + label = '%s:%d-%d' % (self.chrom, self.start, self.end) + return label + + +class Transcript: + def __init__(self, chrom, strand, kv): + self.chrom = chrom + self.strand = strand + self.kv = kv + self.exons = [] + self.cds = [] + self.utrs3 = [] + self.utrs5 = [] + self.sorted = False + self.utrs_defined = False + + def add_cds(self, start, end): + self.cds.append(GenomicInterval(start,end)) + + def add_exon(self, start, end): + self.exons.append(GenomicInterval(start,end)) + + def define_utrs(self): + self.utrs_defined = True + + if len(self.cds) == 0: + self.utrs3 = self.exons + + else: + assert(self.sorted) + + # reset UTR lists + self.utrs5 = [] + self.utrs3 = [] + + # match up exons and CDS + ci = 0 + for ei in range(len(self.exons)): + # left initial + if self.exons[ei].end < self.cds[ci].start: + utr = GenomicInterval(self.exons[ei].start, self.exons[ei].end) + if self.strand == '+': + self.utrs5.append(utr) + else: + self.utrs3.append(utr) + + # right initial + elif self.cds[ci].end < self.exons[ei].start: + utr = GenomicInterval(self.exons[ei].start, self.exons[ei].end) + if self.strand == '+': + self.utrs3.append(utr) + else: + self.utrs5.append(utr) + + # overlap + else: + # left overlap + if self.exons[ei].start < self.cds[ci].start: + utr = 
GenomicInterval(self.exons[ei].start, self.cds[ci].start-1) + if self.strand == '+': + self.utrs5.append(utr) + else: + self.utrs3.append(utr) + + # right overlap + if self.cds[ci].end < self.exons[ei].end: + utr = GenomicInterval(self.cds[ci].end+1, self.exons[ei].end) + if self.strand == '+': + self.utrs3.append(utr) + else: + self.utrs5.append(utr) + + # increment up to last + ci = min(ci+1, len(self.cds)-1) + + def fasta_cds(self, fasta_open, stranded=False): + assert(self.sorted) + gene_seq = '' + for exon in self.cds: + exon_seq = fasta_open.fetch(self.chrom, exon.start-1, exon.end) + gene_seq += exon_seq + if stranded and self.strand == '-': + gene_seq = rc(gene_seq) + return gene_seq + + def fasta_exons(self, fasta_open, stranded=False): + assert(self.sorted) + gene_seq = '' + for exon in self.exons: + exon_seq = fasta_open.fetch(self.chrom, exon.start-1, exon.end) + gene_seq += exon_seq + if stranded and self.strand == '-': + gene_seq = rc(gene_seq) + return gene_seq + + def sort_exons(self): + self.sorted = True + if len(self.exons) > 1: + self.exons.sort() + if len(self.cds) > 1: + self.cds.sort() + + def span(self): + exon_starts = [exon.start for exon in self.exons] + exon_ends = [exon.end for exon in self.exons] + return min(exon_starts), max(exon_ends) + + def tss(self): + if self.strand == '-': + return self.exons[-1].end + else: + return self.exons[0].start + + def write_gtf(self, gtf_out, write_cds=False, write_utrs=False): + for ex in self.exons: + cols = [self.chrom, 'pygene', 'exon', str(ex.start), str(ex.end)] + cols += ['.', self.strand, '.', kv_gtf(self.kv)] + print('\t'.join(cols), file=gtf_out) + if write_cds: + for cds in self.cds: + cols = [self.chrom, 'pygene', 'CDS', str(cds.start), str(cds.end)] + cols += ['.', self.strand, '.', kv_gtf(self.kv)] + print('\t'.join(cols), file=gtf_out) + if write_utrs: + assert(self.utrs_defined) + for utr in self.utrs5: + cols = [self.chrom, 'pygene', '5\'UTR', str(utr.start), str(utr.end)] + cols += 
['.', self.strand, '.', kv_gtf(self.kv)] + print('\t'.join(cols), file=gtf_out) + for utr in self.utrs3: + cols = [self.chrom, 'pygene', '3\'UTR', str(utr.start), str(utr.end)] + cols += ['.', self.strand, '.', kv_gtf(self.kv)] + print('\t'.join(cols), file=gtf_out) + + def __str__(self): + return '%s %s %s %s' % (self.chrom, self.strand, kv_gtf(self.kv), ','.join([ex.__str__() for ex in self.exons])) + + +class Gene: + def __init__(self): + self.transcripts = {} + self.chrom = None + self.strand = None + self.start = None + self.end = None + + def add_transcript(self, tx_id, tx): + self.transcripts[tx_id] = tx + self.chrom = tx.chrom + self.strand = tx.strand + self.kv = tx.kv + + def span(self): + tx_spans = [tx.span() for tx in self.transcripts.values()] + tx_starts, tx_ends = zip(*tx_spans) + self.start = min(tx_starts) + self.end = max(tx_ends) + return self.start, self.end + + +class GTF: + def __init__(self, gtf_file, trim_dot=False): + self.gtf_file = gtf_file + self.genes = {} + self.transcripts = {} + self.utrs_defined = False + self.trim_dot = trim_dot + + self.read_gtf() + + def define_utrs(self): + self.utrs_defined = True + for tx in self.transcripts.values(): + tx.define_utrs() + + def read_gtf(self): + if self.gtf_file[-3:] == '.gz': + gtf_in = gzip.open(self.gtf_file, 'rt') + else: + gtf_in = open(self.gtf_file) + + # ignore header + line = gtf_in.readline() + while line[0] == '#': + line = gtf_in.readline() + + while line: + a = line.split('\t') + if a[2] in ['exon','CDS']: + chrom = a[0] + interval_type = a[2] + start = int(a[3]) + end = int(a[4]) + strand = a[6] + kv = gtf_kv(a[8]) + + # add/get transcript + tx_id = kv['transcript_id'] + if self.trim_dot: + tx_id = trim_dot(tx_id) + if not tx_id in self.transcripts: + self.transcripts[tx_id] = Transcript(chrom, strand, kv) + tx = self.transcripts[tx_id] + + # add/get gene + gene_id = kv['gene_id'] + if self.trim_dot: + gene_id = trim_dot(gene_id) + if not gene_id in self.genes: + 
self.genes[gene_id] = Gene() + self.genes[gene_id].add_transcript(tx_id, tx) + + # add exons + if interval_type == 'exon': + tx.add_exon(start, end) + elif interval_type == 'CDS': + tx.add_cds(start, end) + + line = gtf_in.readline() + + gtf_in.close() + + # sort transcript exons + for tx in self.transcripts.values(): + tx.sort_exons() + + def write_gtf(self, out_gtf_file, write_cds=False, write_utrs=False): + if write_utrs and not self.utrs_defined: + self.define_utrs() + + gtf_out = open(out_gtf_file, 'w') + for tx in self.transcripts.values(): + tx.write_gtf(gtf_out, write_cds, write_utrs) + gtf_out.close() + + +################################################################################ +# Methods +################################################################################ +def gtf_kv(s): + """Convert the last gtf section of key/value pairs into a dict.""" + d = {} + + a = s.split(';') + for key_val in a: + if key_val.strip(): + eq_i = key_val.find('=') + if eq_i != -1 and key_val[eq_i-1] != '"': + kvs = key_val.split('=') + else: + kvs = key_val.split() + + key = kvs[0] + if kvs[1][0] == '"' and kvs[-1][-1] == '"': + val = (' '.join(kvs[1:]))[1:-1].strip() + else: + val = (' '.join(kvs[1:])).strip() + + d[key] = val + + return d + +def kv_gtf(d): + """Convert a kv hash to str gtf representation.""" + s = '' + + if 'gene_id' in d.keys(): + s += '%s "%s"; ' % ('gene_id',d['gene_id']) + + if 'transcript_id' in d.keys(): + s += '%s "%s"; ' % ('transcript_id',d['transcript_id']) + + for key in sorted(d.keys()): + if key not in ['gene_id','transcript_id']: + s += '%s "%s"; ' % (key,d[key]) + + return s + +def trim_dot(gene_id): + """Trim the final dot suffix off a gene_id.""" + dot_i = gene_id.rfind('.') + if dot_i != -1: + gene_id = gene_id[:dot_i] + return gene_id \ No newline at end of file diff --git a/src/scripts/slurm.py b/src/scripts/slurm.py new file mode 100755 index 0000000..ab83e69 --- /dev/null +++ b/src/scripts/slurm.py @@ -0,0 +1,332 @@ 
+#!/usr/bin/env python +from __future__ import print_function +from optparse import OptionParser +import os, pdb, sys, subprocess, tempfile, time + +################################################################################ +# slurm.py +# +# Methods to run jobs on SLURM. +################################################################################ + + +################################################################################ +# main +################################################################################ +def main(): + usage = 'usage: %prog [options] arg' + parser = OptionParser(usage) + parser.add_option('-g', dest='go', + default=False, action='store_true', + help='Don\'t wait for the job to finish [Default: %default]') + + parser.add_option('-o', dest='out_file') + parser.add_option('-e', dest='err_file') + + parser.add_option('-J', dest='job_name') + + parser.add_option('-q', dest='queue', default='general') + parser.add_option('-n', dest='cpu', default=1, type='int') + parser.add_option('-m', dest='mem', default=None, type='int') + parser.add_option('-t', dest='time', default=None) + + (options,args) = parser.parse_args() + + cmd = args[0] + + main_job = Job(cmd, name=options.job_name, + out_file=options.out_file, err_file=options.err_file, + queue=options.queue, cpu=options.cpu, + mem=options.mem, time=options.time) + main_job.launch() + + if options.go: + time.sleep(1) + + # find the job + if not main_job.update_status: + time.sleep(1) + + # delete sbatch + main_job.clean() + + else: + time.sleep(10) + + # find the job + if not main_job.update_status(): + time.sleep(10) + + # wait for it to complete + while main_job.update_status() and main_job.status in ['PENDING','RUNNING']: + time.sleep(30) + + print('%s %s' % (main_job.name, main_job.status), file=sys.stderr) + + # delete sbatch + main_job.clean() + + +################################################################################ +# multi_run +# +# Launch and manage multiple 
SLURM jobs in parallel, using only one 'sacct' +# call per +################################################################################ +def multi_run(jobs, max_proc=None, verbose=False, launch_sleep=2, update_sleep=20): + total = len(jobs) + finished = 0 + running = 0 + active_jobs = [] + + if max_proc is None: + max_proc = len(jobs) + + while finished + running < total: + # launch jobs up to the max + while running < max_proc and finished+running < total: + # launch + jobs[finished+running].launch() + time.sleep(launch_sleep) + if verbose: + print(jobs[finished+running].name, jobs[finished+running].cmd, file=sys.stderr) + + # save it + active_jobs.append(jobs[finished+running]) + running += 1 + + # sleep + time.sleep(update_sleep) + + # update all statuses + multi_update_status(active_jobs) + + # update active jobs + active_jobs_new = [] + for i in range(len(active_jobs)): + if active_jobs[i].status in ['PENDING', 'RUNNING']: + active_jobs_new.append(active_jobs[i]) + else: + if verbose: + print('%s %s' % (active_jobs[i].name, active_jobs[i].status), file=sys.stderr) + + running -= 1 + finished += 1 + + active_jobs = active_jobs_new + + + # wait for all to finish + while active_jobs: + # sleep + time.sleep(update_sleep) + + # update all statuses + multi_update_status(active_jobs) + + # update active jobs + active_jobs_new = [] + for i in range(len(active_jobs)): + if active_jobs[i].status in ['PENDING', 'RUNNING']: + active_jobs_new.append(active_jobs[i]) + else: + if verbose: + print('%s %s' % (active_jobs[i].name, active_jobs[i].status), file=sys.stderr) + + running -= 1 + finished += 1 + + active_jobs = active_jobs_new + + +################################################################################ +# multi_update_status +# +# Update the status for multiple jobs at once. 
+################################################################################
+def _sacct_states():
+    """Return {job id (int): state keyword (str)} from one 'sacct' call.
+
+    The JobID and State columns are requested explicitly, so the state
+    never has to be located by position in a site-specific default layout.
+    """
+    sacct_str = subprocess.check_output(
+        ['sacct', '--noheader', '--parsable2', '--format=JobID,State'])
+    sacct_str = sacct_str.decode('UTF-8')
+
+    states = {}
+    for line in sacct_str.split('\n'):
+        fields = line.split('|')
+        if len(fields) < 2 or not fields[1].strip():
+            continue
+        try:
+            line_id = int(fields[0])
+        except ValueError:
+            # job steps such as '123.batch' are not whole-job records
+            continue
+        # keep only the keyword, e.g. 'CANCELLED by 1000' -> 'CANCELLED'
+        states[line_id] = fields[1].split()[0]
+    return states
+
+
+def multi_update_status(jobs, max_attempts=3, sleep_attempt=5):
+    """Refresh the status attribute of every job in jobs.
+
+    All statuses are first reset to None. 'sacct' is then queried up to
+    max_attempts times, sleeping sleep_attempt seconds between attempts,
+    until every job has been found. Jobs never found keep status None.
+    """
+    # reset all
+    for j in jobs:
+        j.status = None
+
+    # try multiple times because sometimes it fails
+    attempt = 0
+    while attempt < max_attempts and any(j.status is None for j in jobs):
+        if attempt > 0:
+            time.sleep(sleep_attempt)
+
+        states = _sacct_states()
+        for j in jobs:
+            # an unlaunched job (id None) can never match a record
+            if j.id in states:
+                j.status = states[j.id]
+
+        attempt += 1
+
+
+class Job:
+    """ class to manage SLURM jobs.
+
+    Notes:
+     -Since we have two types of machines in the GPU queue, I'm asking
+      for the machine type as "queue", and the "launch" method will handle it.
+    """
+
+    def __init__(self, cmd, name, out_file=None, err_file=None, sb_file=None,
+                 queue='standard', cpu=1, mem=None, time=None, gpu=0):
+        self.cmd = cmd
+        self.name = name
+        self.out_file = out_file
+        self.err_file = err_file
+        self.sb_file = sb_file
+        # concise GPU labels are expanded once, at construction
+        self.queue = self.translate_gpu(queue)
+        self.cpu = cpu
+        self.mem = mem
+        self.time = time
+        self.gpu = gpu
+
+        # set by launch() and update_status()
+        self.id = None
+        self.status = None
+
+
+    def flash(self):
+        """ Determine if the job can run on the flash queue by parsing the time.
+
+        Accepts 'D-H:M:S', 'H:M:S' and 'M:S'. Returns True when the
+        requested time is at most four hours. Any other layout is reported
+        on stderr and ends the process with exit status 1.
+        """
+        day_split = self.time.split('-')
+        if len(day_split) == 2:
+            days, hms = day_split
+        else:
+            days = 0
+            hms = day_split[0]
+
+        hms_split = hms.split(':')
+        if len(hms_split) == 3:
+            hours, mins, _secs = hms_split
+        elif len(hms_split) == 2:
+            hours = 0
+            mins, _secs = hms_split
+        else:
+            print('Cannot parse time: ', self.time, file=sys.stderr)
+            sys.exit(1)
+
+        # seconds do not enter the comparison
+        hours_sum = 24*int(days) + int(hours) + float(mins)/60
+
+        return hours_sum <= 4
+
+
+    def _write_sbatch(self, sbatch_out):
+        """Write the sbatch script for this job to the open file sbatch_out."""
+        print('#!/bin/bash\n', file=sbatch_out)
+        if self.gpu > 0:
+            if self.queue == "" or self.queue == 'gpu':
+                gpu_str = 'gpu'
+                gres_str = '--gres=gpu'
+            elif self.queue == 'nvidia_geforce_rtx_4090':
+                gpu_str = 'minigpu'
+                gres_str = '--gres=gpu:%s' % self.queue
+            else:
+                gpu_str = 'gpu'
+                gres_str = '--gres=gpu:%s' % self.queue
+            print('#SBATCH -p %s' % gpu_str, file=sbatch_out)
+            print('#SBATCH %s:%d\n' % (gres_str, self.gpu), file=sbatch_out)
+        else:
+            print('#SBATCH -p %s' % self.queue, file=sbatch_out)
+        print('#SBATCH -n 1', file=sbatch_out)
+        print('#SBATCH -c %d' % self.cpu, file=sbatch_out)
+        if self.name:
+            print('#SBATCH -J %s' % self.name, file=sbatch_out)
+        if self.out_file:
+            print('#SBATCH -o %s' % self.out_file, file=sbatch_out)
+        if self.err_file:
+            print('#SBATCH -e %s' % self.err_file, file=sbatch_out)
+        if self.mem:
+            print('#SBATCH --mem %d' % self.mem, file=sbatch_out)
+        if self.time:
+            print('#SBATCH --time %s' % self.time, file=sbatch_out)
+        print(self.cmd, file=sbatch_out)
+
+
+    def launch(self):
+        """ Make an sbatch file, launch it, and save the job id. """
+
+        # make sbatch script; a temporary file disappears when its object
+        # is closed, so keep it referenced until sbatch has read the script
+        sbatch_tempf = None
+        if self.sb_file is None:
+            sbatch_tempf = tempfile.NamedTemporaryFile()
+            sbatch_file = sbatch_tempf.name
+        else:
+            sbatch_file = self.sb_file
+
+        try:
+            with open(sbatch_file, 'w') as sbatch_out:
+                self._write_sbatch(sbatch_out)
+
+            # launch it; check_output to get the id. An argument list
+            # (no shell) keeps paths containing spaces intact.
+            launch_str = subprocess.check_output(['sbatch', sbatch_file])
+        finally:
+            if sbatch_tempf is not None:
+                sbatch_tempf.close()
+
+        # e.g. "Submitted batch job 13861989"
+        self.id = int(launch_str.split()[3])
+
+
+    def clean(self):
+        """ Delete the sbatch script named by sb_file, if it exists.
+
+        Scripts written to a temporary file are already removed by launch().
+        """
+        if self.sb_file is not None and os.path.isfile(self.sb_file):
+            os.remove(self.sb_file)
+
+
+    def translate_gpu(self, queue_gpu):
+        """Translate concise GPU labels to their full versions,
+        or propagate the given label."""
+        translation = {
+            'p100': 'tesla_p100-pcie-16gb',
+            'tesla': 'tesla_p100-pcie-16gb',
+            'geforce': 'nvidia_geforce_gtx_1080_ti',
+            'gtx1080': 'nvidia_geforce_gtx_1080_ti',
+            'titan': 'titan_rtx',
+            'quadro': 'quadro_rtx_8000',
+            'rtx4090': 'nvidia_geforce_rtx_4090'
+        }
+        return translation.get(queue_gpu, queue_gpu)
+
+
+    def update_status(self, max_attempts=3, sleep_attempt=5):
+        """ Use 'sacct' to update the job's status. Return True if found and False if not. """
+
+        status = None
+
+        attempt = 0
+        while attempt < max_attempts and status is None:
+            if attempt > 0:
+                time.sleep(sleep_attempt)
+
+            status = _sacct_states().get(self.id)
+
+            attempt += 1
+
+        if status is None:
+            return False
+
+        self.status = status
+        return True
+
+
+################################################################################
+# __main__
+################################################################################
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/src/scripts/util.py b/src/scripts/util.py
new file mode 100755
index 0000000..58b46a3
--- /dev/null
+++ b/src/scripts/util.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+from __future__ import print_function
+#import pdb
+import operator, os, sys, subprocess, time
+
+############################################################
+# util
+#
+# Helpful methods that are difficult to categorize.
+############################################################
+
+############################################################
+# condorify
+############################################################
+def condorify(cmds):
+    """Wrap each command in a 'runCmd -c "..."' call."""
+    return ['runCmd -c "%s"' % c for c in cmds]
+
+############################################################
+# slurmify
+############################################################
+def slurmify(cmds, mem_mb=None):
+    """Wrap each command in an 'srun' call on the general partition.
+
+    Args:
+      cmds: list of shell command strings
+      mem_mb: value for srun's --mem option; omitted when None
+    """
+    # the original tested the undefined name 'mem' and always raised
+    # a NameError
+    if mem_mb is not None:
+        mem_str = '--mem %d' % mem_mb
+    else:
+        mem_str = ''
+
+    return ['srun -p general -n 1 %s "%s"' % (mem_str, c) for c in cmds]
+
+############################################################
+# exec_par
+#
+# Execute the commands in the list 'cmds' in parallel, but
+# only running 'max_proc' at a time.
+############################################################
+def exec_par(cmds, max_proc=None, verbose=False):
+    """Run shell commands, keeping at most max_proc running at once.
+
+    Args:
+      cmds: list of shell command strings
+      max_proc: maximum number of concurrent processes; None means all
+      verbose: print each command to stderr as it is started
+
+    Raises:
+      ValueError: if there are commands to run but max_proc < 1
+    """
+    total = len(cmds)
+
+    if max_proc is None:
+        max_proc = total
+
+    # without this check the launch loop below would spin forever
+    if total > 0 and max_proc < 1:
+        raise ValueError('max_proc must be at least 1, got %r' % (max_proc,))
+
+    if max_proc == 1:
+        # strictly sequential execution
+        for cmd in cmds:
+            if verbose:
+                print(cmd, file=sys.stderr)
+            subprocess.Popen(cmd, shell=True).wait()
+        return
+
+    launched = 0
+    procs = []
+
+    while launched < total:
+        # launch jobs up to max
+        while len(procs) < max_proc and launched < total:
+            if verbose:
+                print(cmds[launched], file=sys.stderr)
+            procs.append(subprocess.Popen(cmds[launched], shell=True))
+            launched += 1
+
+        # keep only the processes that are still running
+        still_running = [p for p in procs if p.poll() is None]
+
+        # if none finished, sleep
+        if len(still_running) == len(procs):
+            time.sleep(1)
+        procs = still_running
+
+    # wait for all to finish
+    for p in procs:
+        p.wait()
+
+############################################################
+# slurm_par
+#
+# Execute the commands in the list 'cmds' in parallel on
+# SLURM, but only running
'max_proc' at a time.
+#
+# Doesn't work. Jobs are allocated resources, but won't run.
+# Also, I'd have to screen into login nodes, which
+# isn't great because I can't get back to them.
+############################################################
+def slurm_par(cmds, max_proc, queue='general', cpu=1, mem=None, out_files=None, err_files=None):
+    """Wrap each command in 'srun' and run them through exec_par.
+
+    Args:
+      cmds: list of shell command strings
+      max_proc: maximum number of concurrent srun processes
+      queue: partition passed to 'srun -p'
+      cpu: value passed to 'srun -n'
+      mem: value for '--mem'; omitted when None
+      out_files, err_files: optional per-command paths for '-o' / '-e'
+    """
+    # preprocess cmds
+    if mem is not None:
+        mem_str = '--mem %d' % mem
+    else:
+        mem_str = ''
+
+    if out_files is not None:
+        out_strs = ['-o %s' % of for of in out_files]
+    else:
+        out_strs = ['']*len(cmds)
+
+    if err_files is not None:
+        err_strs = ['-e %s' % ef for ef in err_files]
+    else:
+        err_strs = ['']*len(cmds)
+
+    slurm_cmds = ['srun -p %s -n %d %s %s %s "%s"' % (queue, cpu, mem_str, out_strs[i], err_strs[i], cmds[i]) for i in range(len(cmds))]
+
+    # exec_par takes 'verbose'; the former 'print_cmd' keyword does not
+    # exist and raised a TypeError
+    exec_par(slurm_cmds, max_proc, verbose=True)
+
+
+############################################################
+# sort_dict
+#
+# Sort a dict by the values, returning a list of tuples
+############################################################
+def sort_dict(hash, reverse=False):
+    """Return the (key, value) pairs of hash sorted by value."""
+    return sorted(hash.items(), key=operator.itemgetter(1), reverse=reverse)
+
diff --git a/src/scripts/w5_merge.py b/src/scripts/w5_merge.py
new file mode 100755
index 0000000..307cfde
--- /dev/null
+++ b/src/scripts/w5_merge.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+from optparse import OptionParser
+import os
+import sys
+
+import h5py
+import numpy as np
+
+'''
+w5_merge.py
+
+Merge wig5 files using a specified summary statistic.
+'''
+
+################################################################################
+# main
+################################################################################
+def main():
+    """Merge two or more wig5 (HDF5) files into one output file.
+
+    For every dataset key found in any input, the per-file arrays are
+    combined position by position with the statistic chosen by -s
+    (sum, mean, geo-mean or sqrt-mean). A file lacking a key contributes
+    zeros for it. Results are clipped to the float16 range and written
+    as float16.
+    """
+    usage = 'usage: %prog [options] <out_w5> <in1_w5> <in2_w5> ...'
+    parser = OptionParser(usage)
+    parser.add_option('-s', dest='sum_stat',
+                      default='sum', help='Summary statistic [Default: %default]')
+    parser.add_option('-v', dest='verbose',
+                      default=False, action='store_true')
+    parser.add_option('-w', dest='overwrite',
+                      default=False, action='store_true')
+    parser.add_option('-z', dest='gzip',
+                      default=False, action='store_true')
+    (options, args) = parser.parse_args()
+
+    if len(args) < 3:
+        parser.error('Must provide output and two or more input wig5.')
+    else:
+        out_w5_file = args[0]
+        in_w5_files = args[1:]
+
+    # reject an unknown statistic before anything is opened or written;
+    # the per-key loop used to print a message and then continue with an
+    # undefined (or stale) result array
+    if options.sum_stat not in ('sum', 'mean', 'geo-mean', 'sqrt-mean'):
+        parser.error('Cannot identify summary statistic %s' % options.sum_stat)
+
+    if os.path.isfile(out_w5_file) and not options.overwrite:
+        parser.error('%s exists. Please remove.' % out_w5_file)
+
+    compression_args = {}
+    if options.gzip:
+        compression_args['compression'] = 'gzip'
+        compression_args['shuffle'] = True
+
+    # open input wig5 (read-only)
+    in_w5_opens = [h5py.File(iwf, 'r') for iwf in in_w5_files]
+    in_num = len(in_w5_opens)
+
+    try:
+        # take keys union
+        in_keys = set()
+        for in_w5_open in in_w5_opens:
+            in_keys |= set(in_w5_open.keys())
+
+        # open output file
+        with h5py.File(out_w5_file, 'w') as out_w5_open:
+            for out_key in sorted(in_keys):
+                if options.verbose:
+                    print(out_key)
+
+                # initialize array with the length from the first file
+                # that holds this key
+                in_key_len = next(len(w5[out_key]) for w5 in in_w5_opens if out_key in w5)
+                in_key_data = np.zeros((in_num, in_key_len), dtype='float32')
+
+                # read data
+                for i in range(in_num):
+                    if out_key in in_w5_opens[i]:
+                        in_key_data[i] = np.array(in_w5_opens[i][out_key])
+                    else:
+                        print('%s missing %s' % (in_w5_files[i], out_key), file=sys.stderr)
+
+                # summarize
+                if options.sum_stat == 'sum':
+                    out_key_data = in_key_data.sum(axis=0)
+
+                elif options.sum_stat == 'mean':
+                    out_key_data = in_key_data.mean(axis=0)
+
+                elif options.sum_stat == 'geo-mean':
+                    in_key_data_log = np.log(in_key_data)
+                    in_key_data_log_mean = in_key_data_log.mean(axis=0)
+                    out_key_data = np.exp(in_key_data_log_mean)
+
+                else:  # 'sqrt-mean'
+                    in_key_data_sqrt = in_key_data**0.5
+                    in_key_data_sqrt_mean = in_key_data_sqrt.mean(axis=0)
+                    out_key_data = in_key_data_sqrt_mean**2
+
+                # carefully decrease resolution
+                out_key_data = np.clip(out_key_data, np.finfo(np.float16).min, np.finfo(np.float16).max)
+                out_key_data = out_key_data.astype('float16')
+
+                # write
+                out_w5_open.create_dataset(out_key, data=out_key_data,
+                                           dtype='float16', **compression_args)
+    finally:
+        for in_w5_open in in_w5_opens:
+            in_w5_open.close()
+
+
+
+################################################################################
+# __main__
+################################################################################
+if __name__ == '__main__':
+    main()
diff --git a/src/scripts/w5_qc.py b/src/scripts/w5_qc.py
new file mode 100755
index 0000000..0d007e6
--- /dev/null
+++ b/src/scripts/w5_qc.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+from optparse import OptionParser
+
+from collections import OrderedDict
+import os
+import pdb
+import sys
+
+import h5py
+from intervaltree import IntervalTree
+import numpy as np
+from scipy.ndimage import maximum_filter1d
+from sklearn.mixture import GaussianMixture
+
+import matplotlib
+matplotlib.use('agg')
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+'''
+w5_qc.py
+
+Create a QC report for a Wig5 file.
+'''
+
+################################################################################
+# main
+################################################################################
+def main():
+    """Write a directory of QC summaries for one wig5 (HDF5) coverage file.
+
+    Outputs, all inside the -o directory: nan.txt, zero.txt, dist*.pdf,
+    hist.txt, tcounts.txt, percentiles.txt, means.txt and max.txt.
+    """
+    usage = 'usage: %prog [options] <w5_file>'
+    parser = OptionParser(usage)
+    parser.add_option('-b', dest='blacklist_bed',
+                      default='/home/drk/common/data/genomes/hg38/blacklist/blacklist_hg38_all.bed',
+                      help='Blacklist BED file for annotating max regions.')
+    parser.add_option('-c', dest='chrs',
+                      help='Process only the given comma-separated chromosomes')
+    parser.add_option('-g', dest='genes_bed',
+                      help='Genes BED file for annotating max regions.')
+    parser.add_option('-m', dest='max_pool',
+                      default=512, type='int',
+                      help='Max pool window to report max values')
+    parser.add_option('-n', dest='max_n',
+                      default=100, type='int',
+                      help='Number of maximum coverage positions [Default: %default]')
+    parser.add_option('-o', dest='out_dir', default='w5_qc')
+    parser.add_option('-p', dest='pool',
+                      default=32, type='int',
+                      help='Average pool window to reduce dimensionality [Default: %default]')
+    (options, args) = parser.parse_args()
+
+    if len(args) != 1:
+        parser.error('Must provide Wig5.')
+    else:
+        w5_file = args[0]
+
+    # make the max pool a multiple of the average pool *before* it is used
+    # to truncate chromosomes, so both reshapes below divide evenly
+    pool_mod = options.max_pool % options.pool
+    if pool_mod != 0:
+        old_pool = options.max_pool
+        options.max_pool -= pool_mod
+        print('Modifying max pool %d to %d to be divisible with avg pool.' % (old_pool, options.max_pool), file=sys.stderr)
+    if options.max_pool < options.pool:
+        parser.error('Max pool window must be at least as large as the average pool window.')
+
+    if not os.path.isdir(options.out_dir):
+        os.mkdir(options.out_dir)
+
+    w5_open = h5py.File(w5_file, 'r')
+
+    ############################################################
+    # choose chromosomes
+
+    if options.chrs is None:
+        options.chrs = []
+        for chrm in w5_open.keys():
+            if chrm in ['chrM', 'chrEBV']:
+                continue
+            if chrm.startswith('chrUn'):
+                continue
+            if chrm.find('random') != -1:
+                continue
+            options.chrs.append(chrm)
+    else:
+        chrs_str = options.chrs
+        options.chrs = []
+        for chrm in chrs_str.split(','):
+            if chrm in w5_open:
+                options.chrs.append(chrm)
+            else:
+                print('Chromosome %s not found in %s' % (chrm, w5_file), file=sys.stderr)
+
+    if not options.chrs:
+        parser.error('No chromosomes to process in %s' % w5_file)
+
+    ############################################################
+    # read genome coverage
+
+    chr_lens = OrderedDict()
+    genome_cov = []
+
+    with open('%s/nan.txt' % options.out_dir, 'w') as nan_out:
+        for chrm in options.chrs:
+            # read chromosome coverage
+            chr_cov = np.array(w5_open[chrm], dtype='float16')
+
+            # truncate to fit reshape here and below; a remainder of zero
+            # must not slice, because chr_cov[:-0] is an empty array
+            pool_max = max(options.pool, options.max_pool)
+            chr_mod = len(chr_cov) % pool_max
+            if chr_mod > 0:
+                chr_cov = chr_cov[:-chr_mod]
+
+            # handle nan
+            chr_nan = np.mean(np.isnan(chr_cov), dtype='float64')
+            print('%-5s\t%7.2e' % (chrm, chr_nan), file=nan_out)
+            chr_cov = np.nan_to_num(chr_cov)
+
+            # save chromosome
+            chr_lens[chrm] = len(chr_cov)
+
+            # take means across windows
+            chr_cov_pool = np.mean(np.reshape(chr_cov, (-1, options.pool)), axis=1)
+
+            # append to genome coverage
+            genome_cov.append(chr_cov_pool)
+
+    genome_cov = np.concatenate(genome_cov)
+
+    ############################################################
+    # plot distributions
+
+    zero_mask = (genome_cov == 0)
+    zero_pct = np.mean(zero_mask)
+
+    with open('%s/zero.txt' % options.out_dir, 'w') as zero_out:
+        print(zero_pct, file=zero_out)
+
+    sample_size = min((~zero_mask).sum(), 200000)
+    sample_cov = np.random.choice(genome_cov[~zero_mask], size=sample_size, replace=False)
+
+    plt.figure()
+    sns.distplot(sample_cov)
+    plt.savefig('%s/dist.pdf' % options.out_dir)
+    plt.close()
+
+    plt.figure()
+    sns.distplot(np.sqrt(sample_cov))
+    plt.savefig('%s/dist_sqrt.pdf' % options.out_dir)
+    plt.close()
+
+    plt.figure()
+    sns.distplot(np.log(sample_cov+1))
+    plt.savefig('%s/dist_log.pdf' % options.out_dir)
+    plt.close()
+
+    ############################################################
+    # histogram values
+    # (which help identify sparse, poorly normalized files)
+
+    # find largest chromosome
+    chr_list = list(chr_lens.keys())
+    lens_list = list(chr_lens.values())
+    max_len_i = np.argmax(lens_list)
+    max_chr = chr_list[max_len_i]
+
+    # read coverage
+    max_chr_cov = np.nan_to_num(w5_open[max_chr])
+
+    # count values
+    unique_cov, counts_cov = np.unique(max_chr_cov, return_counts=True)
+
+    # write
+    with open('%s/hist.txt' % options.out_dir, 'w') as hist_out:
+        for i in range(len(unique_cov)):
+            print('%-4d\t%7.4f\t%9d' % (i, unique_cov[i], counts_cov[i]), file=hist_out)
+
+
+    ############################################################
+    # counts at thresholds
+
+    with open('%s/tcounts.txt' % options.out_dir, 'w') as counts_out:
+        for t in [4, 8, 16, 32, 64, 128, 256, 512]:
+            tcount = np.sum(genome_cov > t)
+            tpct = np.mean(genome_cov > t)
+            print('%-3d\t%8d\t%.2e' % (t, tcount, tpct), file=counts_out)
+
+    ############################################################
+    # compute genome percentiles
+
+    pcts = np.array([.001, .01, .05, .25, .50, .75, .95, .99, .999])
+    cov_pcts = np.percentile(genome_cov, 100*pcts)
+
+    with open('%s/percentiles.txt' % options.out_dir, 'w') as pcts_out:
+        for i in range(len(pcts)):
+            print('%5.3f\t%7.3f' % (pcts[i], cov_pcts[i]), file=pcts_out)
+
+    ############################################################
+    # compute genome and chromosome means
+
+    with open('%s/means.txt' % options.out_dir, 'w') as means_out:
+        genome_cov_mean = np.mean(genome_cov, dtype='float64')
+        print('%-5s\t%9d\t%6f\t%5.3f' % ('whole', 1, genome_cov_mean, 1.0), file=means_out)
+
+        for chrm in options.chrs:
+            chr_cov = np.nan_to_num(w5_open[chrm])
+
+            # compute chromosome coverage mean and ratio
+            chr_cov_mean = np.mean(chr_cov, dtype='float64')
+            chr_ratio = chr_cov_mean / genome_cov_mean
+            print('%-23s\t%9d\t%6f\t%5.3f' % (chrm, len(chr_cov), chr_cov_mean, chr_ratio), file=means_out)
+
+    ############################################################
+    # report the windows with maximum coverage
+
+    # blacklist annotation
+    blacklist_trees = bed_chr_trees(options.blacklist_bed)
+
+    # genes annotation
+    gene_trees = bed_chr_trees(options.genes_bed)
+
+    # reshape: max over each group of average-pooled windows
+    add_pool = options.max_pool // options.pool
+    genome_cov_maxp = np.max(np.reshape(genome_cov, (-1, add_pool)), axis=1)
+
+    with open('%s/max.txt' % options.out_dir, 'w') as max_out:
+        for _ in range(options.max_n):
+            max_i = np.argmax(genome_cov_maxp)
+            genome_i = max_i*options.max_pool
+            chrm, pos = genome_chr_pos(genome_i, chr_lens)
+
+            annotations = []
+
+            # annotate blacklist
+            blacklist_chr_tree = blacklist_trees.get(chrm, IntervalTree())
+            if blacklist_chr_tree[pos:pos+options.max_pool]:
+                annotations.append('blacklist')
+
+            # annotate genes
+            gene_chr_tree = gene_trees.get(chrm, IntervalTree())
+            if gene_chr_tree[pos:pos+options.max_pool]:
+                annotations.append('gene')
+
+            ann_str = ','.join(annotations)
+
+            print('%-5s\t%9d\t%7f\t%s' % (chrm, pos, genome_cov_maxp[max_i], ann_str), file=max_out)
+
+            # zero the coverage so we don't pick it again
+            genome_cov_maxp[max_i] = 0
+
+    w5_open.close()
+
+
+def bed_chr_trees(bed_file):
+    """Return a dict mapping chromosomes to IntervalTrees.
+
+    Args:
+      bed_file (str): BED file path; None yields an empty dict.
+    """
+    chr_trees = {}
+    if bed_file is not None:
+        with open(bed_file) as bed_in:
+            for line in bed_in:
+                a = line.split()
+
+                # skip blank or truncated lines
+                if len(a) < 3:
+                    continue
+
+                chrm = a[0]
+                start = int(a[1])
+                end = int(a[2])
+
+                # NOTE(review): intervaltree is expected to reject empty
+                # intervals - confirm; zero-length records are skipped
+                if end <= start:
+                    continue
+
+                if chrm not in chr_trees:
+                    chr_trees[chrm] = IntervalTree()
+
+                chr_trees[chrm][start:end] = True
+
+    return chr_trees
+
+
+def genome_chr_pos(gi, chr_lens):
+    """ Compute chromosome and position for a genome index.
+
+    Args
+      gi (int): Genomic index
+      chr_lens (OrderedDict): Chromosome lengths
+
+    Returns:
+      chrm (str): Chromosome
+      pos (int): Position
+    """
+
+    chrms_list = list(chr_lens.keys())
+    lengths_list = list(chr_lens.values())
+
+    # chromosome index
+    ci = 0
+
+    # genome index of the start of chromosome ci
+    gii = 0
+
+    # while gi is beyond this chromosome; '>=' because an offset equal to
+    # the chromosome length is the first position of the next chromosome
+    while ci < len(lengths_list) and gi - gii >= lengths_list[ci]:
+        # advance genome index
+        gii += lengths_list[ci]
+
+        # advance chromosome
+        ci += 1
+
+    # we shouldn't be beyond the chromosomes
+    assert (ci < len(lengths_list))
+
+    # set position
+    pos = gi - gii
+
+    return chrms_list[ci], pos
+
+################################################################################
+# __main__
+################################################################################
+if __name__ == '__main__':
+    main()
From 23e49ddb38ef14a34858312883166eef232aa9fa Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Tue, 1 Oct 2024 11:27:33 -0700 Subject: [PATCH 04/32] Updated download/install helper scripts. --- env_vars.sh | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/env_vars.sh b/env_vars.sh index 8d41e18..4719e52 100755 --- a/env_vars.sh +++ b/env_vars.sh @@ -2,8 +2,6 @@ # set these variables before running the script LOCAL_BORZOI_PATH="/home/jlinder/borzoi" -LOCAL_BASKERVILLE_PATH="/home/jlinder/baskerville" -LOCAL_WESTMINSTER_PATH="/home/jlinder/westminster" LOCAL_USER="jlinder" # create env_vars sh scripts in local conda env @@ -20,27 +18,17 @@ if ! 
[ -e $file_vars_deact ]; then echo '#!/bin/sh' > $file_vars_deact fi -# append borzoi (and baskerville/westminster) env variable exports to /activate.d/env_vars.sh +# append env variable exports to /activate.d/env_vars.sh echo "export BORZOI_DIR=$LOCAL_BORZOI_PATH" >> $file_vars_act echo 'export PATH=$BORZOI_DIR/src/scripts:$PATH' >> $file_vars_act echo 'export PYTHONPATH=$BORZOI_DIR/src/scripts:$PYTHONPATH' >> $file_vars_act -echo "export BASKERVILLE_DIR=$LOCAL_BASKERVILLE_PATH" >> $file_vars_act -echo 'export PATH=$BASKERVILLE_DIR/src/baskerville/scripts:$PATH' >> $file_vars_act -echo 'export PYTHONPATH=$BASKERVILLE_DIR/src/baskerville/scripts:$PYTHONPATH' >> $file_vars_act - -echo "export WESTMINSTER_DIR=$LOCAL_WESTMINSTER_PATH" >> $file_vars_act -echo 'export PATH=$WESTMINSTER_DIR/src/westminster/scripts:$PATH' >> $file_vars_act -echo 'export PYTHONPATH=$WESTMINSTER_DIR/src/westminster/scripts:$PYTHONPATH' >> $file_vars_act - echo 'export BORZOI_HG38=$BORZOI_DIR/examples/hg38' >> $file_vars_act echo 'export BORZOI_MM10=$BORZOI_DIR/examples/mm10' >> $file_vars_act echo "export BORZOI_CONDA=/home/$LOCAL_USER/anaconda3/etc/profile.d/conda.sh" >> $file_vars_act -# append borzoi env variable unsets to /deactivate.d/env_vars.sh -echo 'unset BASKERVILLE_DIR' >> $file_vars_deact -echo 'unset WESTMINSTER_DIR' >> $file_vars_deact +# append env variable unsets to /deactivate.d/env_vars.sh echo 'unset BORZOI_DIR' >> $file_vars_deact echo 'unset BORZOI_HG38' >> $file_vars_deact echo 'unset BORZOI_MM10' >> $file_vars_deact From e3a555e939d7170facf990f1a393d1d9267b84e4 Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 13:58:10 -0700 Subject: [PATCH 05/32] Update README.md --- README.md | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d81ff13..498f17c 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Code repository for Borzoi models, which are convolutional neural 
networks train [https://www.biorxiv.org/content/10.1101/2023.08.30.555582v1](https://www.biorxiv.org/content/10.1101/2023.08.30.555582v1). -Borzoi was trained on a large set of RNA-seq experiments from ENCODE and GTEx, as well as re-processed versions of the original Enformer training data (including ChIP-seq and DNase data from ENCODE, ATAC-seq data from CATlas, and CAGE data from FANTOM5). Click [here](https://raw.githubusercontent.com/calico/borzoi/main/examples/targets_human.txt) for a list of trained-on experiments. +Borzoi was trained on a large set of RNA-seq experiments from ENCODE and GTEx, as well as re-processed versions of the original Enformer training data (including ChIP-seq and DNase data from ENCODE, ATAC-seq data from CATlas, and CAGE data from FANTOM5). Here is a list of trained-on experiments: [human](https://raw.githubusercontent.com/calico/borzoi/main/examples/targets_human.txt) / [mouse](https://raw.githubusercontent.com/calico/borzoi/main/examples/targets_mouse.txt). The repository contains example usage code (including jupyter notebooks for predicting and visualizing genetic variants) as well as links for downloading model weights, training data, QTL benchmark tasks, etc. @@ -30,20 +30,51 @@ cd borzoi pip install -e . ``` -These repositories further depend on a number of python packages (which are automatically installed with borzoi). See **setup.cfg** for a complete list. The most important version dependencies are: -- Python == 3.9 -- Tensorflow == 2.12.x (see [https://www.tensorflow.org/install/pip](https://www.tensorflow.org/install/pip)) +To train new models, the [westminster repository](https://github.com/calico/westminster.git) is also required and can be installed with these commands: +```sh +git clone https://github.com/calico/westminster.git +cd westminster +pip install -e . +``` + +These repositories further depend on a number of python packages (which are automatically installed with borzoi). 
See **pyproject.toml** for a complete list. The most important version dependencies are: +- Python == 3.10 +- Tensorflow == 2.15.x (see [https://www.tensorflow.org/install/pip](https://www.tensorflow.org/install/pip)) *Note*: The example notebooks require jupyter, which can be installed with `pip install notebook`.
-A new conda environment can be created with `conda create -n borzoi_py39 python=3.9`. +A new conda environment can be created with `conda create -n borzoi_py310 python=3.10`. + +Finally, the code base relies on a number of environment variables. For convenience, these can be configured in the active conda environment with the 'env_vars.sh' script. +```sh +cd borzoi +conda activate borzoi_py310 +./env_vars.sh +``` + +Alternatively, these environment variables can be set manually: +```sh +export BORZOI_DIR=/home/<user>/borzoi +export PATH=$BORZOI_DIR/src/scripts:$PATH +export PYTHONPATH=$BORZOI_DIR/src/scripts:$PYTHONPATH + +export BORZOI_CONDA=/home/<user>/anaconda3/etc/profile.d/conda.sh +export BORZOI_HG38=$BORZOI_DIR/examples/hg38 +export BORZOI_MM10=$BORZOI_DIR/examples/mm10 +``` ### Model Availability The model weights can be downloaded as .h5 files from the URLs below. We trained a total of 4 model replicates with identical train, validation and test splits (test = fold3, validation = fold4 from [sequences_human.bed.gz](https://github.com/calico/borzoi/blob/main/data/sequences_human.bed.gz)). 
-[Borzoi V2 Replicate 1](https://storage.googleapis.com/seqnn-share/borzoi/f1/model0_best.h5)
-[Borzoi V2 Replicate 2](https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5)
-[Borzoi V2 Replicate 3](https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5)
+[Borzoi Replicate 0 (human)](https://storage.googleapis.com/seqnn-share/borzoi/f0/model0_best.h5) | [(mouse)](https://storage.googleapis.com/seqnn-share/borzoi/f0/model1_best.h5)
+[Borzoi Replicate 1 (human)](https://storage.googleapis.com/seqnn-share/borzoi/f1/model0_best.h5) | [(mouse)](https://storage.googleapis.com/seqnn-share/borzoi/f1/model1_best.h5)
+[Borzoi Replicate 2 (human)](https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5) | [(mouse)](https://storage.googleapis.com/seqnn-share/borzoi/f2/model1_best.h5)
+[Borzoi Replicate 3 (human)](https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5) | [(mouse)](https://storage.googleapis.com/seqnn-share/borzoi/f3/model1_best.h5)
+ +For convenience, users can run *download_models.sh* to download model replicates and annotations into the 'examples/' folder. +```sh +cd borzoi +./download_models.sh +``` #### Mini Borzoi Models We have trained a collection of (smaller) model instances on various subsets of data modalities (or on all data modalities but with architectural changes compared to the original architecture). For example, some models are trained only on RNA-seq data while others are trained on DNase-, ATAC- and RNA-seq. Similarly, some model instances are trained on human-only data while others are trained on human- and mouse data. The models were trained with either 2- or 4-fold cross-validation and are available at the following URL: From 900be7cb760c189f27475b8d47b6f1931e80d5ba Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 13:59:36 -0700 Subject: [PATCH 06/32] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 498f17c..4c2a314 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ conda activate borzoi_py310 ./env_vars.sh ``` -Alternatively, these environment variables can be set manually: +Alternatively, the environment variables can be set manually: ```sh export BORZOI_DIR=/home//borzoi export PATH=$BORZOI_DIR/src/scripts:$PATH From 9c5df564d0db3acf721d3712db0f2e497c96d3d3 Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 14:00:27 -0700 Subject: [PATCH 07/32] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4c2a314..4a3941d 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ The model weights can be downloaded as .h5 files from the URLs below. We trained [Borzoi Replicate 2 (human)](https://storage.googleapis.com/seqnn-share/borzoi/f2/model0_best.h5) | [(mouse)](https://storage.googleapis.com/seqnn-share/borzoi/f2/model1_best.h5)
[Borzoi Replicate 3 (human)](https://storage.googleapis.com/seqnn-share/borzoi/f3/model0_best.h5) | [(mouse)](https://storage.googleapis.com/seqnn-share/borzoi/f3/model1_best.h5)
-For convenience, users can run *download_models.sh* to download model replicates and annotations into the 'examples/' folder. +Users can run the script *download_models.sh* to download all model replicates and annotations into the 'examples/' folder. ```sh cd borzoi ./download_models.sh From 6f8184d483202a0af726283849ec5a9a510d854b Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 15:16:25 -0700 Subject: [PATCH 08/32] Update README.md --- README.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4a3941d..85cca31 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ For example, here are the weights, targets, and parameter file of a model traine ### Data Availability The training data for Borzoi can be downloaded from the following URL: -[Borzoi V2 Training Data](https://storage.googleapis.com/borzoi-paper/data/)
+[Borzoi Training Data](https://storage.googleapis.com/borzoi-paper/data/)
*Note*: This data bucket is very large and thus set to "Requester Pays". @@ -103,6 +103,24 @@ The curated e-/s-/pa-/ipaQTL benchmarking data can be downloaded from the follow [paQTL Data](https://storage.googleapis.com/borzoi-paper/qtl/paqtl/)
[ipaQTL Data](https://storage.googleapis.com/borzoi-paper/qtl/ipaqtl/)
+### Paper Replication +To replicate the results presented in the paper, visit the [borzoi-paper repository](https://github.com/calico/borzoi-paper.git). This repository contains scripts for **training**, **evaluating**, and **analyzing** the published model. + +### Tutorials +Todo. + +#### Data Processing +Todo. + +#### Model Training +Todo. + +#### Variant Scoring +Todo. + +#### Sequence Attribution +Todo. + ### Example Notebooks The following notebooks contain example code for predicting and interpreting genetic variants. From 933f8c1a5b102435d11f8c7070f96143733dd22e Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 15:25:29 -0700 Subject: [PATCH 09/32] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 85cca31..1bbf9ce 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ The curated e-/s-/pa-/ipaQTL benchmarking data can be downloaded from the follow [ipaQTL Data](https://storage.googleapis.com/borzoi-paper/qtl/ipaqtl/)
### Paper Replication -To replicate the results presented in the paper, visit the [borzoi-paper repository](https://github.com/calico/borzoi-paper.git). This repository contains scripts for **training**, **evaluating**, and **analyzing** the published model. +To replicate the results presented in the paper, visit the [borzoi-paper repository](https://github.com/calico/borzoi-paper.git). This repository contains scripts for **training**, **evaluating**, and **analyzing** the published model, and for processing the **training data**. ### Tutorials Todo. From dd8db20a45cb49299e26d4a8476b8c594d356ab0 Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 15:39:00 -0700 Subject: [PATCH 10/32] Update README.md --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 1bbf9ce..50a627b 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,10 @@ Finally, the code base relies on a number of environment variables. For convenie cd borzoi conda activate borzoi_py310 ./env_vars.sh +cd ../baskerville +./env_vars.sh +cd ../westminster +./env_vars.sh ``` Alternatively, the environment variables can be set manually: @@ -57,11 +61,21 @@ export BORZOI_DIR=/home/<user_path>/borzoi export PATH=$BORZOI_DIR/src/scripts:$PATH export PYTHONPATH=$BORZOI_DIR/src/scripts:$PYTHONPATH +export BASKERVILLE_DIR=/home/<user_path>/baskerville +export PATH=$BASKERVILLE_DIR/src/baskerville/scripts:$PATH +export PYTHONPATH=$BASKERVILLE_DIR/src/baskerville/scripts:$PYTHONPATH + +export WESTMINSTER_DIR=/home/<user_path>/westminster +export PATH=$WESTMINSTER_DIR/src/westminster/scripts:$PATH +export PYTHONPATH=$WESTMINSTER_DIR/src/westminster/scripts:$PYTHONPATH + +export BORZOI_CONDA=/home/<user>/anaconda3/etc/profile.d/conda.sh export BORZOI_HG38=$BORZOI_DIR/examples/hg38 export BORZOI_MM10=$BORZOI_DIR/examples/mm10 ``` +*Note*: The *baskerville* and *westminster* variables are only required for data processing and model training.
+ ### Model Availability The model weights can be downloaded as .h5 files from the URLs below. We trained a total of 4 model replicates with identical train, validation and test splits (test = fold3, validation = fold4 from [sequences_human.bed.gz](https://github.com/calico/borzoi/blob/main/data/sequences_human.bed.gz)). From 10ed86703662f54e4594768522cf8a3faaf336b6 Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 1 Oct 2024 20:24:04 -0700 Subject: [PATCH 11/32] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 50a627b..0cbc43b 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,8 @@ These repositories further depend on a number of python packages (which are auto - Tensorflow == 2.15.x (see [https://www.tensorflow.org/install/pip](https://www.tensorflow.org/install/pip)) *Note*: The example notebooks require jupyter, which can be installed with `pip install notebook`.
-A new conda environment can be created with `conda create -n borzoi_py310 python=3.10`. +A new conda environment can be created with `conda create -n borzoi_py310 python=3.10`.<br/>
+Some of the scripts in this repository start multi-process jobs and require [slurm](https://slurm.schedmd.com/). Finally, the code base relies on a number of environment variables. For convenience, these can be configured in the active conda environment with the 'env_vars.sh' script. ```sh From 462188264fff8efccb54143577acec5e8b3b104f Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Fri, 4 Oct 2024 10:13:56 -0700 Subject: [PATCH 12/32] Added tutorials. Fixed single-species model loading in gradient scripts. --- src/scripts/borzoi_satg_gene.py | 14 +- .../borzoi_satg_gene_crispr_ism_shuffle.py | 14 +- src/scripts/borzoi_satg_gene_focused_ism.py | 14 +- src/scripts/borzoi_satg_polya.py | 14 +- src/scripts/borzoi_satg_splice.py | 14 +- .../interpret_sequence/HBE1_example.gtf | 39 +++ tutorials/latest/interpret_sequence/README.md | 3 + .../explore_grads_k562_HBE1.ipynb | 276 +++++++++++++++ .../run_gradients_expr_HBE1.sh | 3 + .../latest/interpret_sequence/vis_helpers.py | 153 ++++++++ tutorials/latest/make_data/Makefile | 45 +++ tutorials/latest/make_data/README.md | 3 + tutorials/latest/make_data/download_bw.sh | 41 +++ .../latest/make_data/download_dependencies.sh | 97 ++++++ tutorials/latest/make_data/process_w5.sh | 65 ++++ tutorials/latest/make_data/targets_human.txt | 3 + tutorials/latest/score_variants/README.md | 3 + .../score_variants/run_variant_scripts.ipynb | 169 +++++++++ .../latest/score_variants/score_expr_sad.sh | 5 + .../latest/score_variants/score_expr_sed.sh | 5 + .../latest/score_variants/score_polya.sh | 5 + .../latest/score_variants/score_splice.sh | 5 + tutorials/latest/score_variants/snps_expr.vcf | 6 + .../latest/score_variants/snps_polya.vcf | 10 + .../latest/score_variants/snps_splice.vcf | 10 + tutorials/latest/train_model/README.md | 3 + .../latest/train_model/params_micro.json | 74 ++++ tutorials/latest/train_model/params_mini.json | 73 ++++ tutorials/latest/train_model/train_micro.sh | 3 + tutorials/latest/train_model/train_mini.sh 
| 3 + tutorials/legacy/interpret_sequence/README.md | 3 + .../explore_grads_liver_CFHR2.ipynb | 328 ++++++++++++++++++ .../explore_polya_grads_CD99.ipynb | 180 ++++++++++ .../explore_splice_grads_GCFC2.ipynb | 180 ++++++++++ .../run_gradients_expr_CFHR2.sh | 3 + .../run_gradients_polya_CD99.sh | 3 + .../run_gradients_splice_GCFC2.sh | 3 + .../legacy/interpret_sequence/vis_helpers.py | 153 ++++++++ tutorials/legacy/make_data/Makefile | 45 +++ tutorials/legacy/make_data/README.md | 3 + tutorials/legacy/make_data/download_bw.sh | 41 +++ .../legacy/make_data/download_dependencies.sh | 97 ++++++ tutorials/legacy/make_data/process_w5.sh | 65 ++++ tutorials/legacy/make_data/targets_human.txt | 3 + tutorials/legacy/score_variants/README.md | 3 + .../score_variants/run_variant_scripts.ipynb | 201 +++++++++++ .../legacy/score_variants/score_expr_sad.sh | 5 + .../legacy/score_variants/score_expr_sed.sh | 5 + .../legacy/score_variants/score_polya.sh | 5 + .../legacy/score_variants/score_splice.sh | 5 + tutorials/legacy/score_variants/snps_expr.vcf | 6 + .../legacy/score_variants/snps_polya.vcf | 10 + .../legacy/score_variants/snps_splice.vcf | 10 + tutorials/legacy/train_model/README.md | 3 + .../legacy/train_model/params_micro.json | 78 +++++ tutorials/legacy/train_model/params_mini.json | 77 ++++ tutorials/legacy/train_model/train_micro.sh | 3 + tutorials/legacy/train_model/train_mini.sh | 3 + 58 files changed, 2683 insertions(+), 10 deletions(-) create mode 100644 tutorials/latest/interpret_sequence/HBE1_example.gtf create mode 100644 tutorials/latest/interpret_sequence/README.md create mode 100644 tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb create mode 100755 tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh create mode 100644 tutorials/latest/interpret_sequence/vis_helpers.py create mode 100644 tutorials/latest/make_data/Makefile create mode 100644 tutorials/latest/make_data/README.md create mode 100755 
tutorials/latest/make_data/download_bw.sh create mode 100755 tutorials/latest/make_data/download_dependencies.sh create mode 100755 tutorials/latest/make_data/process_w5.sh create mode 100644 tutorials/latest/make_data/targets_human.txt create mode 100644 tutorials/latest/score_variants/README.md create mode 100644 tutorials/latest/score_variants/run_variant_scripts.ipynb create mode 100644 tutorials/latest/score_variants/score_expr_sad.sh create mode 100755 tutorials/latest/score_variants/score_expr_sed.sh create mode 100644 tutorials/latest/score_variants/score_polya.sh create mode 100644 tutorials/latest/score_variants/score_splice.sh create mode 100644 tutorials/latest/score_variants/snps_expr.vcf create mode 100644 tutorials/latest/score_variants/snps_polya.vcf create mode 100644 tutorials/latest/score_variants/snps_splice.vcf create mode 100644 tutorials/latest/train_model/README.md create mode 100644 tutorials/latest/train_model/params_micro.json create mode 100644 tutorials/latest/train_model/params_mini.json create mode 100755 tutorials/latest/train_model/train_micro.sh create mode 100755 tutorials/latest/train_model/train_mini.sh create mode 100644 tutorials/legacy/interpret_sequence/README.md create mode 100644 tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb create mode 100644 tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb create mode 100644 tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb create mode 100755 tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh create mode 100755 tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh create mode 100755 tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh create mode 100644 tutorials/legacy/interpret_sequence/vis_helpers.py create mode 100644 tutorials/legacy/make_data/Makefile create mode 100644 tutorials/legacy/make_data/README.md create mode 100755 tutorials/legacy/make_data/download_bw.sh create mode 100755 
tutorials/legacy/make_data/download_dependencies.sh create mode 100755 tutorials/legacy/make_data/process_w5.sh create mode 100644 tutorials/legacy/make_data/targets_human.txt create mode 100644 tutorials/legacy/score_variants/README.md create mode 100644 tutorials/legacy/score_variants/run_variant_scripts.ipynb create mode 100755 tutorials/legacy/score_variants/score_expr_sad.sh create mode 100755 tutorials/legacy/score_variants/score_expr_sed.sh create mode 100755 tutorials/legacy/score_variants/score_polya.sh create mode 100755 tutorials/legacy/score_variants/score_splice.sh create mode 100644 tutorials/legacy/score_variants/snps_expr.vcf create mode 100644 tutorials/legacy/score_variants/snps_polya.vcf create mode 100644 tutorials/legacy/score_variants/snps_splice.vcf create mode 100644 tutorials/legacy/train_model/README.md create mode 100644 tutorials/legacy/train_model/params_micro.json create mode 100644 tutorials/legacy/train_model/params_mini.json create mode 100755 tutorials/legacy/train_model/train_micro.sh create mode 100755 tutorials/legacy/train_model/train_mini.sh diff --git a/src/scripts/borzoi_satg_gene.py b/src/scripts/borzoi_satg_gene.py index 1c96712..9429498 100755 --- a/src/scripts/borzoi_satg_gene.py +++ b/src/scripts/borzoi_satg_gene.py @@ -229,8 +229,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -308,8 +313,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + 
str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py b/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py index b3fd477..0db478d 100755 --- a/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py +++ b/src/scripts/borzoi_satg_gene_crispr_ism_shuffle.py @@ -252,8 +252,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -376,8 +381,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_gene_focused_ism.py b/src/scripts/borzoi_satg_gene_focused_ism.py index f095be8..5ee58ca 100755 --- a/src/scripts/borzoi_satg_gene_focused_ism.py +++ 
b/src/scripts/borzoi_satg_gene_focused_ism.py @@ -267,8 +267,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -514,8 +519,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_polya.py b/src/scripts/borzoi_satg_polya.py index 9f26eba..98206a1 100755 --- a/src/scripts/borzoi_satg_polya.py +++ b/src/scripts/borzoi_satg_polya.py @@ -180,8 +180,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -309,8 +314,13 @@ def main(): # load model fold seqnn_model = 
seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/src/scripts/borzoi_satg_splice.py b/src/scripts/borzoi_satg_splice.py index 473192f..24648ce 100755 --- a/src/scripts/borzoi_satg_splice.py +++ b/src/scripts/borzoi_satg_splice.py @@ -181,8 +181,13 @@ def main(): # load first model fold to get parameters seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(options.folds[0]) + "c0/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(options.folds[0]) + "c0/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) @@ -281,8 +286,13 @@ def main(): # load model fold seqnn_model = seqnn.SeqNN(params_model) + + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5" + if not os.path.isfile(model_path) : + model_path = model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model_best.h5" + seqnn_model.restore( - model_folder + "/f" + str(fold_ix) + "c" + str(cross_ix) + "/train/model" + str(options.head_i) + "_best.h5", + model_path, options.head_i ) seqnn_model.build_slice(targets_df.index, False) diff --git a/tutorials/latest/interpret_sequence/HBE1_example.gtf b/tutorials/latest/interpret_sequence/HBE1_example.gtf new file mode 100644 index 0000000..6e39119 --- /dev/null 
+++ b/tutorials/latest/interpret_sequence/HBE1_example.gtf @@ -0,0 +1,39 @@ +chr11 HAVANA transcript 5268345 5269945 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA exon 5269799 5269945 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA CDS 5269799 5269890 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA start_codon 5269888 5269890 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA exon 5269454 5269676 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 2; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA CDS 5269454 5269676 . - 1 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 2; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA exon 5268345 5268597 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA CDS 5268472 5268597 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA stop_codon 5268469 5268471 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA UTR 5269891 5269945 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 1; exon_id "ENSE00003817775.1"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA UTR 5268345 5268471 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000396895.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-203"; exon_number 3; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000380104.2"; transcript_support_level "5"; hgnc_id "HGNC:4830"; tag "CAGE_supported_TSS"; tag "basic"; tag "Ensembl_canonical"; tag "MANE_Select"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000494678.3"; +chr11 HAVANA transcript 5268345 5505604 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5505569 5505604 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 1; exon_id "ENSE00001484269.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5281909 5281951 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 2; exon_id "ENSE00001484268.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5269799 5270156 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA CDS 5269799 5269890 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA start_codon 5269888 5269890 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5269454 5269676 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 4; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA CDS 5269454 5269676 . 
- 1 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 4; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA exon 5268345 5268597 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA CDS 5268472 5268597 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA stop_codon 5268469 5268471 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5505569 5505604 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 1; exon_id "ENSE00001484269.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5281909 5281951 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 2; exon_id "ENSE00001484268.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5269891 5270156 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 3; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA UTR 5268345 5268471 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000380237.5"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-202"; exon_number 5; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000369586.1"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "CAGE_supported_TSS"; tag "dotter_confirmed"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142973.4"; +chr11 HAVANA transcript 5268345 5505652 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5505569 5505652 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 1; exon_id "ENSE00001526635.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5269799 5270156 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA CDS 5269799 5269890 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA start_codon 5269888 5269890 . 
- 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5269454 5269676 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 3; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA CDS 5269454 5269676 . - 1 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 3; exon_id "ENSE00001057367.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA exon 5268345 5268597 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA CDS 5268472 5268597 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA stop_codon 5268469 5268471 . - 0 gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA UTR 5505569 5505652 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 1; exon_id "ENSE00001526635.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA UTR 5269891 5270156 . - . gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 2; exon_id "ENSE00001484266.1"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; +chr11 HAVANA UTR 5268345 5268471 . - . 
gene_id "ENSG00000213931.7"; transcript_id "ENST00000292896.3"; gene_type "protein_coding"; gene_name "HBE1"; transcript_type "protein_coding"; transcript_name "HBE1-201"; exon_number 4; exon_id "ENSE00001484208.2"; level 2; protein_id "ENSP00000292896.2"; transcript_support_level "1"; hgnc_id "HGNC:4830"; tag "alternative_5_UTR"; tag "upstream_uORF"; tag "dotter_confirmed"; tag "RNA_Seq_supported_partial"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS7756.1"; havana_gene "OTTHUMG00000066675.9"; havana_transcript "OTTHUMT00000142974.5"; diff --git a/tutorials/latest/interpret_sequence/README.md b/tutorials/latest/interpret_sequence/README.md new file mode 100644 index 0000000..1ac18dd --- /dev/null +++ b/tutorials/latest/interpret_sequence/README.md @@ -0,0 +1,3 @@ +## Interpretation + +Todo. diff --git a/tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb b/tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb new file mode 100644 index 0000000..dc044d5 --- /dev/null +++ b/tutorials/latest/interpret_sequence/explore_grads_k562_HBE1.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3bcaea3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores_hyp.shape = (1, 1, 393216, 4)\n", + "scores.shape = (1, 1, 393216, 4)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load scores for the selected set of targets (grad)\n", + "\n", + "import 
gc\n", + "\n", + "seqs = None\n", + "strands = None\n", + "chrs = None\n", + "starts = None\n", + "ends = None\n", + "genes = None\n", + "\n", + "all_scores_hyp = []\n", + "all_scores = []\n", + "\n", + "gtex_tissues = ['liver']\n", + "\n", + "#Load score file\n", + "score_file = h5py.File('k562_HBE1/scores_f0c0.h5', 'r')\n", + "\n", + "#Get scores and onehots\n", + "scores = score_file['grads'][()][..., 0]\n", + "seqs = score_file['seqs'][()]\n", + "\n", + "#Get auxiliary information\n", + "strands = score_file['strand'][()]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", + "\n", + "chrs = score_file['chr'][()]\n", + "chrs = np.array([chrs[j].decode() for j in range(chrs.shape[0])])\n", + "\n", + "starts = np.array(score_file['start'][()])\n", + "ends = np.array(score_file['end'][()])\n", + "\n", + "genes = score_file['gene'][()]\n", + "genes = np.array([genes[j].decode().split(\".\")[0] for j in range(genes.shape[0])])\n", + "\n", + "#Append hypothetical scores\n", + "all_scores_hyp.append(scores[None, ...])\n", + "\n", + "#Append input-gated scores\n", + "all_scores.append((scores * seqs)[None, ...])\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n", + "\n", + "#Collect final scores\n", + "scores_hyp = np.concatenate(all_scores_hyp, axis=0)\n", + "scores = np.concatenate(all_scores, axis=0)\n", + "\n", + "print(\"scores_hyp.shape = \" + str(scores_hyp.shape))\n", + "print(\"scores.shape = \" + str(scores.shape))\n", + "\n", + "score_file = None\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "955bf762", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "#Enumerate and visualize attributions; k562 example HBE1\n", + "\n", + "save_index = []\n", + "\n", + "#Visualization parameters\n", + "logo_width = 192\n", + "\n", + "top_n = 1\n", + "\n", + "use_gaussian = True\n", + "min_padding = 65536\n", + "gaussian_sigma = 8\n", + 
"local_window = 1024\n", + "\n", + "main_tissue_ix = 0\n", + "\n", + "tissue_colors = ['darkblue']\n", + "\n", + "#Loop over examples\n", + "for example_ix in range(top_n) :\n", + " \n", + " print(\"-- Example = \" + str(example_ix)+ \" --\")\n", + " \n", + " print(\" - \" + genes[example_ix] + \"(\" + str(strands[example_ix]) + \")\")\n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]))\n", + "\n", + " #Grad analysis\n", + " \n", + " #Calculate min and max scores globally (for scales)\n", + " min_val = np.min(scores[:, example_ix, ...])\n", + " max_val = np.max(scores[:, example_ix, ...])\n", + " \n", + " print(\" -- min_val = \" + str(round(min_val, 4)))\n", + " print(\" -- max_val = \" + str(round(max_val, 4)))\n", + " \n", + " max_abs_val = max(np.abs(min_val), np.abs(max_val))\n", + "\n", + " min_val -= 0.1 * max_abs_val\n", + " max_val += 0.1 * max_abs_val\n", + "\n", + " print(\" - (Gradient score profiles per tissue) - \")\n", + " \n", + " #Gradient profiles across input sequence\n", + " f, ax = plt.subplots(len(gtex_tissues), 1, figsize=(8, len(gtex_tissues) * 1.5))\n", + " \n", + " if len(gtex_tissues) == 1 :\n", + " ax = [ax]\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + "\n", + " #Get tissue scores\n", + " score = scores[tissue_ix, example_ix, ...]\n", + "\n", + " l1 = ax[tissue_ix].plot(np.arange(seqs.shape[1]), np.sum(score, axis=-1), linewidth=1, linestyle='-', color=tissue_colors[tissue_ix], label=gtex_tissues[tissue_ix])\n", + " \n", + " plt.sca(ax[tissue_ix])\n", + " \n", + " plt.xlim(0, seqs.shape[1])\n", + " plt.ylim(min_val, max_val)\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + " \n", + " plt.sca(ax[0])\n", + " plt.title(\"Gradient Saliency for gene = '\" + genes[example_ix] + \"' (\" + str(strands[example_ix]) + \")\", fontsize=8)\n", + " \n", + " 
plt.sca(ax[len(gtex_tissues)-1])\n", + " plt.xlabel(chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]), fontsize=8)\n", + " \n", + " plt.sca(plt.gca())\n", + " plt.tight_layout()\n", + " \n", + " plt.show()\n", + "\n", + " #Apply gaussian filter\n", + " smooth_score = np.sum(scores[main_tissue_ix, example_ix, ...], axis=-1)\n", + " if use_gaussian :\n", + " smooth_score = gaussian_filter1d(smooth_score.astype('float32'), sigma=gaussian_sigma, truncate=2).astype('float16')\n", + " \n", + " #Calculate min/max positions and (differential) values\n", + " #max_pos = np.argmax(smooth_score[min_padding:-min_padding]) + min_padding\n", + " \n", + " max_pos = np.argmax(smooth_score[min_padding:-min_padding]) + min_padding\n", + "\n", + " print(\" - (Attribution at position of Max positive differential saliency) -\")\n", + "\n", + " print(\" - max_pos (rel) = \" + str(max_pos))\n", + " print(\" - max_pos (abs) = \" + str(starts[example_ix] + max_pos))\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - logo_width // 2\n", + " plot_end = max_pos + logo_width // 2\n", + " \n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix] + max_pos - logo_width // 2) + \"-\" + str(starts[example_ix] + max_pos + logo_width // 2))\n", + "\n", + " #Logo min/max value across tissues\n", + " min_logo_val = np.min(scores[:, example_ix, plot_start:plot_end, :])\n", + " max_logo_val = np.max(scores[:, example_ix, plot_start:plot_end, :])\n", + "\n", + " max_abs_logo_val = max(np.abs(min_logo_val), np.abs(max_logo_val))\n", + "\n", + " min_logo_val -= 0.02 * max_abs_logo_val\n", + " max_logo_val += 0.02 * max_abs_logo_val\n", + "\n", + " print(\" - y_min = \" + str(round(min_logo_val, 8)))\n", + " print(\" - y_max = \" + str(round(max_logo_val, 8)))\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + " print(gtex_tissues[tissue_ix])\n", + "\n", + " #Get tissue-specific scores\n", 
+ " score = scores[tissue_ix, example_ix, plot_start:plot_end, :]\n", + "\n", + " #Plot scores as sequence logo\n", + " plot_seq_scores(\n", + " score,\n", + " y_min=min_logo_val,\n", + " y_max=max_logo_val,\n", + " figsize=(8, 1),\n", + " plot_y_ticks=False,\n", + " )\n", + " \n", + " print(\"--------------------\")\n", + " print(\"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67a3cf9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh b/tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh new file mode 100755 index 0000000..987a843 --- /dev/null +++ b/tutorials/latest/interpret_sequence/run_gradients_expr_HBE1.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_gene.py -o k562_HBE1 -f 0 -c 0 --rc --track_scale 0.3 --track_transform 0.5 --clip_soft 384.0 -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models HBE1_example.gtf diff --git a/tutorials/latest/interpret_sequence/vis_helpers.py b/tutorials/latest/interpret_sequence/vis_helpers.py new file mode 100644 index 0000000..00b92ef --- /dev/null +++ b/tutorials/latest/interpret_sequence/vis_helpers.py @@ -0,0 +1,153 @@ +import sys +import os +import numpy as np + +import matplotlib.pyplot as plt + +import matplotlib.cm as cm +import matplotlib.colors as colors + +import matplotlib as mpl +from matplotlib.text import TextPath +from matplotlib.patches import PathPatch, Rectangle +from matplotlib.font_manager import FontProperties +from 
matplotlib import gridspec +from matplotlib.ticker import FormatStrFormatter + +#Helper function to draw a letter at a given position +def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): + + fp = FontProperties(family="DejaVu Sans", weight="bold") + globscale = 1.35 + LETTERS = { "T" : TextPath((-0.305, 0), "T", size=1, prop=fp), + "G" : TextPath((-0.384, 0), "G", size=1, prop=fp), + "A" : TextPath((-0.35, 0), "A", size=1, prop=fp), + "C" : TextPath((-0.366, 0), "C", size=1, prop=fp), + "UP" : TextPath((-0.488, 0), '$\\Uparrow$', size=1, prop=fp), + "DN" : TextPath((-0.488, 0), '$\\Downarrow$', size=1, prop=fp), + "(" : TextPath((-0.25, 0), "(", size=1, prop=fp), + "." : TextPath((-0.125, 0), "-", size=1, prop=fp), + ")" : TextPath((-0.1, 0), ")", size=1, prop=fp)} + COLOR_SCHEME = {'G': 'orange',#'orange', + 'A': 'green',#'red', + 'C': 'blue',#'blue', + 'T': 'red',#'darkgreen', + 'UP': 'green', + 'DN': 'red', + '(': 'black', + '.': 'black', + ')': 'black'} + + + text = LETTERS[letter] + + chosen_color = COLOR_SCHEME[letter] + if color is not None : + chosen_color = color + + t = mpl.transforms.Affine2D().scale(1*globscale, yscale*globscale) + \ + mpl.transforms.Affine2D().translate(x,y) + ax.transData + p = PathPatch(text, lw=0, fc=chosen_color, alpha=alpha, transform=t) + if ax != None: + ax.add_artist(p) + return p + +#Function to plot sequence logo +def plot_seq_scores(importance_scores, figsize=(16, 2), plot_y_ticks=True, y_min=None, y_max=None, save_figs=False, fig_name="default") : + + importance_scores = importance_scores.T + + fig = plt.figure(figsize=figsize) + + ref_seq = "" + for j in range(importance_scores.shape[1]) : + argmax_nt = np.argmax(np.abs(importance_scores[:, j])) + + if argmax_nt == 0 : + ref_seq += "A" + elif argmax_nt == 1 : + ref_seq += "C" + elif argmax_nt == 2 : + ref_seq += "G" + elif argmax_nt == 3 : + ref_seq += "T" + + ax = plt.gca() + + for i in range(0, len(ref_seq)) : + mutability_score = 
np.sum(importance_scores[:, i]) + color = None + dna_letter_at(ref_seq[i], i + 0.5, 0, mutability_score, ax, color=color) + + plt.sca(ax) + plt.xticks([], []) + plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%.3f')) + + plt.xlim((0, len(ref_seq))) + + #plt.axis('off') + + if plot_y_ticks : + plt.yticks(fontsize=12) + else : + plt.yticks([], []) + + if y_min is not None and y_max is not None : + plt.ylim(y_min, y_max) + elif y_min is not None : + plt.ylim(y_min) + else : + plt.ylim( + np.min(importance_scores) - 0.1 * np.max(np.abs(importance_scores)), + np.max(importance_scores) + 0.1 * np.max(np.abs(importance_scores)) + ) + + plt.axhline(y=0., color='black', linestyle='-', linewidth=1) + + #for axis in fig.axes : + # axis.get_xaxis().set_visible(False) + # axis.get_yaxis().set_visible(False) + + plt.tight_layout() + + if save_figs : + plt.savefig(fig_name + ".png", transparent=True, dpi=300) + plt.savefig(fig_name + ".eps") + + plt.show() + +#Function to visualize a pair of sequence logos +def visualize_input_gradient_pair(att_grad_wt, att_grad_mut, plot_start=0, plot_end=100, save_figs=False, fig_name='') : + + scores_wt = att_grad_wt[plot_start:plot_end, :] + scores_mut = att_grad_mut[plot_start:plot_end, :] + + y_min = min(np.min(scores_wt), np.min(scores_mut)) + y_max = max(np.max(scores_wt), np.max(scores_mut)) + + y_max_abs = max(np.abs(y_min), np.abs(y_max)) + + y_min = y_min - 0.05 * y_max_abs + y_max = y_max + 0.05 * y_max_abs + + if np.sum(scores_mut) != 0. : + print("--- WT ---") + + plot_seq_scores( + scores_wt, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_wt', + ) + + if np.sum(scores_mut) != 0. 
: + + print("--- Mut ---") + plot_seq_scores( + scores_mut, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_mut', + ) diff --git a/tutorials/latest/make_data/Makefile b/tutorials/latest/make_data/Makefile new file mode 100644 index 0000000..c47bb3d --- /dev/null +++ b/tutorials/latest/make_data/Makefile @@ -0,0 +1,45 @@ +FASTA_HUMAN=$$BORZOI_HG38/assembly/gnomad/hg38.ml.fa +GAPS_HUMAN=$$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed +UMAP_HUMAN=$$BORZOI_HG38/mappability/umap_k36_t10_l32.bed +BLACK_HUMAN=$$BORZOI_HG38/blacklist/blacklist_hg38_all.bed + +FASTA_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10.ml.fa +GAPS_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed +UMAP_MOUSE=$$BORZOI_MM10/mappability/umap_k36_t10_l32.bed +BLACK_MOUSE=$$BORZOI_MM10/blacklist/blacklist_mm10_all.bed + +ALIGN=$$BORZOI_HG38/align/hg38.mm10.syn.net.gz + +OUT=data + +# mini borzoi configuration +LENGTH=393216 +TSTRIDE=43691 # (393216-2*131072)/3 +CROP=0 +WIDTH=32 +FOLDS=8 + +AOPTS=--break 2097152 -c $(CROP) --nf 524288 --no 393216 -l $(LENGTH) --stride $(TSTRIDE) -f $(FOLDS) --umap_t 0.5 -w $(WIDTH) +DOPTS=-c $(CROP) -d 2 -f $(FOLDS) -l $(LENGTH) -p 64 -r 16 --umap_clip 0.5 -w $(WIDTH) + +all: $(OUT)/hg38/tfrecords/train-0.tfr # $(OUT)/mm10/tfrecords/train-0.tfr + +umap_human.bed: + cat $(UMAP_HUMAN) $(BLACK_HUMAN) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_human.bed + +umap_mouse.bed: + cat $(UMAP_MOUSE) $(BLACK_MOUSE) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_mouse.bed + +# targets file is already generated in this example +#targets_human.txt targets_mouse.txt: +# ./make_targets.py + +$(OUT)/hg38/sequences.bed $(OUT)/mm10/sequences.bed: umap_human.bed umap_mouse.bed + hound_data_align.py -a hg38,mm10 -g $(GAPS_HUMAN),$(GAPS_MOUSE) -u umap_human.bed,umap_mouse.bed $(AOPTS) -o $(OUT) $(ALIGN) $(FASTA_HUMAN),$(FASTA_MOUSE) + 
+$(OUT)/hg38/tfrecords/train-0.tfr: $(OUT)/hg38/sequences.bed targets_human.txt + hound_data.py --restart $(DOPTS) -b $(BLACK_HUMAN) -o $(OUT)/hg38 $(FASTA_HUMAN) -u umap_human.bed targets_human.txt + +# no mouse data in this example +#$(OUT)/mm10/tfrecords/train-0.tfr: $(OUT)/mm10/sequences.bed targets_mouse.txt +# hound_data.py --restart $(DOPTS) -b $(BLACK_MOUSE) -o $(OUT)/mm10 $(FASTA_MOUSE) -u umap_mouse.bed targets_mouse.txt diff --git a/tutorials/latest/make_data/README.md b/tutorials/latest/make_data/README.md new file mode 100644 index 0000000..035a37d --- /dev/null +++ b/tutorials/latest/make_data/README.md @@ -0,0 +1,3 @@ +## Data Processing + +Todo. diff --git a/tutorials/latest/make_data/download_bw.sh b/tutorials/latest/make_data/download_bw.sh new file mode 100755 index 0000000..239f004 --- /dev/null +++ b/tutorials/latest/make_data/download_bw.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# download example data from ENCODE (ENCSR000AEL - K562 RNA-seq); 2 replicates + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define remote urls +URL_P_REP1='https://www.encodeproject.org/files/ENCFF980ZHM/@@download/ENCFF980ZHM.bigWig' +URL_M_REP1='https://www.encodeproject.org/files/ENCFF533LJF/@@download/ENCFF533LJF.bigWig' + +URL_P_REP2='https://www.encodeproject.org/files/ENCFF335LVS/@@download/ENCFF335LVS.bigWig' +URL_M_REP2='https://www.encodeproject.org/files/ENCFF257NOL/@@download/ENCFF257NOL.bigWig' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for bigwig files +mkdir -p "human/rna/encode/$ENC_ID/rep1" +mkdir -p "human/rna/encode/$ENC_ID/rep2" + + +# download bigwig files; rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 1)." 
+else + wget $URL_P_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" + wget $URL_M_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" +fi + +# download bigwig files; rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 2)." +else + wget $URL_P_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" + wget $URL_M_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" +fi diff --git a/tutorials/latest/make_data/download_dependencies.sh b/tutorials/latest/make_data/download_dependencies.sh new file mode 100755 index 0000000..cd23a51 --- /dev/null +++ b/tutorials/latest/make_data/download_dependencies.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# create additional folder in borzoi data folders +mkdir -p "$BORZOI_HG38/assembly/ucsc" +mkdir -p "$BORZOI_HG38/assembly/gnomad" +mkdir -p "$BORZOI_HG38/mappability" +mkdir -p "$BORZOI_HG38/blacklist" +mkdir -p "$BORZOI_HG38/align" + +mkdir -p "$BORZOI_MM10/assembly/ucsc" +mkdir -p "$BORZOI_MM10/mappability" +mkdir -p "$BORZOI_MM10/blacklist" + + +# download and uncompress auxiliary files required for Makefile (hg38) +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" ]; then + echo "hg38_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gaps.bed.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" +fi + +if [ -f "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (hg38) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_hg38.bed.gz | gunzip -c > "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" ]; then + echo "blacklist_hg38_all.bed already exists." 
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_hg38_all.bed.gz | gunzip -c > "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" +fi + +if [ -f "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" ]; then + echo "hg38.mm10.syn.net.gz already exists." +else + wget https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.mm10.syn.net.gz -O "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" +fi + + +# download and uncompress auxiliary files required for Makefile (mm10) +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" ]; then + echo "mm10_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10_gaps.bed.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" +fi + +if [ -f "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (mm10) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_mm10.bed.gz | gunzip -c > "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" ]; then + echo "blacklist_mm10_all.bed already exists."
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" +fi + +# download and index hg38 ml genome (gnomad major alleles) +if [ -f "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" ]; then + echo "hg38.ml.fa (gnomad) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gnomad.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" +fi + +# download and index mm10 ml genome +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" ]; then + echo "mm10.ml.fa already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10.ml.fa.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" + idx_genome.py "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" +fi diff --git a/tutorials/latest/make_data/process_w5.sh b/tutorials/latest/make_data/process_w5.sh new file mode 100755 index 0000000..9caa697 --- /dev/null +++ b/tutorials/latest/make_data/process_w5.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# merge bigwig replicates, generate .w5 files and run qc + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for merged replicate files +mkdir -p "human/rna/encode/$ENC_ID/summary" + + +# step 1: generate per-replicate .w5 files + +# rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 1)." 
+else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" +fi + +# rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 2)." +else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 2: merge replicates + +if [ -f "human/rna/encode/$ENC_ID/summary/coverage+.w5" ]; then + echo "example RNA-seq .w5 already exists (merged)." +else + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage+.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage-.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 3: run qc on each replicate and the merged file + +if [ -f "human/rna/encode/$ENC_ID/summary/covqc/means.txt" ]; then + echo "qc statistics already exist." 
+else + # rep1 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc_m" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" + + # rep2 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc_m" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" + + # summary + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc" "human/rna/encode/$ENC_ID/summary/coverage+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc_m" "human/rna/encode/$ENC_ID/summary/coverage-.w5" +fi + diff --git a/tutorials/latest/make_data/targets_human.txt b/tutorials/latest/make_data/targets_human.txt new file mode 100644 index 0000000..0baf8d7 --- /dev/null +++ b/tutorials/latest/make_data/targets_human.txt @@ -0,0 +1,3 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +0 ENCFF980ZHM+ human/rna/encode/ENCSR000AEL/summary/coverage+.w5 768 384 0.3 sum_sqrt 1 RNA:K562 +1 ENCFF980ZHM- human/rna/encode/ENCSR000AEL/summary/coverage-.w5 768 384 0.3 sum_sqrt 0 RNA:K562 diff --git a/tutorials/latest/score_variants/README.md b/tutorials/latest/score_variants/README.md new file mode 100644 index 0000000..827434f --- /dev/null +++ b/tutorials/latest/score_variants/README.md @@ -0,0 +1,3 @@ +## Variant Scoring + +Todo. 
diff --git a/tutorials/latest/score_variants/run_variant_scripts.ipynb b/tutorials/latest/score_variants/run_variant_scripts.ipynb new file mode 100644 index 0000000..db9a747 --- /dev/null +++ b/tutorials/latest/score_variants/run_variant_scripts.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f5d0f9fb", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import h5py\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a94cbf8", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-specific variant effect scores\n", + "\n", + "!./score_expr_sed.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1047ff0f", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (gene-specific expression)\n", + "\n", + "sed_h5 = h5py.File('snp_sed/f0c0/sed.h5', 'r')\n", + "\n", + "row_ix = 63\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logSED', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['logSED'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f105ecd9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-agnostic variant effect scores\n", + "\n", + "!./score_expr_sad.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96e4f7cb", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP (gene-agnostic expression)\n", + "\n", + "sad_h5 = h5py.File('snp_sad/f0c0/sad.h5', 'r')\n", + "\n", + "snp_ix = 1\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logD2', snp: '\" + 
str(sad_h5['snp'][snp_ix].decode()) + \"', track: '\" + str(sad_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sad_h5['logD2'][snp_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c56efaef", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate splice variant effect scores\n", + "\n", + "!./score_splice.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980993fc", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (splicing)\n", + "\n", + "sed_h5 = h5py.File('snp_splice/f0c0/sed.h5', 'r')\n", + "\n", + "row_ix = 116\n", + "target_ix = 755\n", + "\n", + "print(\"score: 'nDi', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['nDi'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05cccfb6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate polyadenylation variant effect scores\n", + "\n", + "!./score_polya.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43ac562f", + "metadata": {}, + "outputs": [], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (polyadenylation)\n", + "\n", + "sed_h5 = h5py.File('snp_polya/f0c0/sed.h5', 'r')\n", + "\n", + "row_ix = 47\n", + "target_ix = 100\n", + "\n", + "print(\"score: 'COVR', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['COVR'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba23572", 
+ "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/latest/score_variants/score_expr_sad.sh b/tutorials/latest/score_variants/score_expr_sad.sh new file mode 100644 index 0000000..5e66a53 --- /dev/null +++ b/tutorials/latest/score_variants/score_expr_sad.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sad/f0c0 + +borzoi_sad.py -o snp_sad/f0c0 --rc --stats logD2 -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_expr.vcf diff --git a/tutorials/latest/score_variants/score_expr_sed.sh b/tutorials/latest/score_variants/score_expr_sed.sh new file mode 100755 index 0000000..79587bb --- /dev/null +++ b/tutorials/latest/score_variants/score_expr_sed.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sed/f0c0 + +borzoi_sed.py -o snp_sed/f0c0 --rc --stats logSED,logD2 -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_expr.vcf diff --git a/tutorials/latest/score_variants/score_polya.sh b/tutorials/latest/score_variants/score_polya.sh new file mode 100644 index 0000000..a4b6a06 --- /dev/null +++ b/tutorials/latest/score_variants/score_polya.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_polya/f0c0 + +borzoi_sed_paqtl_cov.py -o snp_polya/f0c0 --rc --stats COVR -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_polya.vcf diff --git a/tutorials/latest/score_variants/score_splice.sh 
b/tutorials/latest/score_variants/score_splice.sh new file mode 100644 index 0000000..db78c57 --- /dev/null +++ b/tutorials/latest/score_variants/score_splice.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_splice/f0c0 + +borzoi_sed.py -o snp_splice/f0c0 --span --no_untransform --rc --stats nDi -t ../make_data/targets_human.txt ../train_model/params_mini.json ../train_model/mini_models/f0c0/train/model_best.h5 snps_splice.vcf diff --git a/tutorials/latest/score_variants/snps_expr.vcf b/tutorials/latest/score_variants/snps_expr.vcf new file mode 100644 index 0000000..bb8d7cc --- /dev/null +++ b/tutorials/latest/score_variants/snps_expr.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +chr1 43110773 chr1_43110773_G_A_b38 G A . . +chr1 43120331 chr1_43120331_C_T_b38 C T . . +chr1 46309111 chr1_46309111_A_G_b38 A G . . +chr1 52632886 chr1_52632886_A_C_b38 A C . . +chr1 54053434 chr1_54053434_G_A_b38 G A . . diff --git a/tutorials/latest/score_variants/snps_polya.vcf b/tutorials/latest/score_variants/snps_polya.vcf new file mode 100644 index 0000000..5be4cad --- /dev/null +++ b/tutorials/latest/score_variants/snps_polya.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 11790946 chr1_11790946_G_C G C . . MT=ENSG00000177000.grp_2.downstream.ENST00000641805;PD=924;PI=chr1_11790946_G_C +chr1 150160094 chr1_150160094_C_G C G . . MT=ENSG00000023902.grp_1.downstream.ENST00000369126;PD=29;PI=chr1_150160094_C_G +chr16 57665101 chr16_57665101_A_G A G . . MT=ENSG00000205336.grp_1.downstream.ENST00000568908;PD=73;PI=chr16_57665101_A_G +chr16 80976052 chr16_80976052_T_G T G . . MT=ENSG00000103121.grp_2.downstream.ENST00000565925;PD=24;PI=chr16_80976052_T_G +chr16 88857261 chr16_88857261_T_C T C . . 
MT=ENSG00000167515.grp_2.downstream.ENST00000564547;PD=3851;PI=chr16_88857261_T_C \ No newline at end of file diff --git a/tutorials/latest/score_variants/snps_splice.vcf b/tutorials/latest/score_variants/snps_splice.vcf new file mode 100644 index 0000000..710eaf2 --- /dev/null +++ b/tutorials/latest/score_variants/snps_splice.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 1665061 chr1_1665061_C_T C T . . MT=ENSG00000189339.grp_2.contained.ENST00000611123;SD=959;PI=chr1_1665061_C_T +chr1 1689221 chr1_1689221_G_A G A . . MT=ENSG00000189339.grp_1.contained.ENST00000614300;SD=1753;PI=chr1_1689221_G_A +chr1 50655526 chr1_50655526_T_C T C . . MT=ENSG00000185104.grp_2.contained.ENST00000396153;SD=3;PI=chr1_50655526_T_C +chr1 109489368 chr1_109489368_C_G C G . . MT=ENSG00000143537.grp_2.contained.ENST00000360674;SD=1;PI=chr1_155060832_G_A +chr1 156236330 chr1_156236330_G_A G A . . MT=ENSG00000160783.grp_1.contained.ENST00000368279;SD=17;PI=chr1_156236330_G_A diff --git a/tutorials/latest/train_model/README.md b/tutorials/latest/train_model/README.md new file mode 100644 index 0000000..1587061 --- /dev/null +++ b/tutorials/latest/train_model/README.md @@ -0,0 +1,3 @@ +## Model Training + +Todo. 
diff --git a/tutorials/latest/train_model/params_micro.json b/tutorials/latest/train_model/params_micro.json new file mode 100644 index 0000000..ab03fc6 --- /dev/null +++ b/tutorials/latest/train_model/params_micro.json @@ -0,0 +1,74 @@ +{ + "train": { + "batch_size": 4, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0002, + "loss": "poisson_mn", + "total_weight": 0.2, + "weight_range": 8, + "weight_exp": 6, + "warmup_steps": 10000, + "global_clipnorm": 0.2, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 1.0e-6, + "trunk": [ + { + "name": "conv_dna", + "filters": 128, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 160, + "filters_end": 320, + "divisible_by": 8, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 32, + "heads": 4, + "num_position_features": 32, + "dropout": 0.1, + "attention_dropout": 0.01, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 4 + }, + { + "name": "unet_conv", + "kernel_size": 3 + }, + { + "name": "unet_conv", + "kernel_size": 3 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git a/tutorials/latest/train_model/params_mini.json b/tutorials/latest/train_model/params_mini.json new file mode 100644 index 0000000..d3907ae --- /dev/null +++ b/tutorials/latest/train_model/params_mini.json @@ -0,0 +1,73 @@ +{ + "train": { + "batch_size": 2, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0001, + "loss": "poisson_mn", + "total_weight": 0.2, + "weight_range": 8, + 
"weight_exp": 6, + "warmup_steps": 20000, + "global_clipnorm": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 5.0e-7, + "trunk": [ + { + "name": "conv_dna", + "filters": 320, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 384, + "filters_end": 768, + "divisible_by": 16, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 64, + "heads": 4, + "num_position_features": 32, + "dropout": 0.2, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 8 + }, + { + "name": "unet_conv", + "kernel_size": 3 + }, + { + "name": "unet_conv", + "kernel_size": 3 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git a/tutorials/latest/train_model/train_micro.sh b/tutorials/latest/train_model/train_micro.sh new file mode 100755 index 0000000..3c334ee --- /dev/null +++ b/tutorials/latest/train_model/train_micro.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o micro_models params_micro.json ../make_data/data/hg38 diff --git a/tutorials/latest/train_model/train_mini.sh b/tutorials/latest/train_model/train_mini.sh new file mode 100755 index 0000000..2cc5aa4 --- /dev/null +++ b/tutorials/latest/train_model/train_mini.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o mini_models params_mini.json ../make_data/data/hg38 diff --git a/tutorials/legacy/interpret_sequence/README.md b/tutorials/legacy/interpret_sequence/README.md new file mode 100644 index 
0000000..1ac18dd --- /dev/null +++ b/tutorials/legacy/interpret_sequence/README.md @@ -0,0 +1,3 @@ +## Interpretation + +Todo. diff --git a/tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb b/tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb new file mode 100644 index 0000000..38b5c04 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/explore_grads_liver_CFHR2.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3bcaea3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores_hyp.shape = (1, 1, 524288, 4)\n", + "scores.shape = (1, 1, 524288, 4)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load scores for the selected set of GTEx tissues (grad)\n", + "\n", + "import gc\n", + "\n", + "seqs = None\n", + "strands = None\n", + "chrs = None\n", + "starts = None\n", + "ends = None\n", + "genes = None\n", + "\n", + "all_scores_hyp = []\n", + "all_scores = []\n", + "\n", + "gtex_tissues = ['liver']\n", + "\n", + "#Load score file\n", + "score_file = h5py.File('../../../examples/saved_models/gtex_CFHR2/scores_f3c0.h5', 'r')\n", + "\n", + "#Get scores and onehots\n", + "scores = score_file['grads'][()][..., 0]\n", + "seqs = score_file['seqs'][()]\n", + "\n", + "#Get auxiliary information\n", + "strands = score_file['strand'][()]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", + "\n", + "chrs = 
score_file['chr'][()]\n", + "chrs = np.array([chrs[j].decode() for j in range(chrs.shape[0])])\n", + "\n", + "starts = np.array(score_file['start'][()])\n", + "ends = np.array(score_file['end'][()])\n", + "\n", + "genes = score_file['gene'][()]\n", + "genes = np.array([genes[j].decode().split(\".\")[0] for j in range(genes.shape[0])])\n", + "\n", + "#Append hypothetical scores\n", + "all_scores_hyp.append(scores[None, ...])\n", + "\n", + "#Append input-gated scores\n", + "all_scores.append((scores * seqs)[None, ...])\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n", + "\n", + "#Collect final scores\n", + "scores_hyp = np.concatenate(all_scores_hyp, axis=0)\n", + "scores = np.concatenate(all_scores, axis=0)\n", + "\n", + "print(\"scores_hyp.shape = \" + str(scores_hyp.shape))\n", + "print(\"scores.shape = \" + str(scores.shape))\n", + "\n", + "score_file = None\n", + "\n", + "#Collect garbage\n", + "gc.collect()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "955bf762", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- Example = 0 --\n", + " - ENSG00000080910(+)\n", + " - chr1:196692638-197216926\n", + " -- min_val = -1.719\n", + " -- max_val = 3.385\n", + " - (Gradient score profiles per tissue) - \n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " - (Attribution at position of Max positive differential saliency) -\n", + " - max_pos (rel) = 251085\n", + " - max_pos (abs) = 196943723\n", + " - chr1:196943627-196943819\n", + " - y_min = -1.78648438\n", + " - y_max = 3.45445312\n", + "liver\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------\n", + "\n" + ] + } + ], + "source": [ + "#Enumerate and visualize attributions; liver example CFHR2\n", + "\n", + "save_index = []\n", + "\n", + "#Visualization parameters\n", + "logo_width = 192\n", + "\n", + "top_n = 1\n", + "\n", + "use_gaussian = True\n", + "min_padding = 65536\n", + "gaussian_sigma = 8\n", + "local_window = 1024\n", + "\n", + "main_tissue_ix = 0\n", + "\n", + "tissue_colors = ['darkblue']\n", + "\n", + "#Loop over examples\n", + "for example_ix in range(top_n) :\n", + " \n", + " print(\"-- Example = \" + str(example_ix)+ \" --\")\n", + " \n", + " print(\" - \" + genes[example_ix] + \"(\" + str(strands[example_ix]) + \")\")\n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]))\n", + "\n", + " #Grad analysis\n", + " \n", + " #Calculate min and max scores globally (for scales)\n", + " min_val = np.min(scores[:, example_ix, ...])\n", + " max_val = np.max(scores[:, example_ix, ...])\n", + " \n", + " print(\" -- min_val = \" + str(round(min_val, 4)))\n", + " print(\" -- max_val = \" + str(round(max_val, 4)))\n", + " \n", + " max_abs_val = max(np.abs(min_val), np.abs(max_val))\n", + "\n", + " min_val -= 0.1 * max_abs_val\n", + " max_val += 0.1 * max_abs_val\n", + "\n", + " print(\" - (Gradient score profiles per tissue) - \")\n", + " \n", + " #Gradient profiles across input sequence\n", + " f, ax = plt.subplots(len(gtex_tissues), 1, figsize=(8, len(gtex_tissues) * 1.5))\n", + " \n", + " if len(gtex_tissues) == 1 :\n", + " ax = [ax]\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + "\n", + " #Get tissue scores\n", + " score = scores[tissue_ix, example_ix, ...]\n", + "\n", + " l1 = ax[tissue_ix].plot(np.arange(seqs.shape[1]), np.sum(score, axis=-1), linewidth=1, linestyle='-', 
color=tissue_colors[tissue_ix], label=gtex_tissues[tissue_ix])\n", + " \n", + " plt.sca(ax[tissue_ix])\n", + " \n", + " plt.xlim(0, seqs.shape[1])\n", + " plt.ylim(min_val, max_val)\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + " \n", + " plt.sca(ax[0])\n", + " plt.title(\"Gradient Saliency for gene = '\" + genes[example_ix] + \"' (\" + str(strands[example_ix]) + \")\", fontsize=8)\n", + " \n", + " plt.sca(ax[len(gtex_tissues)-1])\n", + " plt.xlabel(chrs[example_ix] + \":\" + str(starts[example_ix]) + \"-\" + str(ends[example_ix]), fontsize=8)\n", + " \n", + " plt.sca(plt.gca())\n", + " plt.tight_layout()\n", + " \n", + " plt.show()\n", + "\n", + " #Apply gaussian filter\n", + " smooth_score = np.sum(scores[main_tissue_ix, example_ix, ...], axis=-1)\n", + " if use_gaussian :\n", + " smooth_score = gaussian_filter1d(smooth_score.astype('float32'), sigma=gaussian_sigma, truncate=2).astype('float16')\n", + " \n", + " #Calculate min/max positions and (differential) values\n", + " max_pos = np.argmax(smooth_score[min_padding:-min_padding]) + min_padding\n", + "\n", + " print(\" - (Attribution at position of Max positive differential saliency) -\")\n", + "\n", + " print(\" - max_pos (rel) = \" + str(max_pos))\n", + " print(\" - max_pos (abs) = \" + str(starts[example_ix] + max_pos))\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - logo_width // 2\n", + " plot_end = max_pos + logo_width // 2\n", + " \n", + " print(\" - \" + chrs[example_ix] + \":\" + str(starts[example_ix] + max_pos - logo_width // 2) + \"-\" + str(starts[example_ix] + max_pos + logo_width // 2))\n", + "\n", + " #Logo min/max value across tissues\n", + " min_logo_val = np.min(scores[:, example_ix, plot_start:plot_end, :])\n", + " max_logo_val = np.max(scores[:, example_ix, plot_start:plot_end, :])\n", + "\n", + " max_abs_logo_val = max(np.abs(min_logo_val), np.abs(max_logo_val))\n", + "\n", + " 
min_logo_val -= 0.02 * max_abs_logo_val\n", + " max_logo_val += 0.02 * max_abs_logo_val\n", + "\n", + " print(\" - y_min = \" + str(round(min_logo_val, 8)))\n", + " print(\" - y_max = \" + str(round(max_logo_val, 8)))\n", + "\n", + " #Loop over tissues\n", + " for tissue_ix in range(len(gtex_tissues)) :\n", + " print(gtex_tissues[tissue_ix])\n", + "\n", + " #Get tissue-specific scores\n", + " score = scores[tissue_ix, example_ix, plot_start:plot_end, :]\n", + "\n", + " #Plot scores as sequence logo\n", + " plot_seq_scores(\n", + " score,\n", + " y_min=min_logo_val,\n", + " y_max=max_logo_val,\n", + " figsize=(8, 1),\n", + " plot_y_ticks=False,\n", + " )\n", + " \n", + " print(\"--------------------\")\n", + " print(\"\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67a3cf9d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb b/tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb new file mode 100644 index 0000000..a4f3a1c --- /dev/null +++ b/tutorials/legacy/interpret_sequence/explore_polya_grads_CD99.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 2, + "id": "534495a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores.shape = (1, 524288, 4)\n" + ] + } + ], + "source": [ + "#Load scores\n", + "\n", + "score_file = h5py.File('../../../examples/saved_models/gtex_CD99/scores_f3c0.h5', 'r')\n", + "\n", + "scores = score_file['grads'][()][:, :, :, 0]\n", + "seqs = score_file['seqs'][()][:]\n", + "genes = score_file['gene'][()][:]\n", + "genes = np.array([genes[j].decode() for j in range(genes.shape[0])])\n", + "strands = score_file['strand'][()][:]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", + "\n", + "#Input-gate the scores\n", + "scores = scores * seqs\n", + "\n", + "print(\"scores.shape = \" + str(scores.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4dcb8667", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- 0 (+) --\n", + " - gene_id = 'ENSG00000002586.20\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAUYElEQVR4nO3df3iVdf3H8de9s8F2xjZRNpU2EWEwGBgy+WEQQVEpRlBgmaSGmJpemeZlZZfiF5LiMiuN0EvEXFj5o4QK+6FCCRGMkCCFSQwV25i4IcJ+s+2c8/3jzb37nLGNcc6ZY+v5uK5znXM+5/7xuT/35/7x/nzu+z5OKBQKCQAAAABikNDdGQAAAADQ8xFYAAAAAIgZgQUAAACAmBFYAAAAAIgZgQUAAACAmBFYAAAAAIgZgQUAAACAmCVGO2IwGFR5ebnS0tLkOE488wQAAADgNBAKhVRdXa2BAwcqIaHjPomoA4vy8nLl5OREOzoAAACAHqK0tFTZ2dkdDhN1YJGWltYyk/T09GgnAwAAAOA0VVVVpZycnJZz/45EHVi4lz+lp6cTWAAAgBPU1tbq/PPPlyTt379fqc8+K/Xpo9rZsyPTU1O7L5MAOqUztz5EHVgAAACczKFDh7wv111n7zU1kekAegWeCgUAAAAgZvRYAAAAoMcIBAJqamrq7mz0OklJSfL5fDFNg8ACAAAAPUJNTY3KysoUCoW6Oyu9juM4ys7OVr9+/aKeBoEFAAAATnuBQEBlZWXy+/3KzMzkf9TiKBQKqbKyUmVlZcrNzY2654LAAgAAAKe9pqYmhUIhZWZmKiUlpbuz0+tkZmZq//79ampqIrAAAACnl4SEBF188cUtn0+WDnQGPRVdIx7lSmABAAC6REpKirZt29bpdAA9G80EAAAAQJSampq0aNEi5eXlKT8/XxdddJFmz56tnTt3xjxtx3FUU1MjSRozZozq6+tjmt6DDz6oioqKmPPVHnosAAAAgCjNnz9fNTU12rJli/r37y9JWrt2rXbv3q0xY8ZEDBsIBKK+fyEegcqDDz6o6dOnKysrK+ZptYXAAgAAdIm6ujqNHDlSklRcXCx/e+l+fztTADpQVyft2dO188jLkzqonyUlJVqzZo1KS0tbggpJmjlzpiSpsLBQTz/9tLKyslRcXKxly5Zpy5Yteuqpp9Tc3KykpCQtW7ZMEyZMkCStXr1a3/3ud9W/f3/NmDEjYl6O46i6ulr9+vVTSUmJbrvtNlVUVKixsVE33nijbr755pbhli5dqtWrV6uiokILFy7U/PnztXjxYpWXl2vu3LlKTk5WYWHhCYFPrAgsAABAlwiFQnr77bdbPp8sHTgle/ZIBQVdO4/t26WxY9v9eceOHRo6dKjOPPPMdofZtGmTduzYodzcXEnS0KFD9c1vflOSVFRUpAULFmjXrl2qqKjQV7/6VW3evFnDhw/X/fff3+b0AoGArrrqKj355JPKy8tTXV2dJk6cqIkTJ2rs8bwmJydr69atev311zV+/HhdffXVWrhwoX7+85/rt7/9rUaNGhVtiXSIwAIAAAA9T16enfh39TxOIvxpSm+88YbmzJmj+vp6TZkyRZMmTdLkyZNbggrJgpElS5bovffeU2JiooqLi9XY2KiioiKNHTtWw4cPlyTdcMMN+va3v33C/P7zn/9o9+7duvLKK1vSqqurVVxc3BJYzJs3T5I0YsQIJSYm6uDBg8rOzo6uDE4BgQUAAAB6Hr+/w96ED8JFF12kkpISvf/+++rfv7+GDBminTt3qrCwUM8//7wkRfyTdWNjo+bMmaOXX35ZBQUFqqqqUkZGhhobGzvdexcKhTRgwIAO77lITk5u+ezz+dTc3BzdAp4ingoFAAAARCE3N1ezZs3SggULdOTIkZb02traNodvaGhQU1OTcnJyJEnLli1r+e2SSy7Rjh07tHfvXknSypUr25zG8OHD5ff7tWrVqpa0ffv26fDhwyfNb3p6uo4ePXr
S4aJFYAEAAABEqbCwUKNHj9aECRM0cuRITZo0SevWrdOdd955wrDp6elavHixxo8frylTpqhv374tv2VlZWnFihWaOXOmPvKRj7T755GJiYlau3atnn32WV144YXKz8/X9ddf36lH0d56662aP3++xowZE5enTLXmhKK8a8rtujl69KjS09PjnS8AANDD1dbWtlwGUlNTo9Tjn2traiLTU1O7LY/oORoaGvTWW29p8ODBEZf6ID7aK99TOefnHgsAANAlHMdpeaxs+A2u7aUD6NkILAAAQJfw+/3avXt3p9MB9GzcYwEAAIAeg/8+6RrxKFd6LAAAAHDaS0pKkuM4qqysVGZmJpfRxVEoFFJlZaUcx1FSUlLU0yGwAAAAXaKurk7jxo2TJG3btk3+9tL9/namAHh8Pp+ys7NVVlam/fv3d3d2eh3HcZSdnS2fzxf1NAgsAABAlwiFQiouLm75fLJ04GT69eun3NxcNTU1dXdWep2kpKSYggqJwAIAAHSF556Thg7t7lygF/L5fDGfAKNrEFgAAID4mztXSkvr7lwA+ADxVCgAANA1qqu7OwcAPkAEFgAAAABiRmABAAAAIGbcYwEAALqEI2nQoEH2Oew/BxzHaTMdQM9GYAEAALqEX2rz/wb8fj//QwD0QlwKBQAAACBmBBYAAAAAYkZgAQAAukS9pHHjxmncuHGqr6/30uvr20wH0LNxjwUAAOgSQUmvvPKKfQ4GvfRgsM10AD0bPRYAAAAAYkZgAQAAACBmBBYAAAAAYkZgAQAAACBmBBYAAAAAYsZToQAAQJcZMGDAKaUD6LkILAAAQOz+/ndp0iQpwbsYIlVSZWXlCYOmpqa2mQ6gZ+NSKAAAEJtHH5WmTJEeeqi7cwKgGxFYAACA2Nx3n71v2dK9+QDQrQgsAABAbNx/z/b5IpLrJU2dOlVTp05VfX29l15f32Y6gJ6NeywAIBa7dknDhkl9+nR3ToDO27ZNOuMMKTc3PtMLBOzdvb+itFSSFJS0YcMG+xx2mVQwGPTS3aDE9dJL0uHD0he/eOJ83n1XqqyURo2KT757u8ZG6W9/kz796e7OCf5H0GMBANE6dkwaPVr62tfiM73NmyXHkY4ejc/0eoODB6WmJvt85IiVz7e+JYVC0qZN3Zq1kyoqkoqLuzsXZutW6e23ve/jx1tAHK177pHWr/e+Hztm76GQnfifd96J49x1l/e5Xz/v88aNUmamtHOnnQh/6lPSlVfatFobPdpe6JylS6VLL5X27pU2bGi7TLtaVZV0uvVKBYMWcCHunFAoulpWVVWljIwMHX3kEaWnpFgrhePYj+4k162zA+TcubYSHccbRpKqq62iX3758dw4Nm5VlZSe7g3rjhs+7dbZducfCtkrfF47d0o5OdJZZ9lvDQ12AP/EJ6RVq6QrrpCSkyPz4DjSK69IAwdK55xjv117rb0vWiSVl9v0Roxov5DCl7W99GDQDo4ZGVLfvt6Ofv16aeJEye+PHPfAAds5P/GEVFtr47VWXS3t328tRpMnS2lpkeXVej24v23fbjfe3XijfZ88OfJ3x7Gye+opaf58r6zd38vK7L2uzobNy/OWsbTU8tVWeYWv2127pAcekL7zHSk/35bx5puln/3MOxA5jnT11dLgwbazOnjQfj9yxNbLxInSiy9K06ZFtiK7ea2qklavtmXYuFFauVL6xS+8cqmvl2644cR8StL990tZWVbfjh2zdTF7tnTokDRunLWy5edLd9xh9WrFisjxZ82SZs6Urr/evk+bZuMNHiylpNj427dLv/qV9NGP2nCPPSYVFEjPPy+98YY3rYEDbXldX/+69M9/2glENFatityGNm6U9u2zspg3L3LYCROkpCSru+efL2Vneyd5I0ZIr78ujRlj2164hx6yOvuVr0i33mqtpU88Id12m/T++9Ltt0vXXCO9847NMxiUfv97ac0a6aabbPizz5YSE+2EbfFiafly6ZZbLA9uHXT
zeMst3vYsRdb7xx6zZbzmGmnqVNtX9e9vdfyPf7STl7w8rzzclthjx2x/UVJi82/LokWWz5tuspbVm26Sfv1ry9PYsVZ/T9WXvmTbXrhLL5W+8AXpuusi0z/2Mdu3xiInR/re9+zz5s1Wly+7zMrdPUGdOlV6+eXI8WbPtu19+/a2p3vddVa2P/2p9NZb0g9+4J1wLl9urei/+53V5TPPlHbsiG05YnXHHdKPfhS/6T35pLf+w5c9N9fqVGdNmybNmWPb4cKFVm/jGZC23p6iVCvJDSFqZE+J6ij9tDBggO3TJbsh/ayzbB90KsaNs16hzrjsMunPf45Mu/xy2w/l51t+Yt2eO1JQ0P72Gm7BAum112zbvOwy20dXVdkx2nXXXXY5nHvPzVVX2b7PNXOmtHZtx/PJyrL98gMPeGmzZtmx4O677fzIvZfn4YftPKm01LaBJUss/eMft/33ww+ffLn697d8rVolDRlix9nBg23b2rvX9rOhkPTCCzb8o4/a8Tr8HKj1+VB76SUl0o9/7M378cftePeHP1g5rlol/fWv9tuiRdKgQXa89PnsWPl//ye9+Wb79cs9hrf2xBO2PP/6V+T8Jenee6ULLrDPa9ZYkP/uu9L06bYP/vKXVSUpQ9LRo0eVnp7eYXHGHlhI6ngWAADgf1GPDCyAeHIbLR3Hu2SwhzmVwCL2eywOHbLehdbXSIZClhYKWaTVXut9MOi1BLbVq+FOSzqx1dEdNhiMnH9bEVv4NNyVm5AQOf/WWrd07ttnrbM+n40fClnLaXvjnkp6+HKH57/1ModC1mLqtsS3V1Zu2Scmeq2tblm1lQd3Ou602ysXt+zCb9AL31hal334cgUClp+2ektcTU3Ws5Ge7s2/udkrZ3f6zc32PSnJW0/usiUkRPZahc8jIcFehw979TYQsGUOb2lobLT0DRustWjUKBuvb19bdndegYDNI7zHzOeznoRzzrHlcZe3ocFaVtyen6SkyB2OO51AQKqosFYydxi3vh07Zi1FS5dai4VbJs3NNmwgYC1IjY1WZklJXo/VgQNe75vjeMPX1loLfFJSZDklJlrvk89n801Ksve+fb3lray01p7mZuslKiiQPvQhb5t05xMMWk+Q3x+5/Rw7Zj2Dkyfb96YmL19uHXPL2d2PuOu/ttZaqUaP9sr5vfek1FRbn4mJNk7r+uhy8+huI+G9NW4ZhG934fXDLfM335Q+/GG7LOT222353GXo08e7PESy5U9J8b67y1Nfb3lISPB6TiUrz/HjbTnT0rxesuXL7TKRzEybRmKiVw+LiqxXxF22UMjW4f79ls+0NC8vdXW2/05MtFapxESppsbq3rBhXtm5r02brM659WLJEukb37BxDx2yVsm0NOvh8vmsjldVWU+jW7fc/LrbemOj1Se3boTvn9z5r1ljLWf9+kkzZngtt1dcIf3mN1bOr71mPR1z5lg+58+3Xs+sLK+eVVdLv/yl9VZOmyZdfLH0mc9YfqqrrZwdx5YnOdnG8fks724daW62ZUpN9bb1/HzpJz+xcq+stJ7DF1+0ZXvkESuzkSNtOn6/rcfKSpvfBRdITz9tvU6OY/NparLXm2/atrFypfUcDRli+QoGrczcV0KCt4986SXrKXznHRt/xgxvfd9xh5XLyJH2fdAgaehQuxykstJ6OkeNsv1iebl04YW2DAcOeL3XBw5Yb9WwYVaGd95pPVgHD0qFhYqLFSu8HuN777VW22uvtdby5culZ57xhv3kJ631OCHB1rW77ZWVWQv85z7nbadnnGHbXyBg68Hvl1591ZalpsZbn01Ntk9LSLDesoICb/tcutTqTFaWzcet148+Kl1yifXGnneeV4ePHLHtNBCQ/vEPm+ewYVaXm5vt5TiWr2DQ5l9fb+WekWH1sk8fL1+1tbaPS0uzPFRUWH0991yr1888Y+vo8cdteUtKrCV80SJb7+5leevXW2t+RoblwT3+JSRYWVRW2nres8d
6zaurrRW/oMCGf+MNqzvuPnL8eLs8ce9er8ciFLJlSU729jNpafbu7p+2brXpFRTYPiUlxYZpbPSO7e4xxj12u8eFPXts+EGDvHwkJNh5WnOz9TgEg/bZ57Plcvd/DzxgLf+zZ9s+wV33DQ1WFu7xyeezXkD3apfExMhjt3TiMbz1uVxbamutTDIyrHzc44m7bC+8YD3bmZn2PRCwZXHz5u43m5u942NRkW2n7rqsr7eyu+8+m8/3v+/VseZmKzt3fJ/Pm7fjWF1zz3Mk673YvdvqfifE3mPRiegFAHAaeO21yEsue5pFi+yEQLIT/7/8RfrsZ73fX33VAqji4o4vU0X8jRkj/fvfdrJSXt5yj0Wneizuvlup991nl8Lm53snZIcP20kl4mP9eguAw+9vATrhVM75CSwAAD3DsWPWSzFvXvutgege//2v3QfmXm9/fP3USso6PkiFwgKLc89V1jvvWHpNjVJTwy6Gan1PJYBuRWABAAC6z+jR1gORktL2E4H27vV6zlqfhgwebJfvEVgAp4VTOefnfywAAEB8tb7vsrX27k+U7Kk4GzfGNz8APhAEFgAAIL7a6m0oKrLHgUuRD4tobfBgewHocQgsAABAfB3vsWgIBjVn+nQpENBzZ58t97lnDY6jOcc/P9fQoOTwJ6IB6LEILAAAQHwdDywCkv60bp19dv9BXVLgjDP0J/dzD322P4ATtfMHDgAAAFFq61Ko8P9xAdArEVgAAID4+uEP7T38fyj4Twqg1yOwAAAA8TV7tvTcc/anbAD+Z3CPBQAAiL/Pf16qrY1Mu+ce6cCB7skPgC5HYAEAAD4Yixfbe+uAA0CvEHVg4f5hd1VVVdwyAwAAeo/asACiqqqq5QlQ7aUDOP245/qhth7K0IoT6sxQbSgrK1NOTk40owIAAADoQUpLS5Wdnd3hMFEHFsFgUOXl5UpLS5PjOFFlEAAAAMDpKxQKqbq6WgMHDlRCQsfPfYo6sAAAAAAAF4+bBQAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMSOwAAAAABAzAgsAAAAAMft/1LaIsEggB2EAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize polya-centric gradient for gene(s)\n", + "\n", + "#Find position of max saliency\n", + "max_poses = np.argmax(np.sum(scores, axis=-1), axis=-1)\n", + "\n", + "#Loop over genes\n", + "for example_ix in range(scores.shape[0]) :\n", + " \n", + " #Get max pos\n", + " max_pos = max_poses[example_ix]\n", + " \n", + " #Only visualize genes that are not extremely long\n", + " if max_pos >= 150000 and max_pos < seqs.shape[1] - 150000 :\n", + " \n", + " print(\"-- \" + str(example_ix) + \" (\" + str(strands[example_ix]) + \") --\")\n", + " print(\" - gene_id = '\" + str(genes[example_ix]))\n", + "\n", + " #Plot scores\n", + " f = plt.figure(figsize=(8, 1))\n", + "\n", + " #Annotate 4kb window\n", + " plot_start = max_pos - 2000\n", + " plot_end = max_pos + 6 + 2000\n", + "\n", + " l1 = plt.plot(np.arange(seqs.shape[1]), np.sum(scores[example_ix, ...], axis=-1), linewidth=1, linestyle='-', color='red', label='Gradient')\n", + "\n", + " plt.axvline(x=plot_start, color='black', linestyle='--')\n", + " plt.axvline(x=plot_end, color='black', linestyle='--')\n", + "\n", + " plt.xlim(0, seqs.shape[1])\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + "\n", + " plt.tight_layout()\n", + "\n", + " plt.show()\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - 100\n", + " plot_end = max_pos + 6 + 100\n", + " \n", + " #Rev-comp scores if gene is on minus strand\n", + " if strands[example_ix] == '-' :\n", + " plot_end = seqs.shape[1] - (max_pos - 100)\n", + " plot_start = seqs.shape[1] - (max_pos + 6 + 100)\n", + " \n", + " #Plot sequence logo\n", + " visualize_input_gradient_pair(\n", + " scores[example_ix, :, :] if strands[example_ix] == '+' else scores[example_ix, ::-1, ::-1],\n", + " np.zeros(scores[example_ix, ...].shape),\n", + " plot_start=plot_start,\n", + " plot_end=plot_end,\n", + 
" save_figs=False,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d7aefe0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb b/tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb new file mode 100644 index 0000000..cc22f72 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/explore_splice_grads_GCFC2.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7030e9ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import h5py\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from scipy.ndimage import gaussian_filter1d\n", + "\n", + "from vis_helpers import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "534495a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "scores.shape = (1, 524288, 4)\n" + ] + } + ], + "source": [ + "#Load scores\n", + "\n", + "score_file = h5py.File('../../../examples/saved_models/gtex_GCFC2/scores_f3c0.h5', 'r')\n", + "\n", + "scores = score_file['grads'][()][:, :, :, 0]\n", + "seqs = score_file['seqs'][()][:]\n", + "genes = score_file['gene'][()][:]\n", + "genes = np.array([genes[j].decode() for j in range(genes.shape[0])])\n", + "strands = score_file['strand'][()][:]\n", + "strands = np.array([strands[j].decode() for j in range(strands.shape[0])])\n", 
+ "\n", + "#Input-gate the scores\n", + "scores = scores * seqs\n", + "\n", + "print(\"scores.shape = \" + str(scores.shape))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fd114809", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-- 0 (-) --\n", + " - gene_id = 'ENSG00000005436.14\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAVi0lEQVR4nO3de3BU5f3H8c8SUiEJQdRgocmolRQKarkUQXDQqq2jqMhQa4tTLUO9o6WodawIir8qo61F4/RqNd7wjlYYbJFWrArhIgElEQlqlBgkgUAum4Qku8/vj8eTs5vsJtlLspvwfs3snLPPOec5z7ns7vPd5znneIwxRgAAAAAQg36JLgAAAACA3o/AAgAAAEDMCCwAAAAAxIzAAgAAAEDMCCwAAAAAxIzAAgAAAEDMCCwAAAAAxKx/tAv6/X6Vl5dr0KBB8ng88SwTAAAAgCRgjFFtba2GDx+ufv06bpOIOrAoLy9XTk5OtIsDAAAA6CX27Nmj7OzsDueJOrAYNGhQ60oyMzOjzQYAAABAkqqpqVFOTk5r3b8jUQcWTvenzMxMAgsAQNx4vV6deOKJkqTS0lKlP/OMNGSIvNOnB6enpyeukABwhOnKpQ9RBxYAAHSX/fv3u2+uu84O6+qC0wEASYW7QgEAAACIGS0WAAAA6DV8Pp+am5sTXYw+JzU1VSkpKTHlQWABAACAXqGurk5lZWUyxiS6KH2Ox+NRdna2MjIyos6DwAIAAABJz+fzqaysTGlpacrKyuI5anFkjFFlZaXKysqUm5sbdcsFgQUAAACSXnNzs4wxysrK0sCBAxNdnD4nKytLpaWlam5uJrAAAPQN/fr10/e///3W8c7SARxZaKnoHvHYrwQWAICkMnDgQG3evLnL6QCA5MBfPgAAAECUmpubdc8992jUqFEaM2aMxo0bp0svvVTbtm2LOW+Px6O6ujpJ0tixY9XQ0BBTfsuWLVNFRUXM5QqHFgsAAAAgSnPmzFFdXZ02bNigIUOGSJJWrlypoqIijR07Nmhen88X9fUL8QhUli1bpvPOO09Dhw6NOa9QCCwAAEmlvr5eo0ePliQVFxcrLVx6WlqYHAAcEerrpZ07u3cdo0ZJHXzXlJSU6NVXX9WePXtagwpJuvjiiyVJ+fn5ev755zV06FAVFxcrLy9PGzZs0HPPPaeWlhalpqYqLy9PkyZNkiStWLFCv/3tbzVkyBBdeOGFQevyeDyqra1VRkaGSkpKNH/+fFVUVKipqUnXXnutbrjhhtb5li5dqhUrVqiiokKLFi3SnDlztGTJEpWXl+vHP/6xBgwYoPz8/HaBT6wILAAAScUYo88//7x1vLN0AEeonTulCRO6dx3vvy+NHx92cmFhoUaMGKFjjjkm7DzvvvuuCgsLlZubK0kaMWKEFixYIEkqKCjQ3LlztWPHDlVUVOjqq6/W+vXrNXLkSD3wwAMh8/P5fJo9e7aefvppjRo1SvX19Zo8ebImT56s8V+XdcCAAdq4caM++ugjnX766fr5z3+uRYsW6fHHH9fLL7+sU045Jdo90iECCwAAAPQ+o0bZin93r6MTgXdT+uSTTzRr1iw1NDRo2rRp
mjp1qs4888zWoEKywcjvfvc7HThwQP3791dxcbGamppUUFCg8ePHa+TIkZKka665Rrfffnu79X388ccqKirST3/609a02tpaFRcXtwYWV1xxhSTpu9/9rvr376+vvvpK2dnZ0e2DCBBYAAAAoPdJS+uwNaEnjBs3TiUlJTp48KCGDBmik08+Wdu2bVN+fr5WrVolSUFPsm5qatKsWbO0bt06TZgwQTU1NRo8eLCampq63BJrjNFxxx3X4TUXAwYMaB1PSUlRS0tLdBsYIe4KBQAAAEQhNzdXM2bM0Ny5c3Xo0KHWdK/XG3L+xsZGNTc3KycnR5KUl5fXOu2MM85QYWGhdu3aJUl67LHHQuYxcuRIpaWl6amnnmpN2717t6qqqjotb2ZmpqqrqzudL1oEFgAAAECU8vPzdeqpp2rSpEkaPXq0pk6dqrVr1+q2225rN29mZqaWLFmi008/XdOmTdNRRx3VOm3o0KH629/+posvvlhTpkwJ+yDQ/v37a+XKlXrxxRd12mmnacyYMfrlL3/ZpVvR3nzzzZozZ47Gjh0bl7tMteUxUV4B5zTdVFdXKzMzM97lAgAcobxeb2vXgbq6OqV/Pe6tqwtOT09PWBkB9LzGxkZ99tlnOumkk4K6+iA+wu3fSOr8XGMBAEgqHo+n9baygRdFhksHACQHAgugpxUUSFlZ0sknJ7okQFJKS0tTUVFRl9MBAMmBwALoaWecYYfchx8AgIjxHJvuEY/9SmABAACApJeamiqPx6PKykplZWXRJTKOjDGqrKyUx+NRampq1PkQWAAAkkp9fb0mTpwoSdq8ebPSwqWnpYXJAUBflJKSouzsbJWVlam0tDTRxelzPB6PsrOzlZKSEnUeBBYAgKRijFFxcXHreGfpAI4cGRkZys3NVXNzc6KL0uekpqbGFFRIBBYAAADoRVJSUmKuAKN78IA8AAAAADEjsAAAAAAQMwILAAAAADEjsAAAAAAQMy7eBgAkFY/HoxNOOKF1vLN0AEByILAAACSVtLS0kPeoD5cOAEgOdIUCAAAAEDMCCwBActm1K9ElAABEgcACSJS33kp0CdDX+HzS0qW9u2K+YYMaRo7UxNxcTZw4UQ0NDa2TGhoaNHHixHbpYXm9Ek/nBYAeQ2ABJMo55yS6BH3P1q3S3XcnuhSJc/fd0h13SCNHJrok0du1S35JW3bv1pYtW+R/6aXWSf5Nm7Rlyxab7vd3nldGhnThhd1XVgBAEAKLI8XatfbfTPR99fXSmDFH5j+1EyZI99yT6FIkztatiS5B7JYuDX5/1VXu+NlnR57f2rUxFQcA0HUEFvH2r39JL7+c6FIE++gj6Yc/lH72s+SqbOblSa+9luhS9Kye2N4zzpCKi6Ubbuj+dSG59OY/D/74R+mdd6SqqsiXLSqSDhwIP72xMXR6c7OUny8ZE/k6AQDtEFjE2wUXSJddluhSBPv0Uzt86SVp2LDElsVhjHTzzdLMme2n/ehH0qxZ7dO93u4vV3dZskR67LHQ2xtvlZV2WFsb33xLSqT33gtOe/11yeOJz7pKS6VLLpGammLPKxl98on02Wfdu459+9zx116Tqqvbz1NWJhUU2Ar86tU2LdR8PW3BAmnaNKmiIvJlTzlFmjw58uUefVSaM0d6++3IlwUAtENg0RX79tnKU6J+fP1+W+mK1kUXueMd/avXkzr6Z/XNN6UVK4LTVq+2/aV37+7ecnVFY2PkLT+LF0tXX90+/ZJLurZ8U5N08KANEu+6q+N59+61w1WrIitjKMZIhw/b8e98RzrzzODpM2bY4Y4dduj322UaG20FNhI33iitXNl3KnnXXy+9+KL7fsQI6dvf7tqymzbZ7xxnv3ZV4D/zM2dKkya57z/+2J4/OTm2VesHP5CmT5fWrJGOPtr+6x+otlb6+9+D06K9GPr55+327NxpP8tOC8GhQ/b7taYm8jxffz34/e7dNpgK1foQrkXCuQA8GQIrAOgLTJSqq6uN
JFNdXe0mNjUZ4/O57/1+Y5Yvt+mOL74wprnZDnfvNmbzZvu+udmYvXuNOXw49Ar9fmNKSuzQ73fTAqcHDnfsMObYY425/353nsZGY+67z5j6evve5zPm7beN2bfPmK1b3WULC42RjLnpJruM/VmyL0d5uTFlZXbcmfaXv7jjX35pp0vGXHGFnd/vt9u4b58x771nX2vW2G0uLbXzzphh1+9sp9/v5vnnPxvz4YfuPm5psfvt/vuNue46O8+DD9rtk4xZujS47M7rr381ZssWY6ZMMaaiwt3urVvtsdq+3a7L2U/Ll7vb3txsTFWVMffea8vh9xszfLgxv/61MUVFNr/Ac6HtcamoMOaqq+yxd8rT0GCn+3zBZV6+3Jh//9seMyfttdfceevq2udfXGzLH3huOOV2lnOO/fbtNs8rr7TDiy6y6T6fPSaHD9v0hQuNqay0+3vhQrcsr7/u5uf3G5Ofb9PffNOYQ4eMeeSR0Ps/3Ovss+3wssuMufZaYx5+2Jht2+z+bjvvsGH2XHD238GDdn94vcHzHThgP2sNDcZ8/rk9n7/80k6rqQn9WXKWra11x194wR0vLHTPV+c1a1bw+3PPtcPHH7flvPFGY044wZiNG+0x3rLFmCeftPOcd57dzsDli4qMeeUVY2691Zh//CN42vLlwZ97p+w+nzuP32+P+bp19v28eXaegwfd5f77X3c7neO+YYM9Dz791JivvrLfAXv32vn27rXLVlXZ89vvt+eEMcYMGmTM6tX2s71pk93fTln277f5Ou/XrXPLXVRkz9mHHrLTsrKM+eST9sd7//7g8WHD3PdTpkR2nrV9TZvmjjufk507jcnNtWlPPWXPocBlvvENeywDz5sPPrDT/vMfY/75Tzv+1lt2HzrLDR/uni/O91SIV51k9PWrrqP0W24x5je/aZ/HqlXt0x5+2Jj//c+W+fBhu/4TT7TT/u//bHpDg/0MFRQYc/nl7nHas8eYJ56wx76y0pa/qso9lyorjXnjDXss33nHft+vWWPPo8pKm++779p8nO8tv9+eCxddZMyyZXYf79jR/rurM1VV9nNtjD0fzznHmF/8wv6+BM5TUhK8nN9vyxKoosL9PESipcWWYeNGY9avt+872o5w+bddZtcuY555xv5O9+9vl/vww9B5V1cH/+Y4+Tm/L85ntSuam+1xa7ueUPUT57smlKYm+53z3nvGfPZZ8P7u7Dg7v0OB87e02PO2s+MTrh7VttydaWx06wGRqKqyv4HOPvd67f4MpazMnnN+v1tXjPR4OcJtU1NT+3O9t4n0eyFQ289FFELW+cPwGGNMNAFJTU2NBg8erGpJmXELcwAARzqvpBO/Hi+VlN5JOrrZgAFSerptdWr7qqiwLZXo+9LTpYED3fcejx063W+zstxzoicNGhS+O67HY/9aiMSxx9phRz08jjnG3X7ns+Dsh860LdOQIbZHgmPgQNtLIfPr2vXhw/amLOFkZEipqcF5xMvX+6LG79fggwdVXV2tzMyOa/39418KSd/7nrR9e7dkHTd5ebZJf8GCRJcEABAgXVKon+hw6ehmixa5laFQr8WL47u++fOlZcs6nicrq+sVuVBGj7Y3uUgGzrWPN94oLVzYtWXGjw++C9zixfY6yki36aSTunbt17hx0k9+YsedSrEz3LlTev996YorbFq4bTjtNNvlMtKbTEyYIJ1/vnTffe2nTZwoXX65VF4uPfRQ8PYsWGArxnfeGbzMvffabrebNrXPb/Zs6dRT7fgdd9jh4MH2Zij332/fX3mlPX8CPwOSPR+d8zY93XYdzciw3WIffNCmX3ON7bKakiI98YRN+9Wvgm+TftdddtnMTJu3z+duw/Tpdt0+n91eSbr1Vhv8P/GE7Z78wANuXnPmuOuZN89eVyZJN91k68GOjAyprs6W5bjj3C7Xt91mh42NXb+Ve080i4TU0BB5c2ukbrvNHvJo1NS4442NxuTlhW9Kck4tZ94v
vrDjBw8aM26c7VbQ2bY6XZ5WrAif/y23RLwZIX8GjLFdZCRjVq7sPA+v1zbzh+J0SYrEoUN2OG+eMX/4Q/vl16835rnngtP8frcbS6yam+3559izJ/y8Tz5pu8Q4Tfs5OcFdKxz19W6XqgcftGkPP9y1rig7dhhz9dVul5SPP7bdLmbPdvNvu8zAgbZLzIwZ9hgENhu3PdahdLTNgbZutV2zAvN1jld9vT2309OD15mRETqvxsbgLnErVtj5H3nEpj/7bMfldroWhjsXA8voaLtvAn3wge3u012c7lXGGDN0aHA3rc7Mndv5MQwU7txatMh2XQvsqnH77Xaa19t5voHLOXl2pYuC32+7HQUqKLCfpd273bQ773S7EEb62rHD5vHSS/a90yXq6adt+vz57rzvvBO6nOXlxkyf3r67x4QJwft+//7u+b1qabFdx/oKp2tfZzZvjrlrBnqRggL7vYNeK5I6f+ICi75k1SpbAesuV11lf+TWrIl82aam0IEFord9uzEvvtg+vb7eVm6c/s7GdF45WrfOnbeqyl5/EOoHt+1yHVVynHmOPz76bQzlkUds+cJZs8autysVi1D8fmP+9Ce373oonVXu5s2z16okm40b7b754IOuze9cl9DR/g60bZsx3/qWe+ynTAm+hiYePv88uv7WXbF2bWRBRSgVFfbYO39A1NRE/53n9dprkQAAPXyNRRf6WyFGBw7YZs5ly6T+UfReu+ce6YUX7PMsJLfZDt1v/37bZPnNb4ae3tVj8eqrUmGhbY79xjds/85wnH6f559vn6vSk+rqbJMqYrN+vTR1qu2rnJXV9eXmzpUef9w+E2L+/G4rXrdwzltJDY8+qgvmzZMkvSHJ6dXdIOkCSTrrLL3xxhsaGNjfuy2/33Y3kPjOA4AYRFLn755rLBBfxx7r9ouLxuLF9hXww40ectxx8cln5szIn4Fx9NHxWXckCCriY8qU6CrDZ51lA4ujjop/mXqQ3+eTc9PhwMuC/ZJNf/tt+Tu7YLjf13dTT02Ne/kAAKHxHIsjybPP2ifbom+79lo7vP76xJYDPc+pbPf2PxHidZehFSsifxYIACBqBBZHktmz2z/gDD3n+ONDP1E83gYMCB7iyOG0cvTGwKK83L1LSby6Ls2caR/sCADoEQQWQE+59FLpscfc92vXds96nH7lkd7SD72f809/v1741T5smHvtUGCLhXPLRwBA0uuFvz5AL1Raav+NDez7fu653bMu5wL/lpbuyR/JqzcHFpLb0hIYWNx+uzteV9ez5QEARKSX/voAvcwJJ/TcRaS0WBy5evs1FhdeKJ19tnTZZW5aNHfCAwAkBN/YQF8zb559IuukSYkuCXrarFnSk0/ap6/2RoMHS2+9JXm9SktLCzlLuHQAQOLxHAugJzU0SGlp9ja0lZWJLg3QOzgtMDyPAgB6XCR1frpCAYlw6qmJLgEAAEBcEVgAidBb+8ADAACEQWABAEgqjY2Nmj59uqZPn67GxsZO0wEAyYGLtwEAScXn82n16tWt452lAwCSAy0WQE866ijppJOkhQsTXRIAAIC4osUC6En9+kmffproUgAAAMQdLRYAAAAAYkaLBQAguf3+99L69YkuBQCgE7RYAACS2y23SK+8kuhSAAA6EXWLhfPA7pqamrgVBgAAr9fbOl5TU9N6B6hw6QCA7uPU9Z26f0c8pitzhVBWVqacnJxoFgUAAADQi+zZs0fZ2dkdzhN1YOH3+1VeXq5BgwbJw1OEAQAAgD7HGKPa2loNHz5c/fp1fBVF1IEFAAAAADi4eBsAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMSMwAIAAABAzAgsAAAAAMTs/wEbSMf9OrtM2wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize splice-centric gradient for gene(s)\n", + "\n", + "#Find position of max saliency\n", + "max_poses = np.argmax(np.sum(scores, axis=-1), axis=-1)\n", + "\n", + "#Loop over genes\n", + "for example_ix in range(scores.shape[0]) :\n", + " \n", + " #Get max pos\n", + " max_pos = max_poses[example_ix]\n", + " \n", + " #Only visualize genes that are not extremely long\n", + " if max_pos >= 150000 and max_pos < seqs.shape[1] - 150000 :\n", + " \n", + " print(\"-- \" + str(example_ix) + \" (\" + str(strands[example_ix]) + \") --\")\n", + " print(\" - gene_id = '\" + str(genes[example_ix]))\n", + "\n", + " #Plot scores\n", + " f = plt.figure(figsize=(8, 1))\n", + "\n", + " #Annotate 4kb window\n", + " plot_start = max_pos - 2000\n", + " plot_end = max_pos + 6 + 2000\n", + "\n", + " l1 = plt.plot(np.arange(seqs.shape[1]), np.sum(scores[example_ix, ...], axis=-1), linewidth=1, linestyle='-', color='red', label='Gradient')\n", + "\n", + " plt.axvline(x=plot_start, color='black', linestyle='--')\n", + " plt.axvline(x=plot_end, color='black', linestyle='--')\n", + "\n", + " plt.xlim(0, seqs.shape[1])\n", + " \n", + " plt.legend(handles=[l1[0]], fontsize=8)\n", + " \n", + " plt.yticks([], [])\n", + " plt.xticks([], [])\n", + "\n", + " plt.tight_layout()\n", + "\n", + " plt.show()\n", + " \n", + " #Visualize contribution scores\n", + " plot_start = max_pos - 100\n", + " plot_end = max_pos + 6 + 100\n", + " \n", + " #Rev-comp scores if gene is on minus strand\n", + " if strands[example_ix] == '-' :\n", + " plot_end = seqs.shape[1] - (max_pos - 100)\n", + " plot_start = seqs.shape[1] - (max_pos + 6 + 100)\n", + " \n", + " #Plot sequence logo\n", + " visualize_input_gradient_pair(\n", + " scores[example_ix, :, :] if strands[example_ix] == '+' else scores[example_ix, ::-1, ::-1],\n", + " np.zeros(scores[example_ix, ...].shape),\n", + " plot_start=plot_start,\n", + " plot_end=plot_end,\n", 
+ " save_figs=False,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d7aefe0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh b/tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh new file mode 100755 index 0000000..7f1e551 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/run_gradients_expr_CFHR2.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_gene.py -o ../../../examples/saved_models/gtex_CFHR2 -f 3 -c 0 --rc --untransform_old --track_scale 0.01 --track_transform 0.75 --clip_soft 384.0 -t ../../../examples/targets_gtex_liver.txt ../../../examples/params_pred.json ../../../examples/saved_models ../../../examples/CFHR2_example.gtf diff --git a/tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh b/tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh new file mode 100755 index 0000000..e1f8b94 --- /dev/null +++ b/tutorials/legacy/interpret_sequence/run_gradients_polya_CD99.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_polya.py -o ../../../examples/saved_models/gtex_CD99 -f 3 -c 0 --rc --untransform_old --track_scale 0.01 --track_transform 0.75 --clip_soft 384.0 -t ../../../examples/targets_gtex.txt ../../../examples/params_pred.json ../../../examples/saved_models ../../../examples/CD99_example.gtf diff --git a/tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh b/tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh new file mode 100755 index 0000000..9fc75fb 
--- /dev/null +++ b/tutorials/legacy/interpret_sequence/run_gradients_splice_GCFC2.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +borzoi_satg_splice.py -o ../../../examples/saved_models/gtex_GCFC2 -f 3 -c 0 --rc --untransform_old --track_scale 0.01 --track_transform 0.75 --clip_soft 384.0 -t ../../../examples/targets_gtex.txt ../../../examples/params_pred.json ../../../examples/saved_models ../../../examples/GCFC2_example.gtf diff --git a/tutorials/legacy/interpret_sequence/vis_helpers.py b/tutorials/legacy/interpret_sequence/vis_helpers.py new file mode 100644 index 0000000..00b92ef --- /dev/null +++ b/tutorials/legacy/interpret_sequence/vis_helpers.py @@ -0,0 +1,153 @@ +import sys +import os +import numpy as np + +import matplotlib.pyplot as plt + +import matplotlib.cm as cm +import matplotlib.colors as colors + +import matplotlib as mpl +from matplotlib.text import TextPath +from matplotlib.patches import PathPatch, Rectangle +from matplotlib.font_manager import FontProperties +from matplotlib import gridspec +from matplotlib.ticker import FormatStrFormatter + +#Helper function to draw a letter at a given position +def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): + + fp = FontProperties(family="DejaVu Sans", weight="bold") + globscale = 1.35 + LETTERS = { "T" : TextPath((-0.305, 0), "T", size=1, prop=fp), + "G" : TextPath((-0.384, 0), "G", size=1, prop=fp), + "A" : TextPath((-0.35, 0), "A", size=1, prop=fp), + "C" : TextPath((-0.366, 0), "C", size=1, prop=fp), + "UP" : TextPath((-0.488, 0), '$\\Uparrow$', size=1, prop=fp), + "DN" : TextPath((-0.488, 0), '$\\Downarrow$', size=1, prop=fp), + "(" : TextPath((-0.25, 0), "(", size=1, prop=fp), + "." 
: TextPath((-0.125, 0), "-", size=1, prop=fp), + ")" : TextPath((-0.1, 0), ")", size=1, prop=fp)} + COLOR_SCHEME = {'G': 'orange',#'orange', + 'A': 'green',#'red', + 'C': 'blue',#'blue', + 'T': 'red',#'darkgreen', + 'UP': 'green', + 'DN': 'red', + '(': 'black', + '.': 'black', + ')': 'black'} + + + text = LETTERS[letter] + + chosen_color = COLOR_SCHEME[letter] + if color is not None : + chosen_color = color + + t = mpl.transforms.Affine2D().scale(1*globscale, yscale*globscale) + \ + mpl.transforms.Affine2D().translate(x,y) + ax.transData + p = PathPatch(text, lw=0, fc=chosen_color, alpha=alpha, transform=t) + if ax != None: + ax.add_artist(p) + return p + +#Function to plot sequence logo +def plot_seq_scores(importance_scores, figsize=(16, 2), plot_y_ticks=True, y_min=None, y_max=None, save_figs=False, fig_name="default") : + + importance_scores = importance_scores.T + + fig = plt.figure(figsize=figsize) + + ref_seq = "" + for j in range(importance_scores.shape[1]) : + argmax_nt = np.argmax(np.abs(importance_scores[:, j])) + + if argmax_nt == 0 : + ref_seq += "A" + elif argmax_nt == 1 : + ref_seq += "C" + elif argmax_nt == 2 : + ref_seq += "G" + elif argmax_nt == 3 : + ref_seq += "T" + + ax = plt.gca() + + for i in range(0, len(ref_seq)) : + mutability_score = np.sum(importance_scores[:, i]) + color = None + dna_letter_at(ref_seq[i], i + 0.5, 0, mutability_score, ax, color=color) + + plt.sca(ax) + plt.xticks([], []) + plt.gca().yaxis.set_major_formatter(FormatStrFormatter('%.3f')) + + plt.xlim((0, len(ref_seq))) + + #plt.axis('off') + + if plot_y_ticks : + plt.yticks(fontsize=12) + else : + plt.yticks([], []) + + if y_min is not None and y_max is not None : + plt.ylim(y_min, y_max) + elif y_min is not None : + plt.ylim(y_min) + else : + plt.ylim( + np.min(importance_scores) - 0.1 * np.max(np.abs(importance_scores)), + np.max(importance_scores) + 0.1 * np.max(np.abs(importance_scores)) + ) + + plt.axhline(y=0., color='black', linestyle='-', linewidth=1) + + #for 
axis in fig.axes : + # axis.get_xaxis().set_visible(False) + # axis.get_yaxis().set_visible(False) + + plt.tight_layout() + + if save_figs : + plt.savefig(fig_name + ".png", transparent=True, dpi=300) + plt.savefig(fig_name + ".eps") + + plt.show() + +#Function to visualize a pair of sequence logos +def visualize_input_gradient_pair(att_grad_wt, att_grad_mut, plot_start=0, plot_end=100, save_figs=False, fig_name='') : + + scores_wt = att_grad_wt[plot_start:plot_end, :] + scores_mut = att_grad_mut[plot_start:plot_end, :] + + y_min = min(np.min(scores_wt), np.min(scores_mut)) + y_max = max(np.max(scores_wt), np.max(scores_mut)) + + y_max_abs = max(np.abs(y_min), np.abs(y_max)) + + y_min = y_min - 0.05 * y_max_abs + y_max = y_max + 0.05 * y_max_abs + + if np.sum(scores_mut) != 0. : + print("--- WT ---") + + plot_seq_scores( + scores_wt, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_wt', + ) + + if np.sum(scores_mut) != 0. : + + print("--- Mut ---") + plot_seq_scores( + scores_mut, y_min=y_min, y_max=y_max, + figsize=(8, 1), + plot_y_ticks=False, + save_figs=save_figs, + fig_name=fig_name + '_mut', + ) diff --git a/tutorials/legacy/make_data/Makefile b/tutorials/legacy/make_data/Makefile new file mode 100644 index 0000000..f2dce79 --- /dev/null +++ b/tutorials/legacy/make_data/Makefile @@ -0,0 +1,45 @@ +FASTA_HUMAN=$$BORZOI_HG38/assembly/ucsc/hg38.ml.fa +GAPS_HUMAN=$$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed +UMAP_HUMAN=$$BORZOI_HG38/mappability/umap_k36_t10_l32.bed +BLACK_HUMAN=$$BORZOI_HG38/blacklist/blacklist_hg38_all.bed + +FASTA_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10.ml.fa +GAPS_MOUSE=$$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed +UMAP_MOUSE=$$BORZOI_MM10/mappability/umap_k36_t10_l32.bed +BLACK_MOUSE=$$BORZOI_MM10/blacklist/blacklist_mm10_all.bed + +ALIGN=$$BORZOI_HG38/align/hg38.mm10.syn.net.gz + +OUT=data + +# mini borzoi configuration +LENGTH=393216 +TSTRIDE=43691 # (393216-2*131072)/3 +CROP=98304 
+WIDTH=32 +FOLDS=8 + +AOPTS=--break 2097152 -c $(CROP) --nf 524288 --no 393216 -l $(LENGTH) --stride $(TSTRIDE) -f $(FOLDS) --umap_t 0.5 -w $(WIDTH) +DOPTS=-c $(CROP) -d 2 -f $(FOLDS) -l $(LENGTH) -p 64 -r 16 --umap_clip 0.5 -w $(WIDTH) --transform_old + +all: $(OUT)/hg38/tfrecords/train-0.tfr # $(OUT)/mm10/tfrecords/train-0.tfr + +umap_human.bed: + cat $(UMAP_HUMAN) $(BLACK_HUMAN) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_human.bed + +umap_mouse.bed: + cat $(UMAP_MOUSE) $(BLACK_MOUSE) | awk 'BEGIN {OFS="\t"} {print $$1, $$2, $$3}' | bedtools sort -i - | bedtools merge -i - > umap_mouse.bed + +# targets file is already generated in this example +#targets_human.txt targets_mouse.txt: +# ./make_targets.py + +$(OUT)/hg38/sequences.bed $(OUT)/mm10/sequences.bed: umap_human.bed umap_mouse.bed + hound_data_align.py -a hg38,mm10 -g $(GAPS_HUMAN),$(GAPS_MOUSE) -u umap_human.bed,umap_mouse.bed $(AOPTS) -o $(OUT) $(ALIGN) $(FASTA_HUMAN),$(FASTA_MOUSE) + +$(OUT)/hg38/tfrecords/train-0.tfr: $(OUT)/hg38/sequences.bed targets_human.txt + hound_data.py --restart $(DOPTS) -b $(BLACK_HUMAN) -o $(OUT)/hg38 $(FASTA_HUMAN) -u umap_human.bed targets_human.txt + +# no mouse data in this example +#$(OUT)/mm10/tfrecords/train-0.tfr: $(OUT)/mm10/sequences.bed targets_mouse.txt +# hound_data.py --restart $(DOPTS) -b $(BLACK_MOUSE) -o $(OUT)/mm10 $(FASTA_MOUSE) -u umap_mouse.bed targets_mouse.txt diff --git a/tutorials/legacy/make_data/README.md b/tutorials/legacy/make_data/README.md new file mode 100644 index 0000000..035a37d --- /dev/null +++ b/tutorials/legacy/make_data/README.md @@ -0,0 +1,3 @@ +## Data Processing + +Todo. 
diff --git a/tutorials/legacy/make_data/download_bw.sh b/tutorials/legacy/make_data/download_bw.sh new file mode 100755 index 0000000..239f004 --- /dev/null +++ b/tutorials/legacy/make_data/download_bw.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# download example data from ENCODE (ENCSR000AEL - K562 RNA-seq); 2 replicates + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define remote urls +URL_P_REP1='https://www.encodeproject.org/files/ENCFF980ZHM/@@download/ENCFF980ZHM.bigWig' +URL_M_REP1='https://www.encodeproject.org/files/ENCFF533LJF/@@download/ENCFF533LJF.bigWig' + +URL_P_REP2='https://www.encodeproject.org/files/ENCFF335LVS/@@download/ENCFF335LVS.bigWig' +URL_M_REP2='https://www.encodeproject.org/files/ENCFF257NOL/@@download/ENCFF257NOL.bigWig' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for bigwig files +mkdir -p "human/rna/encode/$ENC_ID/rep1" +mkdir -p "human/rna/encode/$ENC_ID/rep2" + + +# download bigwig files; rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 1)." +else + wget $URL_P_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" + wget $URL_M_REP1 -O "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" +fi + +# download bigwig files; rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" ]; then + echo "example RNA-seq data already downloaded (rep 2)." 
+else + wget $URL_P_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" + wget $URL_M_REP2 -O "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" +fi diff --git a/tutorials/legacy/make_data/download_dependencies.sh b/tutorials/legacy/make_data/download_dependencies.sh new file mode 100755 index 0000000..cd23a51 --- /dev/null +++ b/tutorials/legacy/make_data/download_dependencies.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# create additional folder in borzoi data folders +mkdir -p "$BORZOI_HG38/assembly/ucsc" +mkdir -p "$BORZOI_HG38/assembly/gnomad" +mkdir -p "$BORZOI_HG38/mappability" +mkdir -p "$BORZOI_HG38/blacklist" +mkdir -p "$BORZOI_HG38/align" + +mkdir -p "$BORZOI_MM10/assembly/ucsc" +mkdir -p "$BORZOI_MM10/mappability" +mkdir -p "$BORZOI_MM10/blacklist" + + +# download and uncompress auxiliary files required for Makefile (hg38) +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" ]; then + echo "hg38_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gaps.bed.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38_gaps.bed" +fi + +if [ -f "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (hg38) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_hg38.bed.gz | gunzip -c > "$BORZOI_HG38/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" ]; then + echo "blacklist_hg38_all.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_hg38_all.bed.gz | gunzip -c > "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" +fi + +if [ -f "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" ]; then + echo "Splice site annotation already exist." 
+else + wget https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.mm10.syn.net.gz -O "$BORZOI_HG38/align/hg38.mm10.syn.net.gz" +fi + + +# download and uncompress auxiliary files required for Makefile (mm10) +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" ]; then + echo "mm10_gaps.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10_gaps.bed.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10_gaps.bed" +fi + +if [ -f "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" ]; then + echo "umap_k36_t10_l32.bed (mm10) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_k36_t10_l32_mm10.bed.gz | gunzip -c > "$BORZOI_MM10/mappability/umap_k36_t10_l32.bed" +fi + +if [ -f "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" ]; then + echo "blacklist_mm10_all.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/blacklist_mm10_all.bed.gz | gunzip -c > "$BORZOI_MM10/blacklist/blacklist_mm10_all.bed" +fi + + +# download and uncompress pre-compiled umap bed files +if [ -f umap_human.bed ]; then + echo "umap_human.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_human.bed.gz | gunzip -c > umap_human.bed +fi + +if [ -f umap_mouse.bed ]; then + echo "umap_mouse.bed already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/umap_mouse.bed.gz | gunzip -c > umap_mouse.bed +fi + + +# download and index hg38 ml genome +if [ -f "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" ]; then + echo "hg38.ml.fa already exists." 
+else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/ucsc/hg38.ml.fa" +fi + +# download and index hg38 ml genome (gnomad major alleles) +if [ -f "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" ]; then + echo "hg38.ml.fa (gnomad) already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/hg38_gnomad.ml.fa.gz | gunzip -c > "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" + idx_genome.py "$BORZOI_HG38/assembly/gnomad/hg38.ml.fa" +fi + +# download and index mm10 ml genome +if [ -f "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" ]; then + echo "mm10.ml.fa already exists." +else + wget -O - https://storage.googleapis.com/seqnn-share/helper/dependencies/mm10.ml.fa.gz | gunzip -c > "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" + idx_genome.py "$BORZOI_MM10/assembly/ucsc/mm10.ml.fa" +fi diff --git a/tutorials/legacy/make_data/process_w5.sh b/tutorials/legacy/make_data/process_w5.sh new file mode 100755 index 0000000..9caa697 --- /dev/null +++ b/tutorials/legacy/make_data/process_w5.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# merge bigwig replicates, generate .w5 files and run qc + +# define ENCODE ID +ENC_ID='ENCSR000AEL' + +# define ENCODE file IDs +FILE_P_REP1='ENCFF980ZHM' +FILE_M_REP1='ENCFF533LJF' + +FILE_P_REP2='ENCFF335LVS' +FILE_M_REP2='ENCFF257NOL' + +# create folder for merged replicate files +mkdir -p "human/rna/encode/$ENC_ID/summary" + + +# step 1: generate per-replicate .w5 files + +# rep1 +if [ -f "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 1)." 
+else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1.bigWig" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" +fi + +# rep2 +if [ -f "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" ]; then + echo "example RNA-seq .w5 already exists (rep 2)." +else + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + bw_h5.py -z "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2.bigWig" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 2: merge replicates + +if [ -f "human/rna/encode/$ENC_ID/summary/coverage+.w5" ]; then + echo "example RNA-seq .w5 already exists (merged)." +else + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage+.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_merge.py -w -s mean -z "human/rna/encode/$ENC_ID/summary/coverage-.w5" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" +fi + + +# step 3: run qc on each replicate and the merged file + +if [ -f "human/rna/encode/$ENC_ID/summary/covqc/means.txt" ]; then + echo "qc statistics already exist." 
+else + # rep1 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc" "human/rna/encode/$ENC_ID/rep1/$FILE_P_REP1+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep1/covqc_m" "human/rna/encode/$ENC_ID/rep1/$FILE_M_REP1-.w5" + + # rep2 + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc" "human/rna/encode/$ENC_ID/rep2/$FILE_P_REP2+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/rep2/covqc_m" "human/rna/encode/$ENC_ID/rep2/$FILE_M_REP2-.w5" + + # summary + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc" "human/rna/encode/$ENC_ID/summary/coverage+.w5" + w5_qc.py -b "$BORZOI_HG38/blacklist/blacklist_hg38_all.bed" -o "human/rna/encode/$ENC_ID/summary/covqc_m" "human/rna/encode/$ENC_ID/summary/coverage-.w5" +fi + diff --git a/tutorials/legacy/make_data/targets_human.txt b/tutorials/legacy/make_data/targets_human.txt new file mode 100644 index 0000000..0baf8d7 --- /dev/null +++ b/tutorials/legacy/make_data/targets_human.txt @@ -0,0 +1,3 @@ + identifier file clip clip_soft scale sum_stat strand_pair description +0 ENCFF980ZHM+ human/rna/encode/ENCSR000AEL/summary/coverage+.w5 768 384 0.3 sum_sqrt 1 RNA:K562 +1 ENCFF980ZHM- human/rna/encode/ENCSR000AEL/summary/coverage-.w5 768 384 0.3 sum_sqrt 0 RNA:K562 diff --git a/tutorials/legacy/score_variants/README.md b/tutorials/legacy/score_variants/README.md new file mode 100644 index 0000000..827434f --- /dev/null +++ b/tutorials/legacy/score_variants/README.md @@ -0,0 +1,3 @@ +## Variant Scoring + +Todo. 
diff --git a/tutorials/legacy/score_variants/run_variant_scripts.ipynb b/tutorials/legacy/score_variants/run_variant_scripts.ipynb new file mode 100644 index 0000000..828c610 --- /dev/null +++ b/tutorials/legacy/score_variants/run_variant_scripts.ipynb @@ -0,0 +1,201 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f5d0f9fb", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import h5py\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a94cbf8", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-specific variant effect scores\n", + "\n", + "!./score_expr_sed.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1047ff0f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "score: 'logSED', snp: 'chr1_46309111_A_G_b38', gene: 'ENSG00000237090.1', track: 'RNA:adipose_tissue' => -0.2551\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (gene-specific expression)\n", + "\n", + "sed_h5 = h5py.File('snp_sed/f3c0/sed.h5', 'r')\n", + "\n", + "row_ix = 63\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logSED', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['logSED'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f105ecd9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate gene-agnostic variant effect scores\n", + "\n", + "!./score_expr_sad.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "96e4f7cb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "score: 'logD2', snp: 
'chr1_43120331_C_T_b38', track: 'RNA:adipose_tissue' => 0.1057\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP (gene-agnostic expression)\n", + "\n", + "sad_h5 = h5py.File('snp_sad/f3c0/sad.h5', 'r')\n", + "\n", + "snp_ix = 1\n", + "target_ix = 0\n", + "\n", + "print(\"score: 'logD2', snp: '\" + str(sad_h5['snp'][snp_ix].decode()) + \"', track: '\" + str(sad_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sad_h5['logD2'][snp_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c56efaef", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate splice variant effect scores\n", + "\n", + "!./score_splice.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "980993fc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "score: 'nDi', snp: 'chr1_156236330_G_A', gene: 'ENSG00000225905.1', track: 'RNA:foreskin fibroblast male newborn' => 0.0022\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (splicing)\n", + "\n", + "sed_h5 = h5py.File('snp_splice/f3c0/sed.h5', 'r')\n", + "\n", + "row_ix = 116\n", + "target_ix = 755\n", + "\n", + "print(\"score: 'nDi', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['nDi'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05cccfb6", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#Calculate polyadenylation variant effect scores\n", + "\n", + "!./score_polya.sh\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "43ac562f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"score: 'logSED', snp: 'chr16_80976052_T_G', gene: 'ENSG00000132879.14', track: 'RNA:HeLa-S3 nuclear fraction' => 0.0628\n" + ] + } + ], + "source": [ + "#Print an example variant effect prediction for a SNP-gene pair (polyadenylation)\n", + "\n", + "sed_h5 = h5py.File('snp_polya/f3c0/sed.h5', 'r')\n", + "\n", + "row_ix = 47\n", + "target_ix = 100\n", + "\n", + "print(\"score: 'logSED', snp: '\" + str(sed_h5['snp'][sed_h5['si'][row_ix]].decode()) + \"', gene: '\" + str(sed_h5['gene'][sed_h5['si'][row_ix]].decode()) + \"', track: '\" + str(sed_h5['target_labels'][target_ix].decode()) + \"' => \" + str(round(sed_h5['COVR'][row_ix, target_ix], 4)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ba23572", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/legacy/score_variants/score_expr_sad.sh b/tutorials/legacy/score_variants/score_expr_sad.sh new file mode 100755 index 0000000..0d7c74a --- /dev/null +++ b/tutorials/legacy/score_variants/score_expr_sad.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sad/f3c0 + +borzoi_sad.py -o snp_sad/f3c0 --rc --stats logD2 -u -t ../../../examples/targets_human.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_expr.vcf diff --git a/tutorials/legacy/score_variants/score_expr_sed.sh b/tutorials/legacy/score_variants/score_expr_sed.sh new file mode 100755 index 0000000..9b97e2e --- /dev/null +++ b/tutorials/legacy/score_variants/score_expr_sed.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_sed/f3c0 + 
+borzoi_sed.py -o snp_sed/f3c0 --rc --stats logSED,logD2 -u -t ../../../examples/targets_gtex.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_expr.vcf diff --git a/tutorials/legacy/score_variants/score_polya.sh b/tutorials/legacy/score_variants/score_polya.sh new file mode 100755 index 0000000..7eb24a5 --- /dev/null +++ b/tutorials/legacy/score_variants/score_polya.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_polya/f3c0 + +borzoi_sed_paqtl_cov.py -o snp_polya/f3c0 --rc --stats COVR -u -t ../../../examples/targets_rna.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_polya.vcf diff --git a/tutorials/legacy/score_variants/score_splice.sh b/tutorials/legacy/score_variants/score_splice.sh new file mode 100755 index 0000000..f85779f --- /dev/null +++ b/tutorials/legacy/score_variants/score_splice.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +mkdir -p snp_splice/f3c0 + +borzoi_sed.py -o snp_splice/f3c0 --span --no_untransform --rc --stats nDi -u -t ../../../examples/targets_rna.txt ../../../examples/params_pred.json ../../../examples/saved_models/f3c0/train/model0_best.h5 snps_splice.vcf diff --git a/tutorials/legacy/score_variants/snps_expr.vcf b/tutorials/legacy/score_variants/snps_expr.vcf new file mode 100644 index 0000000..bb8d7cc --- /dev/null +++ b/tutorials/legacy/score_variants/snps_expr.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +chr1 43110773 chr1_43110773_G_A_b38 G A . . +chr1 43120331 chr1_43120331_C_T_b38 C T . . +chr1 46309111 chr1_46309111_A_G_b38 A G . . +chr1 52632886 chr1_52632886_A_C_b38 A C . . +chr1 54053434 chr1_54053434_G_A_b38 G A . . 
diff --git a/tutorials/legacy/score_variants/snps_polya.vcf b/tutorials/legacy/score_variants/snps_polya.vcf new file mode 100644 index 0000000..5be4cad --- /dev/null +++ b/tutorials/legacy/score_variants/snps_polya.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 11790946 chr1_11790946_G_C G C . . MT=ENSG00000177000.grp_2.downstream.ENST00000641805;PD=924;PI=chr1_11790946_G_C +chr1 150160094 chr1_150160094_C_G C G . . MT=ENSG00000023902.grp_1.downstream.ENST00000369126;PD=29;PI=chr1_150160094_C_G +chr16 57665101 chr16_57665101_A_G A G . . MT=ENSG00000205336.grp_1.downstream.ENST00000568908;PD=73;PI=chr16_57665101_A_G +chr16 80976052 chr16_80976052_T_G T G . . MT=ENSG00000103121.grp_2.downstream.ENST00000565925;PD=24;PI=chr16_80976052_T_G +chr16 88857261 chr16_88857261_T_C T C . . MT=ENSG00000167515.grp_2.downstream.ENST00000564547;PD=3851;PI=chr16_88857261_T_C \ No newline at end of file diff --git a/tutorials/legacy/score_variants/snps_splice.vcf b/tutorials/legacy/score_variants/snps_splice.vcf new file mode 100644 index 0000000..710eaf2 --- /dev/null +++ b/tutorials/legacy/score_variants/snps_splice.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##INFO= +##INFO= +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 1665061 chr1_1665061_C_T C T . . MT=ENSG00000189339.grp_2.contained.ENST00000611123;SD=959;PI=chr1_1665061_C_T +chr1 1689221 chr1_1689221_G_A G A . . MT=ENSG00000189339.grp_1.contained.ENST00000614300;SD=1753;PI=chr1_1689221_G_A +chr1 50655526 chr1_50655526_T_C T C . . MT=ENSG00000185104.grp_2.contained.ENST00000396153;SD=3;PI=chr1_50655526_T_C +chr1 109489368 chr1_109489368_C_G C G . . MT=ENSG00000143537.grp_2.contained.ENST00000360674;SD=1;PI=chr1_155060832_G_A +chr1 156236330 chr1_156236330_G_A G A . . 
MT=ENSG00000160783.grp_1.contained.ENST00000368279;SD=17;PI=chr1_156236330_G_A diff --git a/tutorials/legacy/train_model/README.md b/tutorials/legacy/train_model/README.md new file mode 100644 index 0000000..1587061 --- /dev/null +++ b/tutorials/legacy/train_model/README.md @@ -0,0 +1,3 @@ +## Model Training + +Todo. diff --git a/tutorials/legacy/train_model/params_micro.json b/tutorials/legacy/train_model/params_micro.json new file mode 100644 index 0000000..5a9c716 --- /dev/null +++ b/tutorials/legacy/train_model/params_micro.json @@ -0,0 +1,78 @@ +{ + "train": { + "batch_size": 4, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0002, + "loss": "poisson_mn", + "total_weight": 0.2, + "warmup_steps": 10000, + "global_clipnorm": 0.2, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 1.0e-6, + "trunk": [ + { + "name": "conv_dna", + "filters": 128, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 160, + "filters_end": 320, + "divisible_by": 8, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 32, + "heads": 4, + "num_position_features": 32, + "dropout": 0.1, + "attention_dropout": 0.01, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 4 + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "Cropping1D", + "cropping": 3072 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git 
a/tutorials/legacy/train_model/params_mini.json b/tutorials/legacy/train_model/params_mini.json new file mode 100644 index 0000000..14c089c --- /dev/null +++ b/tutorials/legacy/train_model/params_mini.json @@ -0,0 +1,77 @@ +{ + "train": { + "batch_size": 2, + "shuffle_buffer": 256, + "optimizer": "adam", + "learning_rate": 0.0001, + "loss": "poisson_mn", + "total_weight": 0.2, + "warmup_steps": 20000, + "global_clipnorm": 0.1, + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "patience": 30, + "train_epochs_min": 130, + "train_epochs_max": 180 + }, + "model": { + "seq_length": 393216, + "augment_rc": true, + "augment_shift": 3, + "activation": "gelu", + "norm_type": "batch", + "bn_momentum": 0.9, + "kernel_initializer": "lecun_normal", + "l2_scale": 1.0e-6, + "trunk": [ + { + "name": "conv_dna", + "filters": 320, + "kernel_size": 11, + "norm_type": null, + "activation": "linear", + "pool_size": 2 + }, + { + "name": "res_tower", + "filters_init": 384, + "filters_end": 768, + "divisible_by": 16, + "kernel_size": 5, + "num_convs": 1, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "transformer_tower", + "key_size": 64, + "heads": 4, + "num_position_features": 32, + "dropout": 0.2, + "mha_l2_scale": 1.0e-8, + "l2_scale": 1.0e-8, + "kernel_initializer": "he_normal", + "repeat": 8 + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "unet_conv", + "kernel_size": 3, + "upsample_conv": true + }, + { + "name": "Cropping1D", + "cropping": 3072 + } + ], + "head_human": { + "name": "final", + "units": 2, + "activation": "softplus" + } + } +} diff --git a/tutorials/legacy/train_model/train_micro.sh b/tutorials/legacy/train_model/train_micro.sh new file mode 100755 index 0000000..3c334ee --- /dev/null +++ b/tutorials/legacy/train_model/train_micro.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o micro_models params_micro.json ../make_data/data/hg38 diff --git 
a/tutorials/legacy/train_model/train_mini.sh b/tutorials/legacy/train_model/train_mini.sh new file mode 100755 index 0000000..2cc5aa4 --- /dev/null +++ b/tutorials/legacy/train_model/train_mini.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +westminster_train_folds.py -e borzoi_py310 -f 2 -c 1 -q rtx4090 -o mini_models params_mini.json ../make_data/data/hg38 From 125b1b90529381afd2eb069ebdcbe21f41569764 Mon Sep 17 00:00:00 2001 From: johli Date: Fri, 4 Oct 2024 10:23:29 -0700 Subject: [PATCH 13/32] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0cbc43b..8352984 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ These repositories further depend on a number of python packages (which are auto A new conda environment can be created with `conda create -n borzoi_py310 python=3.10`.
Some of the scripts in this repository start multi-process jobs and require [slurm](https://slurm.schedmd.com/). -Finally, the code base relies on a number of environment variables. For convenience, these can be configured in the active conda environment with the 'env_vars.sh' script. +Finally, the code base relies on a number of environment variables. For convenience, these can be configured in the active conda environment with the 'env_vars.sh' script. First, open up 'env_vars.sh' in each repository folder and change the two lines of code at the top to your username and local path. Then, issue these commands: ```sh cd borzoi conda activate borzoi_py310 From a40c67a222a6c29e9f684c06f2e15acf460867f4 Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Fri, 4 Oct 2024 15:27:56 -0700 Subject: [PATCH 14/32] Updated params file. --- examples/params.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/params.json b/examples/params.json index 4fe232c..a0f6b07 100644 --- a/examples/params.json +++ b/examples/params.json @@ -1,6 +1,6 @@ { "train": { - "batch_size": 1, + "batch_size": 2, "shuffle_buffer": 256, "optimizer": "adam", "learning_rate": 0.00006, From ec758a1eb4f062bb2474ee3622427a49dcdae6d2 Mon Sep 17 00:00:00 2001 From: johli Date: Fri, 4 Oct 2024 16:00:06 -0700 Subject: [PATCH 15/32] Update README.md --- README.md | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8352984..c89042a 100644 --- a/README.md +++ b/README.md @@ -122,19 +122,12 @@ The curated e-/s-/pa-/ipaQTL benchmarking data can be downloaded from the follow To replicate the results presented in the paper, visit the [borzoi-paper repository](https://github.com/calico/borzoi-paper.git). This repository contains scripts for **training**, **evaluating**, and **analyzing** the published model, and for processing the **training data**. ### Tutorials -Todo. 
+The following directories contain *minimal* tutorials regarding model training, variant scoring, and interpretation. The 'legacy' tutorials use data transformations that are similar to those used in the manuscript, while 'latest' use updated (and simpler) transformations. Note that these tutorials are only intended to showcase core functionality on sample data (such as processing an RNA-seq experiment, or training a simple model). For advanced analyses, we recommend studying the results presented in the manuscript (see [Paper Replication](https://github.com/calico/borzoi/tree/main?tab=readme-ov-file#paper-replication)). -#### Data Processing -Todo. - -#### Model Training -Todo. - -#### Variant Scoring -Todo. - -#### Sequence Attribution -Todo. +- **Data Processing** [latest](https://github.com/calico/borzoi/tree/main/tutorials/latest/make_data) | [legacy](https://github.com/calico/borzoi/tree/main/tutorials/legacy/make_data)
+- **Model Training** [latest](https://github.com/calico/borzoi/tree/main/tutorials/latest/train_model) | [legacy](https://github.com/calico/borzoi/tree/main/tutorials/legacy/train_model)
+- **Variant Scoring** [latest](https://github.com/calico/borzoi/tree/main/tutorials/latest/score_variants) | [legacy](https://github.com/calico/borzoi/tree/main/tutorials/legacy/score_variants)
+- **Sequence Interpretation** [latest](https://github.com/calico/borzoi/tree/main/tutorials/latest/interpret_sequence) | [legacy](https://github.com/calico/borzoi/tree/main/tutorials/legacy/interpret_sequence)
### Example Notebooks The following notebooks contain example code for predicting and interpreting genetic variants. From 8b851dc154e081d2a12765afe7a13d2468f7fe4a Mon Sep 17 00:00:00 2001 From: johli Date: Fri, 4 Oct 2024 17:39:18 -0700 Subject: [PATCH 16/32] Update README.md --- tutorials/legacy/make_data/README.md | 42 +++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tutorials/legacy/make_data/README.md b/tutorials/legacy/make_data/README.md index 035a37d..05a53b6 100644 --- a/tutorials/legacy/make_data/README.md +++ b/tutorials/legacy/make_data/README.md @@ -1,3 +1,43 @@ ## Data Processing -Todo. +This tutorial describes how to process a .bigwig sequencing experiment into compressed .w5 format, merge replicates, generate QC metrics, and finally create TFRecord files containing binned coverage values suitable for training Borzoi models. We will exemplify this for the ENCODE K562 RNA-seq experiment [ENCSR000AEL](https://www.encodeproject.org/experiments/ENCSR000AEL/). + +First, activate the conda environment and run the script 'download_dependencies.sh' to download required auxiliary files. +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/legacy/make_data +./download_dependencies.sh +``` + +Next, run the script 'download_bw.sh' to download sample ENCODE .bigwig files and arrange them in a folder structure. +```sh +./download_bw.sh +``` + +Then run the script 'process_w5.sh' to generate compressed .w5 files (hdf5) from the input .bigwig files, merge the two replicates, and calculate basic QC metrics. This .sh script internally calls 'bw_h5.py' to generate .w5 files, 'w5_merge.py' to merge replicates, and 'w5_qc.py' to calculate QC metrics. +```sh +./process_w5.sh +``` + +Finally, run the Makefile to create genome-wide binned coverage tracks, stored as compressed TFRecords.
+```sh +make +``` + +In this example, the Makefile creates 8 cross-validation folds of TFRecords with input sequences of length 393216 bp, generated with a genome-wide stride of 43691 bp. The output coverage tracks corresponding to each input sequence are cropped by 98304 bp on each side, before pooling the measurements in 32 bp bins. This results in 6144 coverage bins per 393kb sequence. The specific .w5 tracks to include in the TFRecord generation, and the scales and pooling transforms applied to the bins of each experiment, are given in the targets file 'targets_human.txt'. Below is a description of the columns in this file. + +*targets_human.txt*: +- (unnamed) => integer index of each track (must start from 0 when training a new model). +- 'identifier' => unique identifier of each experiment (and strand). +- 'file' => local file path to .w5 file. +- 'clip' => hard clipping threshold to be applied to each bin, after soft-clipping. +- 'clip_soft' => soft clipping (squashing) threshold. +- 'scale' => scale value applied to each 32 bp bin after clipping. +- 'sum_stat' => type of bin-level pooling operation ('sum_sqrt' = sum and exponentiate by 3/4). +- 'strand_pair' => integer index of the other stranded track of an experiment (same index as current row if unstranded). +- 'description' => text description of experiment. + +*Notes*: +- See [here](https://github.com/calico/borzoi-paper/tree/main/data/training) for a description of the scripts called by the Makefile to create TFRecords. +- Of note, the **legacy** settings are activated in these data processing scripts with the flag '--transform_old' in the Makefile. +- The **legacy** approach crops the coverage tracks, a practice we have since abandoned in favor of a position-specific loss scale.
From 66bb7300e055536f1c6fc0f2e59f176a021fd59c Mon Sep 17 00:00:00 2001 From: johli Date: Fri, 4 Oct 2024 17:58:27 -0700 Subject: [PATCH 17/32] Update README.md --- tutorials/legacy/train_model/README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tutorials/legacy/train_model/README.md b/tutorials/legacy/train_model/README.md index 1587061..4cf844b 100644 --- a/tutorials/legacy/train_model/README.md +++ b/tutorials/legacy/train_model/README.md @@ -1,3 +1,20 @@ ## Model Training -Todo. +This tutorial describes how to train smaller Borzoi models on the example RNA-seq experiment processed in the [make_data tutorial](https://github.com/calico/borzoi/tree/main/tutorials/legacy/make_data). + +To train a 'Mini Borzoi' ensemble (~40M parameters, 2 cross-validation folds), run the script 'train_mini.sh'. The model parameters are specified in 'params_mini.json'. This model can be trained with a batch size of 2 on a 24GB NVIDIA Titan RTX or RTX4090 GPU. +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/legacy/train_model +./train_mini.sh +``` + +Alternatively, to train an even smaller 'Micro Borzoi' ensemble (~5M parameters), run the script 'train_micro.sh'. This model can fit into the above GPU cards with a batch size of 4, which means the learning rate can be doubled and each epoch finished in half the time. +```sh +./train_micro.sh +``` + +*Notes*: +- See [here](https://github.com/calico/borzoi-paper/tree/main/model) for a description of the scripts called internally by the training .sh script. +- The **legacy** model crops the predicted tracks (see layer 'Cropping1D' in the parameters file). In this example, the input sequence has length 393216 bp, and the cropping layer removes 3072x 32 bp bins from each side, resulting in 6144 bins. +- In the **legacy** architecture, there is an extra/superfluous linear convolution applied in each 'unet_conv' layer (see the bool 'upsample_conv' in the parameters file). 
This additional convolution has since been removed. From 9ea70888f309946c634f6e2c18ee484b2802e1a7 Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 09:51:47 -0700 Subject: [PATCH 18/32] Update README.md --- tutorials/legacy/score_variants/README.md | 30 ++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tutorials/legacy/score_variants/README.md b/tutorials/legacy/score_variants/README.md index 827434f..8387632 100644 --- a/tutorials/legacy/score_variants/README.md +++ b/tutorials/legacy/score_variants/README.md @@ -1,3 +1,31 @@ ## Variant Scoring -Todo. +This tutorial describes how to predict variant effect scores for a small set of SNVs defined in a .vcf file. For examples showcasing variant effect prediction at a larger scale (e.g. fine-mapped eQTL classification benchmarks), we refer the user to the [borzoi-paper repository](https://github.com/calico/borzoi-paper/tree/main). This example uses the pre-trained, published Borzoi model to predict variant effects. To download this model, run the script 'download_models.sh' in the 'borzoi' root folder. + +First, to calculate **gene-specific expression** scores, run the script 'score_expr_sed.sh'. Two different statistics are computed: (1) logSED (gene expression log fold change), and (2) logD2 (bin-level L2 norm across the coverage profile intersecting the exons of the gene). +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/legacy/score_variants +./score_expr_sed.sh +``` + +To calculate **gene-agnostic expression** scores, run the script 'score_expr_sad.sh'. One statistic is computed: logD2 (bin-level L2 norm across the entire predicted coverage track). +```sh +./score_expr_sad.sh +``` + +To calculate **gene-specific polyadenylation** scores, run the script 'score_polya.sh'. One statistic is computed: COVR (3' coverage ratio across pA junctions of the target gene).
+```sh +./score_polya.sh +``` + +To calculate **gene-specific splicing** scores, run the script 'score_splice.sh'. One statistic is computed: nDi (normalized maximum absolute difference in coverage bins across the target gene span). +```sh +./score_splice.sh +``` + +Finally, the jupyter notebook 'run_variant_scripts.ipynb' is provided for convenience to execute all above scripts. The notebook also exemplifies how to navigate the variant prediction hdf5 files and print some example scores. + +*Notes*: +- The legacy data transforms are activated in all above .sh scripts with the flag '-u'. + From a19b0ce26bc26fc2676dea65900243827cffc7b4 Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:14:45 -0700 Subject: [PATCH 19/32] Update README.md --- tutorials/legacy/interpret_sequence/README.md | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tutorials/legacy/interpret_sequence/README.md b/tutorials/legacy/interpret_sequence/README.md index 1ac18dd..b98b5b6 100644 --- a/tutorials/legacy/interpret_sequence/README.md +++ b/tutorials/legacy/interpret_sequence/README.md @@ -1,3 +1,25 @@ ## Interpretation -Todo. +This tutorial describes how to compute gradient saliency scores (sequence attributions) with respect to various statistics computed for a list of input genes specified in a .gtf file. This example uses the pre-trained, published Borzoi model to compute gradients. To download this model, run the script 'download_models.sh' in the 'borzoi' root folder. + +First, to compute input gradients with respect to the log-sum of coverage across the exons of the target gene, run the script 'run_gradients_expr_CFHR2.sh'. +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/legacy/interpret_sequence +./run_gradients_expr_CFHR2.sh +``` + +To compute input gradients with respect to the log-ratio of coverage immediately upstream and downstream of the distal polyA site of the target gene, run the script 'run_gradients_polya_CD99.sh'. 
+```sh +./run_gradients_polya_CD99.sh +``` + +To compute input gradients with respect to the log-ratio of coverage of an exon of the target gene relative to intronic coverage, run the script 'run_gradients_splice_GCFC2.sh'. +```sh +./run_gradients_splice_GCFC2.sh +``` +Currently, the splicing gradient script chooses one exon at random to compute gradients for. While this approach was favorable for the specific analysis of the manuscript, we acknowledge that this is not particularly useful to users wanting to investigate an exon of their choice. We plan on updating this script soon to allow users to specify which exon to calculate gradients for. + +*Notes*: +- The track scale, squashing exponentiation, and clip-soft threshold are specified in the .py script arguments (flags: '--track_scale', '--track_transform', '--clip_soft'), and the values in the targets file are ignored. This means that the same data transformation parameters are applied to all tracks specified in the targets file. To calculate gradients for groups of tracks with different data transforms, separate these tracks into different targets files, and execute the gradient script on each group separately. +- The legacy data transforms are activated in all above .sh scripts with the flag '--untransform_old'. From 14c076333d00f23aaae422f79d2586b82a91c45c Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:29:00 -0700 Subject: [PATCH 20/32] Update README.md --- tutorials/latest/make_data/README.md | 41 +++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tutorials/latest/make_data/README.md b/tutorials/latest/make_data/README.md index 035a37d..60f5692 100644 --- a/tutorials/latest/make_data/README.md +++ b/tutorials/latest/make_data/README.md @@ -1,3 +1,42 @@ ## Data Processing -Todo.
+This tutorial describes how to process a .bigwig sequencing experiment into compressed .w5 format, merge replicates, generate QC metrics, and finally create TFRecord files containing binned coverage values suitable for training Borzoi models. We will exemplify this for the ENCODE K562 RNA-seq experiment [ENCSR000AEL](https://www.encodeproject.org/experiments/ENCSR000AEL/). + +First, activate the conda environment and run the script 'download_dependencies.sh' to download required auxiliary files. +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/latest/make_data +./download_dependencies.sh +``` + +Next, run the script 'download_bw.sh' to download sample ENCODE .bigwig files and arrange them in a folder structure. +```sh +./download_bw.sh +``` + +Then run the script 'process_w5.sh' to generate compressed .w5 files (hdf5) from the input .bigwig files, merge the two replicates, and calculate basic QC metrics. This .sh script internally calls 'bw_h5.py' to generate .w5 files, 'w5_merge.py' to merge replicates, and 'w5_qc.py' to calculate QC metrics. +```sh +./process_w5.sh +``` + +Finally, run the Makefile to create genome-wide binned coverage tracks, stored as compressed TFRecords. +```sh +make +``` + +In this example, the Makefile creates 8 cross-validation folds of TFRecords with input sequences of length 393216 bp, generated with a genome-wide stride of 43691 bp. The output coverage tracks corresponding to each input sequence are not cropped in the latest version of Borzoi models. This results in 12288 coverage bins per 393kb sequence. The specific .w5 tracks to include in the TFRecord generation, and the scales and pooling transforms applied to the bins of each experiment, are given in the targets file 'targets_human.txt'. Below is a description of the columns in this file. + +*targets_human.txt*: +- (unnamed) => integer index of each track (must start from 0 when training a new model). +- 'identifier' => unique identifier of each experiment (and strand).
+- 'file' => local file path to .w5 file. +- 'clip' => hard clipping threshold to be applied to each bin, after soft-clipping. +- 'clip_soft' => soft clipping (squashing) threshold. +- 'scale' => scale value applied to each bp-level position before clipping. +- 'sum_stat' => type of bin-level pooling operation ('sum_sqrt' = sum and square-root). +- 'strand_pair' => integer index of the other stranded track of an experiment (same index as current row if unstranded). +- 'description' => text description of experiment. + +*Notes*: +- See [here](https://github.com/calico/borzoi-paper/tree/main/data/training) for a description of the scripts called by the Makefile to create TFRecords. +- In the latest version of Borzoi models, a modified hg38 fasta genome is used in the Makefile where the allele with highest overall allele frequency (from gnomAD) is substituted at each position. From 2f28d872d5319efad8d25424d6670797797767be Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:38:28 -0700 Subject: [PATCH 21/32] Update README.md --- tutorials/latest/train_model/README.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tutorials/latest/train_model/README.md b/tutorials/latest/train_model/README.md index 1587061..7846acf 100644 --- a/tutorials/latest/train_model/README.md +++ b/tutorials/latest/train_model/README.md @@ -1,3 +1,19 @@ ## Model Training -Todo. +This tutorial describes how to train smaller Borzoi models on the example RNA-seq experiment processed in the [make_data tutorial](https://github.com/calico/borzoi/tree/main/tutorials/latest/make_data). + +To train a 'Mini Borzoi' ensemble (~40M parameters, 2 cross-validation folds), run the script 'train_mini.sh'. The model parameters are specified in 'params_mini.json'. This model can be trained with a batch size of 2 on a 24GB NVIDIA Titan RTX or RTX4090 GPU. 
+```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/latest/train_model +./train_mini.sh +``` + +Alternatively, to train an even smaller 'Micro Borzoi' ensemble (~5M parameters), run the script 'train_micro.sh'. This model can fit into the above GPU cards with a batch size of 4, which means the learning rate can be doubled and each epoch finished in half the time. +```sh +./train_micro.sh +``` + +*Notes*: +- See [here](https://github.com/calico/borzoi-paper/tree/main/model) for a description of the scripts called internally by the training .sh script. +- Rather than cropping the output predictions before applying the training loss, in the latest version of Borzoi models a smooth position-specific loss weight is applied that penalizes prediction errors less at the left/right boundaries. From e5050ad776d48de0aa95f06d9fd80d71706e7f4d Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:50:24 -0700 Subject: [PATCH 22/32] Update README.md --- tutorials/latest/score_variants/README.md | 26 ++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tutorials/latest/score_variants/README.md b/tutorials/latest/score_variants/README.md index 827434f..733fe59 100644 --- a/tutorials/latest/score_variants/README.md +++ b/tutorials/latest/score_variants/README.md @@ -1,3 +1,27 @@ ## Variant Scoring -Todo. +This tutorial describes how to predict variant effect scores for a small set of SNVs defined in a .vcf file. This example relies on the Mini Borzoi model trained on sample K562 RNA-seq data from the [train_model tutorial](https://github.com/calico/borzoi/tree/main/tutorials/latest/train_model), which clearly is a significantly weaker model than the pre-trained, published Borzoi model. For examples showcasing variant effect prediction at a larger scale with the pre-trained model (e.g. fine-mapped eQTL classification benchmarks), we refer the user to the [borzoi-paper repository](https://github.com/calico/borzoi-paper/tree/main).
Additionally, we refer the user to the **legacy** version of [this tutorial](https://github.com/calico/borzoi/tree/main/tutorials/legacy/score_variants), which uses the pre-trained, published model. + +First, to calculate **gene-specific expression** scores, run the script 'score_expr_sed.sh'. Two different statistics are computed: (1) logSED (gene expression log fold change), and (2) logD2 (bin-level L2 norm across the coverage profile intersecting the exons of the gene). +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/legacy/score_variants +./score_expr_sed.sh +``` + +To calculate **gene-agnostic expression** scores, run the script 'score_expr_sad.sh'. One statistic is computed: logD2 (bin-level L2 norm across the entire predicted coverage track). +```sh +./score_expr_sad.sh +``` + +To calculate **gene-specific polyadenylation** scores, run the script 'score_polya.sh'. One statistic is computed: COVR (3' coverage ratio across pA junctions of the target gene). +```sh +./score_polya.sh +``` + +To calculate **gene-specific splicing** scores, run the script 'score_splice.sh'. One statistic is computed: nDi (normalized maximum absolute difference in coverage bins across the target gene span). +```sh +./score_splice.sh +``` + +Finally, the jupyter notebook 'run_variant_scripts.ipynb' is provided for convenience to execute all above scripts. The notebook also exemplifies how to navigate the variant prediction hdf5 files and print some example scores. 
From 9da1693bea05129a02709385e222bcabbd9c6ec8 Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:54:03 -0700 Subject: [PATCH 23/32] Update README.md --- tutorials/latest/interpret_sequence/README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tutorials/latest/interpret_sequence/README.md b/tutorials/latest/interpret_sequence/README.md index 1ac18dd..722cd03 100644 --- a/tutorials/latest/interpret_sequence/README.md +++ b/tutorials/latest/interpret_sequence/README.md @@ -1,3 +1,13 @@ ## Interpretation -Todo. +This tutorial describes how to compute gradient saliency scores (sequence attributions) with respect to various statistics computed for a list of input genes specified in a .gtf file. This example relies on the Mini Borzoi model trained on sample K562 RNA-seq data from the [train_model tutorial](https://github.com/calico/borzoi/tree/main/tutorials/latest/train_model), which clearly is a significantly weaker model than the pre-trained, published Borzoi model. + +To compute input gradients with respect to the log-sum of coverage across the exons of the example gene HBE1, run the script 'run_gradients_expr_HBE1.sh'. +```sh +conda activate borzoi_py310 +cd ~/borzoi/tutorials/latest/interpret_sequence +./run_gradients_expr_HBE1.sh +``` + +*Notes*: +- The track scale, squashing exponentiation, and clip-soft threshold are specified in the .py script arguments (flags: '--track_scale', '--track_transform', '--clip_soft'), and the values in the targets file are ignored. This means that the same data transformation parameters are applied to all tracks specified in the targets file. To calculate gradients for groups of tracks with different data transforms, separate these tracks into different targets files, and execute the gradient script on each group separately.
From 4196e432b95c456b18683b8e541b4ba5c65d693d Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:55:01 -0700 Subject: [PATCH 24/32] Update README.md --- tutorials/latest/score_variants/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/latest/score_variants/README.md b/tutorials/latest/score_variants/README.md index 733fe59..1920ca9 100644 --- a/tutorials/latest/score_variants/README.md +++ b/tutorials/latest/score_variants/README.md @@ -1,11 +1,11 @@ ## Variant Scoring -This tutorial describes how to predict variant effect scores for a small set of SNVs defined in a .vcf file. This example relies on the Mini Borzoi model trained on sample K562 RNA-seq data from the [train_model repository](https://github.com/calico/borzoi/tree/main/tutorials/latest/train_model), which clearly is a significantly weaker model than the pre-trained, published Borzoi model. For examples showcasing variant effect prediction at a larger scale with the pre-trained model (e.g. fine-mapped eQTL classification benchmarks), we refer the user to the [borzoi-paper respository](https://github.com/calico/borzoi-paper/tree/main). Additionally, we refer the user to the **legacy** version of [this tutorial](https://github.com/calico/borzoi/tree/main/tutorials/legacy/score_variants), which uses the pre-trained, published model. +This tutorial describes how to predict variant effect scores for a small set of SNVs defined in a .vcf file. This example relies on the Mini Borzoi model trained on sample K562 RNA-seq data from the [train_model tutorial](https://github.com/calico/borzoi/tree/main/tutorials/latest/train_model), which clearly is a significantly weaker model than the pre-trained, published Borzoi model. For examples showcasing variant effect prediction at a larger scale with the pre-trained model (e.g. 
fine-mapped eQTL classification benchmarks), we refer the user to the [borzoi-paper repository](https://github.com/calico/borzoi-paper/tree/main). Additionally, we refer the user to the **legacy** version of [this tutorial](https://github.com/calico/borzoi/tree/main/tutorials/legacy/score_variants), which uses the pre-trained, published model. First, to calculate **gene-specific expression** scores, run the script 'score_expr_sed.sh'. Two different statistics are computed: (1) logSED (gene expression log fold change), and (2) logD2 (bin-level L2 norm across the coverage profile intersecting the exons of the gene). ```sh conda activate borzoi_py310 -cd ~/borzoi/tutorials/legacy/score_variants +cd ~/borzoi/tutorials/latest/score_variants ./score_expr_sed.sh ``` From 5704f1806063a8b0f69a9a45a9b791b954b5601c Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:55:33 -0700 Subject: [PATCH 25/32] Update README.md --- tutorials/latest/train_model/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/latest/train_model/README.md b/tutorials/latest/train_model/README.md index 7846acf..bcd151c 100644 --- a/tutorials/latest/train_model/README.md +++ b/tutorials/latest/train_model/README.md @@ -5,7 +5,7 @@ This tutorial describes how to train smaller Borzoi models on the example RNA-se To train a 'Mini Borzoi' ensemble (~40M parameters, 2 cross-validation folds), run the script 'train_mini.sh'. The model parameters are specified in 'params_mini.json'. This model can be trained with a batch size of 2 on a 24GB NVIDIA Titan RTX or RTX4090 GPU.
```sh conda activate borzoi_py310 -cd ~/borzoi/tutorials/legacy/train_model +cd ~/borzoi/tutorials/latest/train_model ./train_mini.sh ``` From f865ad53435914a465a761544716cc35989f9d03 Mon Sep 17 00:00:00 2001 From: johli Date: Sat, 5 Oct 2024 10:56:18 -0700 Subject: [PATCH 26/32] Update README.md --- tutorials/latest/make_data/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorials/latest/make_data/README.md b/tutorials/latest/make_data/README.md index 60f5692..52df292 100644 --- a/tutorials/latest/make_data/README.md +++ b/tutorials/latest/make_data/README.md @@ -5,7 +5,7 @@ This tutorial decribes how to process a .bigwig sequencing experiment into compr First, activate the conda environment and run the script 'download_dependencies.sh' to download required auxiliary files. ```sh conda activate borzoi_py310 -cd ~/borzoi/tutorials/legacy/make_data +cd ~/borzoi/tutorials/latest/make_data ./download_dependencies.sh ``` @@ -39,4 +39,4 @@ In this example, the Makefile creates 8 cross-validation folds of TFRecords with *Notes*: - See [here](https://github.com/calico/borzoi-paper/tree/main/data/training) for a description of the scripts called by the Makefile to create TFRecords. -- In the latest version of Borzoi models, a modified hg38 fasta genome is used in the Makefile where the allele with highest overall allele frequency (from gnomAD) is substituted at each position. +- In the latest version of Borzoi models, a modified hg38 fasta genome is used in the Makefile where the allele with highest overall frequency (from gnomAD) is substituted at each position. From 6cf2976e340750c0a0daeb364f873477db30c353 Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Sun, 6 Oct 2024 20:29:02 -0700 Subject: [PATCH 27/32] Updated example notebooks. 
--- ...zoi_example_eqtl_chr10_116952944_T_C.ipynb | 111 +- ...ample_eqtl_chr10_116952944_T_C_fancy.ipynb | 1063 +++++++++++++ ...i_example_ipaqtl_chr10_116664061_G_A.ipynb | 77 +- ...zoi_example_paqtl_chr1_236763042_A_G.ipynb | 53 +- ...ample_paqtl_chr1_236763042_A_G_fancy.ipynb | 1153 ++++++++++++++ ...rzoi_example_sqtl_chr9_135548708_G_C.ipynb | 53 +- examples/borzoi_helpers.py | 1376 +++++++++++++---- 7 files changed, 3406 insertions(+), 480 deletions(-) create mode 100644 examples/borzoi_example_eqtl_chr10_116952944_T_C_fancy.ipynb create mode 100644 examples/borzoi_example_paqtl_chr1_236763042_A_G_fancy.ipynb diff --git a/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb b/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb index b66c8ae..a485a34 100644 --- a/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb +++ b/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb @@ -10,12 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-26 17:59:51.449884: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-09-26 17:59:51.449959: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-09-26 17:59:51.451178: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-09-26 17:59:51.459254: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-06 17:16:44.159991: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN 
when one has already been registered\n", + "2024-10-06 17:16:44.160215: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-06 17:16:44.484774: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-06 17:16:45.302876: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-09-26 17:59:53.076138: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-06 17:17:00.194555: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "a6315e46-79ce-4653-ba71-242e74516b47", "metadata": {}, "outputs": [ @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "f3dfe8ad-5c40-44b1-aab6-58491694da5d", "metadata": {}, "outputs": [ @@ -151,7 +151,7 @@ "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -162,10 +162,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "e5fbf3da", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 09:35:29.222863: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10520 MB memory: -> device: 0, name: NVIDIA GeForce 
GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" + ] + } + ], "source": [ "#Model configuration\n", "\n", @@ -218,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "6f010781", "metadata": { "scrolled": true @@ -246,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "12df90e4", "metadata": {}, "outputs": [], @@ -279,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "id": "073e4711", "metadata": {}, "outputs": [ @@ -303,8 +311,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "4ad40138", + "execution_count": 8, + "id": "9d715d82", "metadata": { "scrolled": true }, @@ -313,9 +321,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-25 10:47:49.900745: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", - "2024-09-25 10:47:52.112099: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", - "2024-09-25 10:47:54.815324: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-10-05 15:08:40.291698: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-10-05 15:08:40.364169: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-10-05 15:08:40.675239: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -386,8 +394,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1min 6s, sys: 941 ms, total: 1min 7s\n", - "Wall time: 1min 18s\n" + "CPU times: user 57.7 s, sys: 937 ms, total: 58.7 s\n", + "Wall time: 1min 1s\n" ] } ], @@ -428,7 +436,7 @@ "bin_size = 32\n", "pad = 16\n", "\n", - "rescale_tracks = 
True\n", + "untransform_old = True\n", "normalize_counts = False\n", "\n", "anno_df = None #splice_df\n", @@ -467,12 +475,12 @@ " normalize_window=1 * plot_window,\n", " bin_size=bin_size,\n", " pad=pad,\n", - " rescale_tracks=rescale_tracks,\n", " normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", " gene_slice=gene_slice,\n", " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, @@ -486,12 +494,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "1/1 [==============================] - 419s 419s/step\n", - "1/1 [==============================] - 220s 220s/step\n", - "1/1 [==============================] - 414s 414s/step\n", - "1/1 [==============================] - 220s 220s/step\n", - "CPU times: user 18min 50s, sys: 2min 46s, total: 21min 36s\n", - "Wall time: 23min 2s\n" + "1/1 [==============================] - 394s 394s/step\n", + "1/1 [==============================] - 206s 206s/step\n", + "1/1 [==============================] - 387s 387s/step\n", + "1/1 [==============================] - 206s 206s/step\n", + "CPU times: user 17min 35s, sys: 2min 38s, total: 20min 13s\n", + "Wall time: 22min 9s\n" ] } ], @@ -517,6 +525,7 @@ " use_logodds=False,\n", " subtract_avg=True,\n", " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", ")\n" ] }, @@ -570,14 +579,14 @@ " plot_start=(poses[0] - start) - 64,\n", " plot_end=(poses[0] - start) + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_blood_cov_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_blood'\n", ")\n" ] }, { "cell_type": "code", "execution_count": 11, - "id": "d82c74db", + "id": "1920b078", "metadata": {}, "outputs": [ { @@ -586,8 +595,8 @@ "text": [ "example_ix = 0\n", "example_ix = 1\n", - "CPU times: user 16min 57s, sys: 1min 30s, total: 18min 28s\n", - "Wall time: 33min 52s\n" + "CPU times: user 12min 6s, sys: 
2min 19s, total: 14min 26s\n", + "Wall time: 33min 48s\n" ] } ], @@ -613,6 +622,7 @@ " use_mean=True,\n", " use_ratio=False,\n", " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -666,14 +676,14 @@ " plot_start=(poses[0] - start) - 64,\n", " plot_end=(poses[0] - start) + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_reps_gtex_blood_cov_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_blood'\n", ")\n" ] }, { "cell_type": "code", "execution_count": 13, - "id": "37e9f717", + "id": "67b3df59", "metadata": {}, "outputs": [ { @@ -682,8 +692,8 @@ "text": [ "example_ix = 0\n", "example_ix = 1\n", - "CPU times: user 1h 6min 55s, sys: 6min 8s, total: 1h 13min 3s\n", - "Wall time: 2h 15min 9s\n" + "CPU times: user 48min 34s, sys: 9min 1s, total: 57min 35s\n", + "Wall time: 2h 15min\n" ] } ], @@ -711,6 +721,7 @@ " use_mean=True,\n", " use_ratio=False,\n", " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -764,13 +775,13 @@ " plot_start=(poses[0] - start) - 64,\n", " plot_end=(poses[0] - start) + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_blood_cov_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_blood'\n", ")\n" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "id": "17a8b494", "metadata": {}, "outputs": [], @@ -785,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "id": "738424ba", "metadata": {}, "outputs": [ @@ -796,8 +807,8 @@ "gtex_targets_wt.shape = (16352, 32)\n", "gtex_targets_mut.shape = (16352, 32)\n", "-- Counts --\n", - " - sum_wt = 27668.607\n", - " - sum_mut = 37653.69\n", + " - sum_wt = 28239.895\n", + " - sum_mut = 38557.145\n", " - max_y_wt = 241.0508\n", " - max_y_mut = 370.9016\n", " -- (max_y = 370.9016)\n" @@ -805,7 +816,7 @@ }, { "data": { - "image/png": 
"", + "image/png": "", "text/plain": [ "
" ] @@ -829,13 +840,13 @@ "start = center_pos - seq_len // 2\n", "end = center_pos + seq_len // 2\n", "\n", - "blacklist_bed = \"/home/drk/common/data/genomes/hg38/blacklist/blacklist_hg38_all.bed\"\n", + "blacklist_bed = \"hg38/blacklist/blacklist_hg38_all.bed\"\n", "\n", "read_coverage_func_wt, close_coverage_func_wt = get_coverage_reader(cov_files_wt, 16384, 16, blacklist_bed)\n", "read_coverage_func_mut, close_coverage_func_mut = get_coverage_reader(cov_files_mut, 16384, 16, blacklist_bed)\n", "\n", - "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", - "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", + "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", "\n", "close_coverage_func_wt()\n", "close_coverage_func_mut()\n", @@ -849,7 +860,7 @@ "\n", "anno_df = None #splice_df\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = True\n", "\n", "#Tracks\n", @@ -890,12 +901,12 @@ " normalize_window=4 * plot_window,\n", " bin_size=bin_size,\n", " pad=pad,\n", - " rescale_tracks=rescale_tracks,\n", " normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", " gene_slice=gene_slice,\n", " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, @@ -910,9 +921,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python [conda env:borzoi_py310_new2]", "language": "python", - "name": "python3" + "name": "conda-env-borzoi_py310_new2-py" }, "language_info": { "codemirror_mode": { @@ -924,7 +935,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.15" + "version": "3.10.13" } }, "nbformat": 4, 
diff --git a/examples/borzoi_example_eqtl_chr10_116952944_T_C_fancy.ipynb b/examples/borzoi_example_eqtl_chr10_116952944_T_C_fancy.ipynb new file mode 100644 index 0000000..4414dee --- /dev/null +++ b/examples/borzoi_example_eqtl_chr10_116952944_T_C_fancy.ipynb @@ -0,0 +1,1063 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7dbf2734", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 17:20:38.510317: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-06 17:20:38.510373: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-06 17:20:38.511404: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-06 17:20:38.518121: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-10-06 17:20:40.222709: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + } + ], + "source": [ + "import json\n", + "import os\n", + "import time\n", + "import warnings\n", + "\n", + "import h5py\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pysam\n", + "import pyfaidx\n", + "import pybedtools\n", + "import csv\n", + "import tensorflow as tf\n", + "\n", + "from baskerville import 
seqnn\n", + "from baskerville import gene as bgene\n", + "from baskerville import dna\n", + "\n", + "from borzoi_helpers import *\n", + "\n", + "tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)\n", + "#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a6315e46-79ce-4653-ba71-242e74516b47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f3c0 model already exists.\n", + "f3c1 model already exists.\n", + "f3c2 model already exists.\n", + "f3c3 model already exists.\n", + "Gene annotation already exists.\n", + "Gene annotation (no read-through, protein-coding) already exists.\n", + "Gene annotation (protein-coding) already exists.\n", + "TSS annotation already exists.\n", + "Splice site annotation already exist.\n", + "Splice site annotation already exist.\n", + "PolyA site annotation already exist.\n", + "Human genome FASTA already exists.\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "#Download model weights (data fold 3, 4 replicates)\n", + "for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=\",\"; set -- $rep; \n", + " mkdir -p \"saved_models/$1/train\"\n", + " local_model=\"saved_models/$1/train/model0_best.h5\"\n", + " if [ -f \"$local_model\" ]; then\n", + " echo \"$1 model already exists.\"\n", + " else\n", + " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5\" -O \"$local_model\"\n", + " fi\n", + "done\n", + "\n", + "#Download and uncompress annotation files\n", + "mkdir -p hg38/genes/gencode41\n", + "mkdir -p hg38/genes/polyadb\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then\n", + " echo \"Gene annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort.gtf\n", + "fi\n", + "\n", + "if [ -f 
hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then\n", + " echo \"Gene annotation (no read-through, protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then\n", + " echo \"Gene annotation (protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then\n", + " echo \"TSS annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_tss2.bed\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein_splice.gff\n", + "fi\n", + "\n", + "if [ -f hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then\n", + " echo \"PolyA site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O hg38/genes/polyadb/polyadb_human_v3.csv.gz\n", + "fi\n", + "\n", + "#Download and index hg38 genome\n", + "mkdir -p 
hg38/assembly/ucsc\n", + "\n", + "if [ -f hg38/assembly/ucsc/hg38.fa ]; then\n", + " echo \"Human genome FASTA already exists.\"\n", + "else\n", + " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38/assembly/ucsc/hg38.fa\n", + "fi\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f3dfe8ad-5c40-44b1-aab6-58491694da5d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyfaidx.Faidx('hg38/assembly/ucsc/hg38.fa')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e5fbf3da", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 17:22:20.829701: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10520 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" + ] + } + ], + "source": [ + "#Model configuration\n", + "\n", + "params_file = 'params_pred.json'\n", + "targets_file = 'targets_gtex.txt' #Subset of targets_human.txt\n", + "\n", + "seq_len = 524288\n", + "n_reps = 1 #To use only one model replicate, set to 'n_reps = 1'. 
To use all four replicates, set 'n_reps = 4'.\n", + "rc = True #Average across reverse-complement prediction\n", + "\n", + "#Read model parameters\n", + "\n", + "with open(params_file) as params_open :\n", + " \n", + " params = json.load(params_open)\n", + " \n", + " params_model = params['model']\n", + " params_train = params['train']\n", + "\n", + "#Remove cropping\n", + "params_model['trunk'][-2]['cropping'] = 0\n", + "\n", + "#Read targets\n", + "\n", + "targets_df = pd.read_csv(targets_file, index_col=0, sep='\\t')\n", + "target_index = targets_df.index\n", + "\n", + "#Create local index of strand_pair (relative to sliced targets)\n", + "if rc :\n", + " strand_pair = targets_df.strand_pair\n", + " \n", + " target_slice_dict = {ix : i for i, ix in enumerate(target_index.values.tolist())}\n", + " slice_pair = np.array([\n", + " target_slice_dict[ix] if ix in target_slice_dict else ix for ix in strand_pair.values.tolist()\n", + " ], dtype='int32')\n", + "\n", + "#Initialize model ensemble\n", + "\n", + "models = []\n", + "for rep_ix in range(n_reps) :\n", + " \n", + " model_file = \"saved_models/f3c\" + str(rep_ix) + \"/train/model0_best.h5\"\n", + "\n", + " seqnn_model = seqnn.SeqNN(params_model)\n", + " seqnn_model.restore(model_file, 0)\n", + " seqnn_model.build_slice(target_index)\n", + " if rc :\n", + " seqnn_model.strand_pair.append(slice_pair)\n", + " seqnn_model.build_ensemble(rc, [0])\n", + " \n", + " models.append(seqnn_model)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e70d467b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "len(apa_df_utr) = 114605\n", + "len(apa_df_intron) = 83473\n", + "len(tss_df) = 116649\n" + ] + } + ], + "source": [ + "#Load genome fasta and gene annotations\n", + "\n", + "#Initialize fasta sequence extractor\n", + "fasta_open = pysam.Fastafile('hg38/assembly/ucsc/hg38.fa')\n", + "\n", + "#Load gene/exon annotation\n", + "gtf_file = 
'hg38/genes/gencode41/gencode41_basic_nort_protein.gtf'\n", + "\n", + "transcriptome = bgene.Transcriptome(gtf_file)\n", + "\n", + "#Get gene span bedtool\n", + "bedt_span = transcriptome.bedtool_span()\n", + "\n", + "#Load APA atlas\n", + "apa_df = pd.read_csv('hg38/genes/polyadb/polyadb_human_v3.csv.gz', sep='\\t', compression='gzip')\n", + "apa_df = apa_df[['pas_id', 'gene', 'chrom', 'position_hg38', 'strand', 'site_num', 'num_sites', 'site_type', 'pas_type', 'total_count']]\n", + "\n", + "apa_df.loc[apa_df['pas_type'] == 'NoPAS', 'pas_type'] = 'No_CSE'\n", + "\n", + "#Only consider 3' UTR sites\n", + "apa_df_utr = apa_df.query(\"site_type == '3\\\\' most exon'\").copy().reset_index(drop=True)\n", + "\n", + "#Or intronic sites\n", + "apa_df_intron = apa_df.query(\"site_type == 'Intron' and pas_type != 'No_CSE'\").copy().reset_index(drop=True)\n", + "\n", + "print(\"len(apa_df_utr) = \" + str(len(apa_df_utr)))\n", + "print(\"len(apa_df_intron) = \" + str(len(apa_df_intron)))\n", + "\n", + "#Load TSS atlas\n", + "tss_df = pd.read_csv('hg38/genes/gencode41/gencode41_basic_tss2.bed', sep='\\t', names=['chrom', 'position_hg38', 'end', 'tss_id', 'feat1', 'strand'])\n", + "tss_df['gene'] = tss_df['tss_id'].apply(lambda x: x.split(\"/\")[1] if \"/\" in x else x)\n", + "\n", + "print(\"len(tss_df) = \" + str(len(tss_df)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2417da7c", + "metadata": {}, + "outputs": [], + "source": [ + "#Get reference/alternate sequence for variant, and annotations for target gene\n", + "\n", + "search_gene = 'ENSG00000187164'\n", + "\n", + "center_pos = 116952944\n", + "\n", + "chrom = 'chr10'\n", + "poses = [116952944]\n", + "alts = ['C']\n", + "\n", + "start = center_pos - seq_len // 2\n", + "end = center_pos + seq_len // 2\n", + "\n", + "load_isoforms = True\n", + "\n", + "#Get exon bin range\n", + "gene_keys = [gene_key for gene_key in transcriptome.genes.keys() if search_gene in gene_key]\n", + "\n", + "gene = 
transcriptome.genes[gene_keys[0]]\n", + "gene_strand = gene.strand\n", + "\n", + "if chrom is None or start is None or end is None :\n", + " chrom = gene.chrom\n", + " g_start, g_end = gene.span()\n", + " mid = (g_start + g_end) // 2\n", + " start = mid - seq_len // 2\n", + " end = mid + seq_len // 2\n", + "\n", + "#Determine output sequence start\n", + "seq_out_start = start + seqnn_model.model_strides[0]*seqnn_model.target_crops[0]\n", + "seq_out_len = seqnn_model.model_strides[0]*seqnn_model.target_lengths[0]\n", + "\n", + "#Determine output positions of gene exons\n", + "gene_slice = gene.output_slice(seq_out_start, seq_out_len, seqnn_model.model_strides[0], False, old_version=True)\n", + "\n", + "#Get sequence bedtool\n", + "seq_bedt = pybedtools.BedTool('%s %d %d' % (chrom, start, end), from_string=True)\n", + "\n", + "#Get all genes (exons and strands) overlapping input window\n", + "gene_ids = sorted(list(set([overlap[3] for overlap in bedt_span.intersect(seq_bedt, wo=True) if search_gene not in overlap[3]])))\n", + "gene_slices = []\n", + "gene_strands = []\n", + "for gene_id in gene_ids :\n", + " gene_slices.append(transcriptome.genes[gene_id].output_slice(seq_out_start, seq_out_len, seqnn_model.model_strides[0], False, old_version=True))\n", + " gene_strands.append(transcriptome.genes[gene_id].strand)\n", + "\n", + "#Get 3' UTR pA sites for gene\n", + "apa_df_gene_utr = apa_df_utr.query(\"gene == '\" + gene.name + \"'\").copy().reset_index(drop=True)[['chrom', 'gene', 'strand', 'position_hg38']]\n", + "apa_df_gene_intron = apa_df_intron.query(\"gene == '\" + gene.name + \"'\").copy().reset_index(drop=True)[['chrom', 'gene', 'strand', 'position_hg38']]\n", + "\n", + "#Get TSS sites for gene\n", + "tss_df_gene = tss_df.loc[tss_df['gene'].str.contains(search_gene)].copy().reset_index(drop=True)[['chrom', 'gene', 'strand', 'position_hg38']]\n", + "\n", + "def _switch_transcript_id(id_str) :\n", + " return id_str.replace(\"gene_id\", 
\"gene_id_orig\").replace(\"transcript_id\", \"gene_id\")\n", + "\n", + "#Get gene isoforms\n", + "isoform_slices = None\n", + "if load_isoforms :\n", + " gtf_df = pd.read_csv(gtf_file, sep='\\t', skiprows=5, names=['chrom', 'havana_str', 'feature', 'start', 'end', 'feat1', 'strand', 'feat2', 'id_str'])\n", + " gtf_df = gtf_df.loc[gtf_df['id_str'].str.contains(search_gene)].copy().reset_index(drop=True)\n", + " gtf_df = gtf_df.loc[gtf_df['id_str'].str.contains(\"transcript_id\")].copy().reset_index(drop=True)\n", + " gtf_df = gtf_df.loc[gtf_df['feature'] == 'exon'].copy().reset_index(drop=True)\n", + " \n", + " transcript_ids = gtf_df['id_str'].apply(lambda x: x.split(\"transcript_id \\\"\")[1].split(\"\\\";\")[0]).unique().tolist()\n", + " gtf_df['id_str'] = gtf_df['id_str'].apply(_switch_transcript_id)\n", + " \n", + " gtf_df.to_csv('borzoi_gene_isoforms.gtf', sep='\\t', index=False, header=False, quoting=csv.QUOTE_NONE)\n", + " \n", + " transcriptome_iso = bgene.Transcriptome('borzoi_gene_isoforms.gtf')\n", + " \n", + " isoform_slices = []\n", + " for transcript_id in transcript_ids :\n", + " isoform_slices.append(transcriptome_iso.genes[transcript_id].output_slice(seq_out_start, seq_out_len, seqnn_model.model_strides[0], False, old_version=True))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7e86cc79", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 17:23:21.136047: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-10-06 17:23:21.204776: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-10-06 17:23:21.514564: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 17.1 s, sys: 286 ms, total: 17.4 s\n", + "Wall time: 59.2 s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Predict for chr10_116952944_T_C\n", + "# (~6 minutes on CPU w 1 replicate; ~2 minutes on GPU)\n", + "\n", + "save_figs = False\n", + "save_suffix = '_chr10_116952944_T_C'\n", + "\n", + "sequence_one_hot_wt = process_sequence(fasta_open, chrom, start, end)\n", + "\n", + "#Induce mutation(s)\n", + "sequence_one_hot_mut = np.copy(sequence_one_hot_wt)\n", + "\n", + "for pos, alt in zip(poses, alts) :\n", + " alt_ix = -1\n", + " if alt == 'A' :\n", + " alt_ix = 0\n", + " elif alt == 'C' :\n", + " alt_ix = 1\n", + " elif alt == 'G' :\n", + " alt_ix = 2\n", + " elif alt == 'T' :\n", + " alt_ix = 3\n", + "\n", + " sequence_one_hot_mut[pos-start-1] = 0.\n", + " sequence_one_hot_mut[pos-start-1, alt_ix] = 1.\n", + "\n", + "#Make predictions\n", + "y_wt = predict_tracks(models, sequence_one_hot_wt)\n", + "y_mut = predict_tracks(models, sequence_one_hot_mut)\n", + "\n", + "\n", + "#Visualize coverage tracks\n", + "plot_start = seq_len // 2 - 131072 // 2\n", + "plot_end = seq_len // 2 + 131072 // 2\n", + "bin_size = 32\n", + "pad = 0\n", + "\n", + "#Tracks\n", + "track_indices = [\n", + " np.nonzero((targets_df['description'] == 'RNA:blood').values)[0].tolist(),\n", + "]\n", + "\n", + "track_names = [\n", + " 'GTEx Blood',\n", + "]\n", + "\n", + "track_colors = [\n", + " ['deepskyblue', 'red'],\n", + "]\n", + "\n", + "track_labels = [\n", + " ['Ref', 'Alt'],\n", + "]\n", + "\n", + "track_scale = 0.01\n", + "track_transform = 3./4.\n", + "soft_clip = 384.\n", + "\n", + "untransform_old = True\n", + "\n", + "#Plot coverage\n", + "plot_coverage_tracks(\n", + " y_wt,\n", + " track_indices,\n", + " track_names,\n", + " track_colors,\n", + " track_labels,\n", + " track_scale,\n", + " track_transform,\n", + " soft_clip,\n", + " 
start,\n", + " y_2_in=y_mut,\n", + " plot_pair=True,\n", + " pair_order=[1, 0],\n", + " pair_alpha=1.,\n", + " log_scale=False,\n", + " same_scale=True,\n", + " plot_start_rel=plot_start,\n", + " plot_end_rel=plot_end,\n", + " bin_size=bin_size,\n", + " pad=pad,\n", + " save_figs=save_figs,\n", + " save_suffix=save_suffix,\n", + " gene_slice=gene_slice,\n", + " gene_slices=gene_slices,\n", + " isoform_slices=isoform_slices,\n", + " gene_strand=gene_strand,\n", + " chrom=chrom,\n", + " search_gene=search_gene,\n", + " gene_strands=gene_strands,\n", + " apa_df_gene_utr=apa_df_gene_utr,\n", + " apa_df_gene_intron=apa_df_gene_intron,\n", + " tss_df_gene=tss_df_gene,\n", + " annotate_utr_apa=False,\n", + " annotate_intron_apa=False,\n", + " annotate_tss=False,\n", + " plot_strands=True,\n", + " plot_other_genes=False,\n", + " plot_other_gene_strands=False,\n", + " plot_isoforms=False,\n", + " plot_isoform_strands=False,\n", + " gene_color='black',\n", + " isoform_color='dimgray',\n", + " other_gene_color='black',\n", + " max_isoforms=5,\n", + " isoform_height_frac=0.,\n", + " plot_as_bars=False,\n", + " fig_size=(10, 1.5),\n", + " untransform_old=untransform_old,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0ebeb2f5", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Re-plot coverage tracks and annotate isoforms\n", + "\n", + "plot_coverage_tracks(\n", + " y_wt,\n", + " track_indices,\n", + " track_names,\n", + " track_colors,\n", + " track_labels,\n", + " track_scale,\n", + " track_transform,\n", + " soft_clip,\n", + " start,\n", + " y_2_in=y_mut,\n", + " plot_pair=True,\n", + " pair_order=[1, 0],\n", + " pair_alpha=1.,\n", + " log_scale=False,\n", + " same_scale=True,\n", + " plot_start_rel=plot_start,\n", + " plot_end_rel=plot_end,\n", + " bin_size=bin_size,\n", + " pad=pad,\n", + " save_figs=save_figs,\n", + " save_suffix=save_suffix,\n", + " gene_slice=gene_slice,\n", + " gene_slices=gene_slices,\n", + " isoform_slices=isoform_slices,\n", + " gene_strand=gene_strand,\n", + " chrom=chrom,\n", + " search_gene=search_gene,\n", + " gene_strands=gene_strands,\n", + " apa_df_gene_utr=apa_df_gene_utr,\n", + " apa_df_gene_intron=apa_df_gene_intron,\n", + " tss_df_gene=tss_df_gene,\n", + " annotate_utr_apa=False,\n", + " annotate_intron_apa=False,\n", + " annotate_tss=False,\n", + " plot_strands=True,\n", + " plot_other_genes=False,\n", + " plot_other_gene_strands=False,\n", + " plot_isoforms=True,\n", + " plot_isoform_strands=True,\n", + " gene_color='black',\n", + " isoform_color='dimgray',\n", + " other_gene_color='black',\n", + " max_isoforms=5,\n", + " isoform_height_frac=0.5,\n", + " plot_as_bars=False,\n", + " fig_size=(10, 2),\n", + " untransform_old=untransform_old,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "046d763d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1/1 [==============================] - 394s 394s/step\n", + "1/1 [==============================] - 206s 206s/step\n", + "1/1 [==============================] - 387s 387s/step\n", + "1/1 [==============================] - 206s 206s/step\n", + "CPU times: user 17min 35s, sys: 2min 38s, 
total: 20min 13s\n", + "Wall time: 22min 9s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Get contribution scores (gradient) for blood GTEX tracks\n", + "# (~20 minutes on CPU; otherwise runnable only on 40GB GPU cards, e.g. A100)\n", + "\n", + "_, _, [pred_grad_wt, pred_grad_mut] = get_prediction_gradient_w_rc(\n", + " models,\n", + " [sequence_one_hot_wt, sequence_one_hot_mut],\n", + " prox_bin_start=0,\n", + " prox_bin_end=1,\n", + " dist_bin_start=0,\n", + " dist_bin_end=1,\n", + " track_index=target_index[[9, 10, 11]].tolist(),\n", + " track_scale=0.01,\n", + " track_transform=3./4.,\n", + " clip_soft=384.,\n", + " dist_bin_index=gene_slice.tolist(),\n", + " use_mean=False,\n", + " use_ratio=False,\n", + " use_logodds=False,\n", + " subtract_avg=True,\n", + " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "199003f5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- WT ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Mut ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize gradient contribution scores (128 bp window centered on variant)\n", + "\n", + "visualize_input_gradient_pair(\n", + " pred_grad_wt,\n", + " pred_grad_mut,\n", + " plot_start=(poses[0] - start) - 64,\n", + " plot_end=(poses[0] - start) + 64,\n", + " save_figs=False,\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_blood'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "1920b078", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_ix = 0\n", + "example_ix = 1\n", + "CPU times: user 12min 6s, sys: 2min 19s, total: 14min 26s\n", + "Wall time: 33min 48s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Get contribution scores (ISM) for blood GTEX tracks\n", + "# (not feasible to run on CPU; ~33 minutes on GPU)\n", + "\n", + "[pred_ism_wt, pred_ism_mut] = get_ism(\n", + " models,\n", + " [sequence_one_hot_wt, sequence_one_hot_mut],\n", + " ism_start=(poses[0] - start) - 64,\n", + " ism_end=(poses[0] - start) + 64,\n", + " prox_bin_start=0,\n", + " prox_bin_end=1,\n", + " dist_bin_start=0,\n", + " dist_bin_end=1,\n", + " track_index=[9, 10, 11],\n", + " track_scale=0.01,\n", + " track_transform=3./4.,\n", + " clip_soft=384.,\n", + " dist_bin_index=gene_slice.tolist(),\n", + " use_mean=True,\n", + " use_ratio=False,\n", + " use_logodds=False,\n", + " untransform_old=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0b2ecd61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- WT ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Mut ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize ISM contribution scores (128 bp window centered on variant)\n", + "\n", + "visualize_input_gradient_pair(\n", + " pred_ism_wt,\n", + " pred_ism_mut,\n", + " plot_start=(poses[0] - start) - 64,\n", + " plot_end=(poses[0] - start) + 64,\n", + " save_figs=False,\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_blood'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "67b3df59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_ix = 0\n", + "example_ix = 1\n", + "CPU times: user 48min 34s, sys: 9min 1s, total: 57min 35s\n", + "Wall time: 2h 15min\n" + ] + } + ], + "source": [ + "%%time\n", + "#Get contribution scores (ISM shuffle) for blood GTEX tracks\n", + "# (not feasible to run on CPU; ~135 minutes on GPU)\n", + "\n", + "[pred_ism_wt, pred_ism_mut] = get_ism_shuffle(\n", + " models,\n", + " [sequence_one_hot_wt, sequence_one_hot_mut],\n", + " ism_start=(poses[0] - start) - 64,\n", + " ism_end=(poses[0] - start) + 64,\n", + " prox_bin_start=0,\n", + " prox_bin_end=1,\n", + " dist_bin_start=0,\n", + " dist_bin_end=1,\n", + " track_index=[9, 10, 11],\n", + " track_scale=0.01,\n", + " track_transform=3./4.,\n", + " clip_soft=384.,\n", + " window_size=5,\n", + " n_samples=12,\n", + " dist_bin_index=gene_slice.tolist(),\n", + " use_mean=True,\n", + " use_ratio=False,\n", + " use_logodds=False,\n", + " untransform_old=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a7fcc94b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- WT ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Mut ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize ISM Shuffle contribution scores (128 bp window centered on variant)\n", + "\n", + "visualize_input_gradient_pair(\n", + " pred_ism_wt,\n", + " pred_ism_mut,\n", + " plot_start=(poses[0] - start) - 64,\n", + " plot_end=(poses[0] - start) + 64,\n", + " save_figs=False,\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_blood'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "17a8b494", + "metadata": {}, + "outputs": [], + "source": [ + "#Load samples with reference- and alternate alleles respectively\n", + "\n", + "#These files are protected by dbGaP - email to request access\n", + "\n", + "cov_files_wt = pd.read_csv(\"gtex_ref_chr10_116952944_T_C.txt\", sep='\\t', names=['file'])['file'].values.tolist()\n", + "cov_files_mut = pd.read_csv(\"gtex_alt_chr10_116952944_T_C.txt\", sep='\\t', names=['file'])['file'].values.tolist()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "738424ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gtex_targets_wt.shape = (16384, 32)\n", + "gtex_targets_mut.shape = (16384, 32)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize measured coverage tracks for chr10_116952944_T_C\n", + "\n", + "save_figs = False\n", + "save_suffix = '_chr10_116952944_T_C_meas_32_subjects'\n", + "\n", + "center_pos = 116952944\n", + "\n", + "chrom = 'chr10'\n", + "poses = [116952944]\n", + "alts = ['C']\n", + "\n", + "start = center_pos - seq_len // 2\n", + "end = center_pos + seq_len // 2\n", + "\n", + "blacklist_bed = \"hg38/blacklist/blacklist_hg38_all.bed\"\n", + "\n", + "read_coverage_func_wt, close_coverage_func_wt = get_coverage_reader(cov_files_wt, 16384, 0, blacklist_bed)\n", + "read_coverage_func_mut, close_coverage_func_mut = get_coverage_reader(cov_files_mut, 16384, 0, blacklist_bed)\n", + "\n", + "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "\n", + "close_coverage_func_wt()\n", + "close_coverage_func_mut()\n", + "\n", + "print(\"gtex_targets_wt.shape = \" + str(gtex_targets_wt.shape))\n", + "print(\"gtex_targets_mut.shape = \" + str(gtex_targets_mut.shape))\n", + "\n", + "#Visualize coverage tracks\n", + "plot_start = seq_len // 2 - 131072 // 2\n", + "plot_end = seq_len // 2 + 131072 // 2\n", + "bin_size = 32\n", + "pad = 0\n", + "\n", + "#Tracks\n", + "track_indices = [\n", + " np.arange(gtex_targets_wt.shape[1], dtype='int32').tolist(),\n", + "]\n", + "\n", + "track_names = [\n", + " 'GTEx Blood',\n", + "]\n", + "\n", + "track_colors = [\n", + " ['deepskyblue', 'red'],\n", + "]\n", + "\n", + "track_labels = [\n", + " ['Ref', 'Alt'],\n", + "]\n", + "\n", + "track_scale = 0.01\n", + "track_transform = 3./4.\n", + "soft_clip = 384.\n", + "\n", + "untransform_old = True\n", + "\n", + "#Plot coverage\n", + "plot_coverage_tracks(\n", + " gtex_targets_wt[None, None, ...],\n", + " track_indices,\n", + " 
track_names,\n", + " track_colors,\n", + " track_labels,\n", + " track_scale,\n", + " track_transform,\n", + " soft_clip,\n", + " start,\n", + " y_2_in=gtex_targets_mut[None, None, ...],\n", + " plot_pair=True,\n", + " pair_order=[1, 0],\n", + " pair_alpha=1.,\n", + " log_scale=False,\n", + " same_scale=True,\n", + " plot_start_rel=plot_start,\n", + " plot_end_rel=plot_end,\n", + " normalize_counts=True,\n", + " normalize_start_rel=0 + pad * bin_size,\n", + " normalize_end_rel=524288 - pad * bin_size,\n", + " bin_size=bin_size,\n", + " pad=pad,\n", + " save_figs=save_figs,\n", + " save_suffix=save_suffix,\n", + " gene_slice=gene_slice,\n", + " gene_slices=gene_slices,\n", + " isoform_slices=isoform_slices,\n", + " gene_strand=gene_strand,\n", + " chrom=chrom,\n", + " search_gene=search_gene,\n", + " gene_strands=gene_strands,\n", + " apa_df_gene_utr=apa_df_gene_utr,\n", + " apa_df_gene_intron=apa_df_gene_intron,\n", + " tss_df_gene=tss_df_gene,\n", + " annotate_utr_apa=False,\n", + " annotate_intron_apa=False,\n", + " annotate_tss=False,\n", + " plot_strands=True,\n", + " plot_other_genes=False,\n", + " plot_other_gene_strands=False,\n", + " plot_isoforms=False,\n", + " plot_isoform_strands=False,\n", + " gene_color='black',\n", + " isoform_color='dimgray',\n", + " other_gene_color='black',\n", + " max_isoforms=5,\n", + " isoform_height_frac=0.,\n", + " plot_as_bars=False,\n", + " fig_size=(10, 1.5),\n", + " untransform_old=untransform_old,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "671a9c8e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } 
+ }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb b/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb index a412d73..26d3813 100644 --- a/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb +++ b/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb @@ -10,12 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-26 18:00:42.776653: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-09-26 18:00:42.776733: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-09-26 18:00:42.777952: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-09-26 18:00:42.787432: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-06 10:14:14.263859: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-06 10:14:14.263912: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-06 10:14:14.264958: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-06 10:14:14.271837: I tensorflow/core/platform/cpu_feature_guard.cc:182] This 
TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-09-26 18:00:44.230820: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-06 10:14:15.998526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -170,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-25 11:00:38.644220: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10232 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" + "2024-10-06 10:14:18.639467: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10520 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" ] } ], @@ -276,21 +276,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "id": "3bd4e6c7", "metadata": { "scrolled": true }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-09-25 11:00:55.770144: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", - "2024-09-25 11:00:55.859364: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", - "2024-09-25 11:00:56.180294: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -353,8 +344,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 21.5 s, sys: 395 ms, total: 21.9 s\n", - "Wall time: 24.7 s\n" + "CPU 
times: user 5.26 s, sys: 313 ms, total: 5.57 s\n", + "Wall time: 8.55 s\n" ] } ], @@ -404,7 +395,7 @@ "bin_size = 32\n", "pad = 16\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = False\n", "\n", "anno_df = splice_df\n", @@ -441,12 +432,12 @@ "]\n", "\n", "print(\"-- Counts --\")\n", - "plot_coverage_track_pair_bins(y_wt, y_mut, chrom, start, center_pos, poses, track_indices, track_names, track_scales, track_transforms, soft_clips, plot_window=plot_window, normalize_window=8*plot_window, bin_size=bin_size, pad=pad, rescale_tracks=rescale_tracks, normalize_counts=normalize_counts, save_figs=save_figs, save_suffix=save_suffix, anno_df=anno_df)\n" + "plot_coverage_track_pair_bins(y_wt, y_mut, chrom, start, center_pos, poses, track_indices, track_names, track_scales, track_transforms, soft_clips, plot_window=plot_window, normalize_window=8*plot_window, bin_size=bin_size, pad=pad, normalize_counts=normalize_counts, save_figs=save_figs, save_suffix=save_suffix, anno_df=anno_df, untransform_old=untransform_old)\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "id": "f3bb0e25", "metadata": {}, "outputs": [ @@ -497,7 +488,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "id": "9a310c06", "metadata": {}, "outputs": [], @@ -551,6 +542,7 @@ " use_logodds=False,\n", " subtract_avg=True,\n", " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", ")\n" ] }, @@ -604,7 +596,7 @@ " plot_start=seq_len - (poses[0] - start) - 64 - 1,\n", " plot_end=seq_len - (poses[0] - start) + 64 - 1,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_cov_nerve_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_nerve'\n", ")\n" ] }, @@ -645,7 +637,8 @@ " 384.,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" 
] }, @@ -699,7 +692,7 @@ " plot_start=seq_len - (poses[0] - start) - 64 - 1,\n", " plot_end=seq_len - (poses[0] - start) + 64 - 1,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_reps_gtex_cov_nerve_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_nerve'\n", ")\n" ] }, @@ -742,7 +735,8 @@ " n_samples=12,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -796,13 +790,13 @@ " plot_start=seq_len - (poses[0] - start) - 64 - 1,\n", " plot_end=seq_len - (poses[0] - start) + 64 - 1,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_cov_nerve_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_nerve'\n", ")\n" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "id": "35c74bd3", "metadata": {}, "outputs": [], @@ -858,6 +852,7 @@ " use_logodds=False,\n", " subtract_avg=True,\n", " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", ")\n" ] }, @@ -911,7 +906,7 @@ " plot_start=seq_len - (pas_pos - start) - 64 - 1,\n", " plot_end=seq_len - (pas_pos - start) + 64 - 1,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_pas_cov_nerve_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_nerve'\n", ")\n" ] }, @@ -952,7 +947,8 @@ " 384.,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -1006,7 +1002,7 @@ " plot_start=seq_len - (pas_pos - start) - 64 - 1,\n", " plot_end=seq_len - (pas_pos - start) + 64 - 1,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_snp_4_reps_gtex_pas_cov_nerve_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + 
'_prediction_ism_gtex_nerve'\n", ")\n" ] }, @@ -1051,7 +1047,8 @@ " n_samples=12,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -1105,13 +1102,13 @@ " plot_start=seq_len - (pas_pos - start) - 64 - 1,\n", " plot_end=seq_len - (pas_pos - start) + 64 - 1,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_pas_cov_nerve_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_nerve'\n", ")\n" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "78d47c3a", "metadata": {}, "outputs": [], @@ -1170,13 +1167,13 @@ "start = center_pos - seq_len // 2\n", "end = center_pos + seq_len // 2\n", "\n", - "blacklist_bed = \"/home/drk/common/data/genomes/hg38/blacklist/blacklist_hg38_all.bed\"\n", + "blacklist_bed = \"hg38/blacklist/blacklist_hg38_all.bed\"\n", "\n", "read_coverage_func_wt, close_coverage_func_wt = get_coverage_reader(cov_files_wt, 16384, 16, blacklist_bed)\n", "read_coverage_func_mut, close_coverage_func_mut = get_coverage_reader(cov_files_mut, 16384, 16, blacklist_bed)\n", "\n", - "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", - "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", + "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", "\n", "close_coverage_func_wt()\n", "close_coverage_func_mut()\n", @@ -1188,7 +1185,7 @@ "bin_size = 32\n", "pad = 16\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = True\n", "\n", "#Tracks\n", @@ -1229,11 +1226,11 @@ " normalize_window=8*plot_window,\n", " 
bin_size=bin_size,\n", " pad=pad,\n", - " rescale_tracks=rescale_tracks,\n", " normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", - " anno_df=anno_df\n", + " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, diff --git a/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb b/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb index 13b5ea0..c6d3f06 100644 --- a/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb +++ b/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb @@ -10,12 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-26 18:01:09.612911: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-09-26 18:01:09.612989: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-09-26 18:01:09.614154: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-09-26 18:01:09.622849: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-06 09:44:30.273299: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-06 09:44:30.273355: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-06 09:44:30.274405: E 
external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-06 09:44:30.281200: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-09-26 18:01:11.655064: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-06 09:44:32.051541: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -172,7 +172,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-25 10:56:52.699671: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10232 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" + "2024-10-06 09:53:30.683809: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10520 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" ] } ], @@ -296,9 +296,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-25 10:57:41.889919: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", - "2024-09-25 10:57:41.984645: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", - "2024-09-25 10:57:42.317045: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-10-06 09:54:14.772189: I 
external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-10-06 09:54:14.845016: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-10-06 09:54:15.159703: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -363,8 +363,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 20.2 s, sys: 443 ms, total: 20.7 s\n", - "Wall time: 23.5 s\n" + "CPU times: user 18.1 s, sys: 682 ms, total: 18.8 s\n", + "Wall time: 21.7 s\n" ] } ], @@ -414,7 +414,7 @@ "bin_size = 32\n", "pad = 16\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = False\n", "\n", "anno_df = apa_df\n", @@ -453,11 +453,11 @@ " normalize_window=8*plot_window,\n", " bin_size=bin_size,\n", " pad=pad,\n", - " rescale_tracks=rescale_tracks,\n", " normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", - " anno_df=anno_df\n", + " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, @@ -564,6 +564,7 @@ " use_logodds=False,\n", " subtract_avg=True,\n", " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", ")\n" ] }, @@ -617,7 +618,7 @@ " plot_start=(prox_pas_pos - start) + 3 - 64,\n", " plot_end=(prox_pas_pos - start) + 3 + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_cov_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex'\n", ")\n" ] }, @@ -658,7 +659,8 @@ " 384.,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -712,7 +714,7 @@ " plot_start=(prox_pas_pos - start) + 3 - 64,\n", " plot_end=(prox_pas_pos - start) + 3 + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) 
+ '_prediction_ism_gtex_snp_4_reps_gtex_cov_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex'\n", ")\n" ] }, @@ -757,7 +759,8 @@ " n_samples=12,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -811,7 +814,7 @@ " plot_start=(prox_pas_pos - start) + 3 - 64,\n", " plot_end=(prox_pas_pos - start) + 3 + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_cov_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex'\n", ")\n" ] }, @@ -932,13 +935,13 @@ "start = center_pos - seq_len // 2\n", "end = center_pos + seq_len // 2\n", "\n", - "blacklist_bed = \"/home/drk/common/data/genomes/hg38/blacklist/blacklist_hg38_all.bed\"\n", + "blacklist_bed = \"hg38/blacklist/blacklist_hg38_all.bed\"\n", "\n", "read_coverage_func_wt, close_coverage_func_wt = get_coverage_reader(cov_files_wt, 16384, 16, blacklist_bed)\n", "read_coverage_func_mut, close_coverage_func_mut = get_coverage_reader(cov_files_mut, 16384, 16, blacklist_bed)\n", "\n", - "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", - "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", + "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", "\n", "close_coverage_func_wt()\n", "close_coverage_func_mut()\n", @@ -952,7 +955,7 @@ "\n", "anno_df = apa_df\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = True\n", "\n", "#Tracks\n", @@ -990,11 +993,11 @@ " normalize_window=8*plot_window,\n", " bin_size=bin_size,\n", " pad=pad,\n", - " rescale_tracks=rescale_tracks,\n", " 
normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", - " anno_df=anno_df\n", + " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, diff --git a/examples/borzoi_example_paqtl_chr1_236763042_A_G_fancy.ipynb b/examples/borzoi_example_paqtl_chr1_236763042_A_G_fancy.ipynb new file mode 100644 index 0000000..f3f0272 --- /dev/null +++ b/examples/borzoi_example_paqtl_chr1_236763042_A_G_fancy.ipynb @@ -0,0 +1,1153 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "7dbf2734", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 18:03:39.445294: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-06 18:03:39.445350: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + "2024-10-06 18:03:39.446381: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-06 18:03:39.453231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-10-06 18:03:40.984763: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + } + ], + "source": [ + "import json\n", + "import os\n", + "import time\n", + "import warnings\n", + "\n", + "import h5py\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.patches as patches\n", + "import 
numpy as np\n", + "import pandas as pd\n", + "import pysam\n", + "import pyfaidx\n", + "import pybedtools\n", + "import csv\n", + "import tensorflow as tf\n", + "\n", + "from baskerville import seqnn\n", + "from baskerville import gene as bgene\n", + "from baskerville import dna\n", + "\n", + "from borzoi_helpers import *\n", + "\n", + "tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)\n", + "#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c89f34f0", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f3c0 model already exists.\n", + "f3c1 model already exists.\n", + "f3c2 model already exists.\n", + "f3c3 model already exists.\n", + "Gene annotation already exists.\n", + "Gene annotation (no read-through, protein-coding) already exists.\n", + "Gene annotation (protein-coding) already exists.\n", + "TSS annotation already exists.\n", + "Splice site annotation already exist.\n", + "Splice site annotation already exist.\n", + "PolyA site annotation already exist.\n", + "Human genome FASTA already exists.\n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "#Download model weights (data fold 3, 4 replicates)\n", + "for rep in f3c0,f0 f3c1,f1 f3c2,f2 f3c3,f3; do IFS=\",\"; set -- $rep; \n", + " mkdir -p \"saved_models/$1/train\"\n", + " local_model=\"saved_models/$1/train/model0_best.h5\"\n", + " if [ -f \"$local_model\" ]; then\n", + " echo \"$1 model already exists.\"\n", + " else\n", + " wget --progress=bar:force \"https://storage.googleapis.com/seqnn-share/borzoi/$2/model0_best.h5\" -O \"$local_model\"\n", + " fi\n", + "done\n", + "\n", + "#Download and uncompress annotation files\n", + "mkdir -p hg38/genes/gencode41\n", + "mkdir -p hg38/genes/polyadb\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort.gtf ]; then\n", + " echo \"Gene annotation already exists.\"\n", + "else\n", + " wget -O - 
https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_nort_protein.gtf ]; then\n", + " echo \"Gene annotation (no read-through, protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_nort_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_nort_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein.gtf ]; then\n", + " echo \"Gene annotation (protein-coding) already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein.gtf.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein.gtf\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_tss2.bed ]; then\n", + " echo \"TSS annotation already exists.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_tss2.bed.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_tss2.bed\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.csv.gz -O hg38/genes/gencode41/gencode41_basic_protein_splice.csv.gz\n", + "fi\n", + "\n", + "if [ -f hg38/genes/gencode41/gencode41_basic_protein_splice.gff ]; then\n", + " echo \"Splice site annotation already exist.\"\n", + "else\n", + " wget -O - https://storage.googleapis.com/seqnn-share/helper/gencode41_basic_protein_splice.gff.gz | gunzip -c > hg38/genes/gencode41/gencode41_basic_protein_splice.gff\n", + "fi\n", + "\n", + "if [ -f hg38/genes/polyadb/polyadb_human_v3.csv.gz ]; then\n", + " echo \"PolyA site annotation already exist.\"\n", + "else\n", + " wget 
https://storage.googleapis.com/seqnn-share/helper/polyadb_human_v3.csv.gz -O hg38/genes/polyadb/polyadb_human_v3.csv.gz\n", + "fi\n", + "\n", + "#Download and index hg38 genome\n", + "mkdir -p hg38/assembly/ucsc\n", + "\n", + "if [ -f hg38/assembly/ucsc/hg38.fa ]; then\n", + " echo \"Human genome FASTA already exists.\"\n", + "else\n", + " wget -O - http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz | gunzip -c > hg38/assembly/ucsc/hg38.fa\n", + "fi\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "66b85810", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Faidx(\"hg38/assembly/ucsc/hg38.fa\")" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pyfaidx.Faidx('hg38/assembly/ucsc/hg38.fa')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4c0c1b39", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 18:05:25.100489: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10520 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" + ] + } + ], + "source": [ + "#Model configuration\n", + "\n", + "params_file = 'params_pred.json'\n", + "targets_file = 'targets_gtex.txt' #Subset of targets_human.txt\n", + "\n", + "seq_len = 524288\n", + "n_reps = 1 #To use only one model replicate, set to 'n_reps = 1'. 
To use all four replicates, set 'n_reps = 4'.\n", + "rc = True #Average across reverse-complement prediction\n", + "\n", + "#Read model parameters\n", + "\n", + "with open(params_file) as params_open :\n", + " \n", + " params = json.load(params_open)\n", + " \n", + " params_model = params['model']\n", + " params_train = params['train']\n", + "\n", + "#Remove cropping\n", + "params_model['trunk'][-2]['cropping'] = 0\n", + "\n", + "#Read targets\n", + "\n", + "targets_df = pd.read_csv(targets_file, index_col=0, sep='\\t')\n", + "target_index = targets_df.index\n", + "\n", + "#Create local index of strand_pair (relative to sliced targets)\n", + "if rc :\n", + " strand_pair = targets_df.strand_pair\n", + " \n", + " target_slice_dict = {ix : i for i, ix in enumerate(target_index.values.tolist())}\n", + " slice_pair = np.array([\n", + " target_slice_dict[ix] if ix in target_slice_dict else ix for ix in strand_pair.values.tolist()\n", + " ], dtype='int32')\n", + "\n", + "#Initialize model ensemble\n", + "\n", + "models = []\n", + "for rep_ix in range(n_reps) :\n", + " \n", + " model_file = \"saved_models/f3c\" + str(rep_ix) + \"/train/model0_best.h5\"\n", + "\n", + " seqnn_model = seqnn.SeqNN(params_model)\n", + " seqnn_model.restore(model_file, 0)\n", + " seqnn_model.build_slice(target_index)\n", + " if rc :\n", + " seqnn_model.strand_pair.append(slice_pair)\n", + " seqnn_model.build_ensemble(rc, [0])\n", + " \n", + " models.append(seqnn_model)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6f010781", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "len(apa_df_utr) = 114605\n", + "len(apa_df_intron) = 83473\n", + "len(tss_df) = 116649\n" + ] + } + ], + "source": [ + "#Load genome fasta and gene annotations\n", + "\n", + "#Initialize fasta sequence extractor\n", + "fasta_open = pysam.Fastafile('hg38/assembly/ucsc/hg38.fa')\n", + "\n", + "#Load gene/exon annotation\n", + 
"gtf_file = 'hg38/genes/gencode41/gencode41_basic_nort_protein.gtf'\n", + "\n", + "transcriptome = bgene.Transcriptome(gtf_file)\n", + "\n", + "#Get gene span bedtool\n", + "bedt_span = transcriptome.bedtool_span()\n", + "\n", + "#Load APA atlas\n", + "apa_df = pd.read_csv('hg38/genes/polyadb/polyadb_human_v3.csv.gz', sep='\\t', compression='gzip')\n", + "apa_df = apa_df[['pas_id', 'gene', 'chrom', 'position_hg38', 'strand', 'site_num', 'num_sites', 'site_type', 'pas_type', 'total_count']]\n", + "\n", + "apa_df.loc[apa_df['pas_type'] == 'NoPAS', 'pas_type'] = 'No_CSE'\n", + "\n", + "#Only consider 3' UTR sites\n", + "apa_df_utr = apa_df.query(\"site_type == '3\\\\' most exon'\").copy().reset_index(drop=True)\n", + "\n", + "#Or intronic sites\n", + "apa_df_intron = apa_df.query(\"site_type == 'Intron' and pas_type != 'No_CSE'\").copy().reset_index(drop=True)\n", + "\n", + "print(\"len(apa_df_utr) = \" + str(len(apa_df_utr)))\n", + "print(\"len(apa_df_intron) = \" + str(len(apa_df_intron)))\n", + "\n", + "#Load TSS atlas\n", + "tss_df = pd.read_csv('hg38/genes/gencode41/gencode41_basic_tss2.bed', sep='\\t', names=['chrom', 'position_hg38', 'end', 'tss_id', 'feat1', 'strand'])\n", + "tss_df['gene'] = tss_df['tss_id'].apply(lambda x: x.split(\"/\")[1] if \"/\" in x else x)\n", + "\n", + "print(\"len(tss_df) = \" + str(len(tss_df)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d2658266", + "metadata": {}, + "outputs": [], + "source": [ + "#Get reference/alternate sequence for variant, and annotations for target gene\n", + "\n", + "search_gene = 'ENSG00000077522'\n", + "\n", + "center_pos = 236763042\n", + "\n", + "chrom = 'chr1'\n", + "poses = [236763042]\n", + "alts = ['G']\n", + "\n", + "start = center_pos - seq_len // 2\n", + "end = center_pos + seq_len // 2\n", + "\n", + "load_isoforms = True\n", + "\n", + "#Get exon bin range\n", + "gene_keys = [gene_key for gene_key in transcriptome.genes.keys() if search_gene in gene_key]\n", + "\n", + 
"gene = transcriptome.genes[gene_keys[0]]\n", + "gene_strand = gene.strand\n", + "\n", + "if chrom is None or start is None or end is None :\n", + " chrom = gene.chrom\n", + " g_start, g_end = gene.span()\n", + " mid = (g_start + g_end) // 2\n", + " start = mid - seq_len // 2\n", + " end = mid + seq_len // 2\n", + "\n", + "#Determine output sequence start\n", + "seq_out_start = start + seqnn_model.model_strides[0]*seqnn_model.target_crops[0]\n", + "seq_out_len = seqnn_model.model_strides[0]*seqnn_model.target_lengths[0]\n", + "\n", + "#Determine output positions of gene exons\n", + "gene_slice = gene.output_slice(seq_out_start, seq_out_len, seqnn_model.model_strides[0], False, old_version=True)\n", + "\n", + "#Get sequence bedtool\n", + "seq_bedt = pybedtools.BedTool('%s %d %d' % (chrom, start, end), from_string=True)\n", + "\n", + "#Get all genes (exons and strands) overlapping input window\n", + "gene_ids = sorted(list(set([overlap[3] for overlap in bedt_span.intersect(seq_bedt, wo=True) if search_gene not in overlap[3]])))\n", + "gene_slices = []\n", + "gene_strands = []\n", + "for gene_id in gene_ids :\n", + " gene_slices.append(transcriptome.genes[gene_id].output_slice(seq_out_start, seq_out_len, seqnn_model.model_strides[0], False, old_version=True))\n", + " gene_strands.append(transcriptome.genes[gene_id].strand)\n", + "\n", + "#Get 3' UTR pA sites for gene\n", + "apa_df_gene_utr = apa_df_utr.query(\"gene == '\" + gene.name + \"'\").copy().reset_index(drop=True)[['chrom', 'gene', 'strand', 'position_hg38']]\n", + "apa_df_gene_intron = apa_df_intron.query(\"gene == '\" + gene.name + \"'\").copy().reset_index(drop=True)[['chrom', 'gene', 'strand', 'position_hg38']]\n", + "\n", + "#Get TSS sites for gene\n", + "tss_df_gene = tss_df.loc[tss_df['gene'].str.contains(search_gene)].copy().reset_index(drop=True)[['chrom', 'gene', 'strand', 'position_hg38']]\n", + "\n", + "def _switch_transcript_id(id_str) :\n", + " return id_str.replace(\"gene_id\", 
\"gene_id_orig\").replace(\"transcript_id\", \"gene_id\")\n", + "\n", + "#Get gene isoforms\n", + "isoform_slices = None\n", + "if load_isoforms :\n", + " gtf_df = pd.read_csv(gtf_file, sep='\\t', skiprows=5, names=['chrom', 'havana_str', 'feature', 'start', 'end', 'feat1', 'strand', 'feat2', 'id_str'])\n", + " gtf_df = gtf_df.loc[gtf_df['id_str'].str.contains(search_gene)].copy().reset_index(drop=True)\n", + " gtf_df = gtf_df.loc[gtf_df['id_str'].str.contains(\"transcript_id\")].copy().reset_index(drop=True)\n", + " gtf_df = gtf_df.loc[gtf_df['feature'] == 'exon'].copy().reset_index(drop=True)\n", + " \n", + " transcript_ids = gtf_df['id_str'].apply(lambda x: x.split(\"transcript_id \\\"\")[1].split(\"\\\";\")[0]).unique().tolist()\n", + " gtf_df['id_str'] = gtf_df['id_str'].apply(_switch_transcript_id)\n", + " \n", + " gtf_df.to_csv('borzoi_gene_isoforms.gtf', sep='\\t', index=False, header=False, quoting=csv.QUOTE_NONE)\n", + " \n", + " transcriptome_iso = bgene.Transcriptome('borzoi_gene_isoforms.gtf')\n", + " \n", + " isoform_slices = []\n", + " for transcript_id in transcript_ids :\n", + " isoform_slices.append(transcriptome_iso.genes[transcript_id].output_slice(seq_out_start, seq_out_len, seqnn_model.model_strides[0], False, old_version=True))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0639cf22", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-10-06 18:06:54.594911: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-10-06 18:06:54.664734: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-10-06 18:06:54.961769: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 17 s, sys: 284 ms, total: 17.3 s\n", + "Wall time: 20.1 s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Predict for chr1_236763042_A_G\n", + "# (~6 minutes on CPU w 1 replicate; ~15 seconds on GPU)\n", + "\n", + "save_figs = False\n", + "save_suffix = '_chr1_236763042_A_G'\n", + "\n", + "sequence_one_hot_wt = process_sequence(fasta_open, chrom, start, end)\n", + "\n", + "#Induce mutation(s)\n", + "sequence_one_hot_mut = np.copy(sequence_one_hot_wt)\n", + "\n", + "for pos, alt in zip(poses, alts) :\n", + " alt_ix = -1\n", + " if alt == 'A' :\n", + " alt_ix = 0\n", + " elif alt == 'C' :\n", + " alt_ix = 1\n", + " elif alt == 'G' :\n", + " alt_ix = 2\n", + " elif alt == 'T' :\n", + " alt_ix = 3\n", + "\n", + " sequence_one_hot_mut[pos-start-1] = 0.\n", + " sequence_one_hot_mut[pos-start-1, alt_ix] = 1.\n", + "\n", + "#Make predictions\n", + "y_wt = predict_tracks(models, sequence_one_hot_wt)\n", + "y_mut = predict_tracks(models, sequence_one_hot_mut)\n", + "\n", + "\n", + "#Visualize coverage tracks\n", + "plot_start = seq_len // 2 - 4096 // 2\n", + "plot_end = seq_len // 2 + 4096 // 2\n", + "bin_size = 32\n", + "pad = 0\n", + "\n", + "highlight_covr_poses_rel = [236763033 - start, 236764539 - start]\n", + "covr_orientation = 'before'\n", + "covr_agg = 'mean'\n", + "covr_width = 5\n", + "\n", + "#Tracks\n", + "track_indices = [\n", + " np.nonzero((targets_df['identifier'].str.contains('GTEX-') | targets_df['identifier'].str.contains('K-562')).values)[0].tolist(),\n", + "]\n", + "\n", + "track_names = [\n", + " 'GTEx Pooled',\n", + "]\n", + "\n", + "track_colors = [\n", + " ['deepskyblue', 'red'],\n", + "]\n", + "\n", + "track_labels = [\n", + " ['Ref', 'Alt'],\n", + "]\n", + "\n", + "track_scale = 0.01\n", + "track_transform = 3./4.\n", + "soft_clip = 384.\n", + "\n", + "untransform_old = True\n", + "\n", + "#Plot 
coverage\n", + "plot_coverage_tracks(\n", + " y_wt,\n", + " track_indices,\n", + " track_names,\n", + " track_colors,\n", + " track_labels,\n", + " track_scale,\n", + " track_transform,\n", + " soft_clip,\n", + " start,\n", + " y_2_in=y_mut,\n", + " plot_pair=True,\n", + " pair_order=[1, 0],\n", + " pair_alpha=0.7,\n", + " log_scale=False,\n", + " same_scale=True,\n", + " plot_start_rel=plot_start,\n", + " plot_end_rel=plot_end,\n", + " highlight_covr_poses_rel=highlight_covr_poses_rel,\n", + " covr_orientation=covr_orientation,\n", + " covr_agg=covr_agg,\n", + " covr_width=covr_width,\n", + " bin_size=bin_size,\n", + " pad=pad,\n", + " save_figs=save_figs,\n", + " save_suffix=save_suffix,\n", + " gene_slice=gene_slice,\n", + " gene_slices=gene_slices,\n", + " isoform_slices=isoform_slices,\n", + " gene_strand=gene_strand,\n", + " chrom=chrom,\n", + " search_gene=search_gene,\n", + " gene_strands=gene_strands,\n", + " apa_df_gene_utr=apa_df_gene_utr,\n", + " apa_df_gene_intron=apa_df_gene_intron,\n", + " tss_df_gene=tss_df_gene,\n", + " annotate_utr_apa=True,\n", + " annotate_intron_apa=False,\n", + " annotate_tss=False,\n", + " plot_strands=True,\n", + " plot_other_genes=False,\n", + " plot_other_gene_strands=False,\n", + " plot_isoforms=False,\n", + " plot_isoform_strands=False,\n", + " gene_color='black',\n", + " isoform_color='dimgray',\n", + " other_gene_color='black',\n", + " max_isoforms=5,\n", + " isoform_height_frac=0.,\n", + " plot_as_bars=False,\n", + " fig_size=(10, 1.5),\n", + " untransform_old=untransform_old,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5e8fda51", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Re-plot coverage tracks and annotate isoforms\n", + "\n", + "plot_coverage_tracks(\n", + " y_wt,\n", + " track_indices,\n", + " track_names,\n", + " track_colors,\n", + " track_labels,\n", + " track_scale,\n", + " track_transform,\n", + " soft_clip,\n", + " start,\n", + " y_2_in=y_mut,\n", + " plot_pair=True,\n", + " pair_order=[1, 0],\n", + " pair_alpha=0.7,\n", + " log_scale=False,\n", + " same_scale=True,\n", + " plot_start_rel=plot_start,\n", + " plot_end_rel=plot_end,\n", + " highlight_covr_poses_rel=highlight_covr_poses_rel,\n", + " covr_orientation=covr_orientation,\n", + " covr_agg=covr_agg,\n", + " covr_width=covr_width,\n", + " bin_size=bin_size,\n", + " pad=pad,\n", + " save_figs=save_figs,\n", + " save_suffix=save_suffix,\n", + " gene_slice=gene_slice,\n", + " gene_slices=gene_slices,\n", + " isoform_slices=isoform_slices,\n", + " gene_strand=gene_strand,\n", + " chrom=chrom,\n", + " search_gene=search_gene,\n", + " gene_strands=gene_strands,\n", + " apa_df_gene_utr=apa_df_gene_utr,\n", + " apa_df_gene_intron=apa_df_gene_intron,\n", + " tss_df_gene=tss_df_gene,\n", + " annotate_utr_apa=True,\n", + " annotate_intron_apa=False,\n", + " annotate_tss=False,\n", + " plot_strands=True,\n", + " plot_other_genes=False,\n", + " plot_other_gene_strands=False,\n", + " plot_isoforms=True,\n", + " plot_isoform_strands=True,\n", + " gene_color='black',\n", + " isoform_color='dimgray',\n", + " other_gene_color='black',\n", + " max_isoforms=5,\n", + " isoform_height_frac=0.5,\n", + " plot_as_bars=False,\n", + " fig_size=(10, 2),\n", + " untransform_old=untransform_old,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f3bb0e25", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mut_bin = 8192\n", + "\n", + "pas_ix = 0: bin = 8165 (8163 - 8168)\n", + "pas_ix = 1: bin = 8175 (8173 - 8178)\n", + "pas_ix = 
2: bin = 8180 (8178 - 8183)\n", + "pas_ix = 3: bin = 8204 (8202 - 8207)\n", + "pas_ix = 4: bin = 8222 (8220 - 8225)\n" + ] + } + ], + "source": [ + "#Print polyA site bin positions and other info\n", + "\n", + "plot_start = center_pos - 4096 // 2\n", + "plot_end = center_pos + 4096 // 2\n", + "\n", + "plot_start_bin = (plot_start - start) // bin_size - pad\n", + "plot_end_bin = (plot_end - start) // bin_size - pad\n", + "\n", + "mut_bin = (poses[0] - start) // bin_size - pad\n", + "\n", + "#Get polyA site positions\n", + "pas_poses = apa_df.query(\"chrom == '\" + chrom + \"' and position_hg38 >= \" + str(plot_start) + \" and position_hg38 < \" + str(plot_end))['position_hg38'].values.tolist()\n", + "\n", + "print(\"mut_bin = \" + str(mut_bin))\n", + "print(\"\")\n", + "\n", + "#Print polyA site positions\n", + "for pas_ix, pas_pos in enumerate(pas_poses) :\n", + " \n", + " pas_bin = int((pas_pos - start) // 32) - 16\n", + " \n", + " bin_end = pas_bin + 3\n", + " bin_start = bin_end - 5\n", + " \n", + " print(\"pas_ix = \" + str(pas_ix) + \": bin = \" + str(pas_bin) + \" (\" + str(bin_start) + \" - \" + str(bin_end) + \")\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b9b9f717", + "metadata": {}, + "outputs": [], + "source": [ + "#Choose polyA sites to use for attributions\n", + "\n", + "prox_pas_pos = pas_poses[1]\n", + "\n", + "prox_pas_bin = 8175\n", + "dist_pas_bin = 8222\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a6cb111f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1/1 [==============================] - 414s 414s/step\n", + "1/1 [==============================] - 221s 221s/step\n", + "1/1 [==============================] - 411s 411s/step\n", + "1/1 [==============================] - 220s 220s/step\n", + "CPU times: user 18min 49s, sys: 2min 44s, total: 21min 33s\n", + "Wall time: 22min 16s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Get 
contribution scores (gradient) for pooled GTEX tracks\n", + "# (~20 minutes on CPU; otherwise runnable only on 40GB GPU cards, e.g. A100)\n", + "\n", + "_, _, [pred_grad_wt, pred_grad_mut] = get_prediction_gradient_w_rc(\n", + " models,\n", + " [sequence_one_hot_wt, sequence_one_hot_mut],\n", + " dist_pas_bin-5,\n", + " dist_pas_bin,\n", + " prox_pas_bin-5,\n", + " prox_pas_bin,\n", + " target_index[np.arange(0, 89).tolist()].tolist(),\n", + " 0.01,\n", + " 3./4.,\n", + " clip_soft=384.,\n", + " use_mean=False,\n", + " use_ratio=True,\n", + " use_logodds=False,\n", + " subtract_avg=True,\n", + " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d6e2fafe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- WT ---\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAABZCAYAAACjWLKDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAMCklEQVR4nO3dbWxU153H8e/Yxs9jAwZqHAyIBLAJhGipNymCQLpJxG77ZqO2UlUFdVu1itJsWqGs2kZ502hFVlUrRRGREm200qpVVOVNq81Wu9mKVkma2KG1OwbsjIFCNybGxmMMHj+M7fHcfTGxA8TYY66xYfl+JMujo+Mz/7ljzczvnnPuRIIgCJAkSZKkEPIWuwBJkiRJtz6DhSRJkqTQDBaSJEmSQjNYSJIkSQrNYCFJkiQpNIOFJEmSpNAMFpIkSZJCK8ilUyaToauri2g0SiQSudE1SZIkSboJBEFAMpmkpqaGvLyZ5yRyChZdXV3U1tbOS3GSJEmSbi2dnZ2sWbNmxj45BYtoNDo1YEVFRfjKJEmSJN30BgYGqK2tncoDM8kpWEwuf6qoqDBYSJIkSbeZXLZDuHlbkiRJUmgGC0mSJEmhGSwkSZIkhWawkCRJkhSawUKSJElSaAYLSZIkSaEZLCRJkiSFZrCQJEmSFJrBQpIkSVJoBgtJkiRJoRksJEmSJIVmsJAkSZIUmsFCkiRJUmgGC0mSJEmhGSwkSZIkhWawkCRJkhSawUKSJElSaAYLSZIkSaEZLCRJkiSFZrCQJEmSFJrBQpIkSVJoBgtJkiRJoRksJEmSJIVmsJAkSZIUmsFCkiRJUmgGC0mSJEmhGSwkSZIkhWawkCRJkhSawUKSJElSaAYLSZIkSaEZLCRJkiSFZrCQJEmSFJrBQpIkSVJoBgtJkiRJoRksJEmSJIVmsJAkSZIUmsFCkiRJUmgFi12ApFlEItO3B8HC1iFJkjQDZywkSZIkhWawkCRJkhSawUKSJElSaO6xkG52k3spJvdauLdCkiTdhJyxkBZBEARTP2EkhhNUPF9B/Uv181SZJEnS9TFYSIvgz/1/Ju+5POpeqgs1T
jwRJzmWJJ6IMzQ2NE/VSZIkzZ3BQloEbefbADh14RSpdGqqPTma5JnDz/BC0ws5jRNPxKdun+g7MWv/sYkxHv/Px/n+b74/t4IlSZJmYbCQFkFbbzZYZILMFYGgtaeV53//PD88/EPSmfSs48QTcaKF0anbs/mg9wNeaX6FH7/3YwZGB66zekmSpE8zWEiLoL23nfVL1wOfzF4AxLpjAKTSKU72nZx1nHgiTsMdDVQWVdLR1zHVnhxN8sXXvshjv3zsiv6T4wMc6zl2/Q9AkiTpKgYLaRG09baxa+0ulhUvo723fao91h1jdfnqqduz6ejrYOPyjWys2njFjMWRj47w65O/5udHf073YPdUe2tPK6vLV5Mfyc9pfEmSpFwZLKQFNpGZIJ6IU1dVR92KuqllUZANE3vX72VV2Spae1pnHGc0Pcrp/tPZYLH8ymDReLaRwvzC7O3OxivG31Gzg01VmwwWkiRpXhkspAV25uIZUukUdSuuDBbjE+McP3+cbau2sW3Vtlk/+J+6cIpMkGFjVTZYnOg7QSbIANlgsXvtbqrLq2k8mw0WQRAQ646xdeVWtq7aOmtwkSRJmguDhbTAJvdUbF6xmc1Vmzl14RSj6VE6+joYnRhl22dyCxaTMxTrl65nw7INjKRH6LzUSRAENJ1toqGmgYaahqlg0TnQSX+qn62rssHi2PljOW0QlyRJyoXfvC0tsMk9Fc8cfoZzg+fIBBk6+jo42nMUgGhhlFVlq+gZ6qF7sJvq8uppx5kMFttf3n5FWyqd4sLIBRruaKB0SSkHf3+Q8YlxWruzMxSlS0qpLKoklU5xou8EW1ZuuZEPV5Ik3SYMFtICm1z69MaJN6ba2nvbp2Yo9v773qn2WHeMfXftm3acy68CNSmeiHNu8BwA73W+R89QD6l0itae1qnxH3390an+rd2tBgvl7PRp+NWvoKsLSkvhK1+BrVsXuypJ0s3CYCEtsLbeNqpKqthUtQmA9z96n7bzbdMufZopWMQTcdZVruOROx8B4LVjr9HR18FEZgKAnzb+dKpvY2cjsZ7px//qtq+GfET/z6V64dQrMPQXKFwGQQArd0Lto7P+6S0rEvlU0ztvBzz0EBw4AN/6FgwPQyazCLVdbpo6CYKFr0OSBBgspAU1eUWo7zR8h5888hMA6l+qp603Gyy+ce83ePFvXwSyS5yutc8iCALiiTj7t+/n0N8dAuDY+WPEE3ESwwkaahp4ouEJAJ797bM0nm0k1h3jS1u+xM/+/mcA7Pq3XdOGjdvG5IfSqz+IXt3+zqMw1g/7miHRBJfaYJ73psS6Y3zzP75JVUkVu9fu5tkHniUy3YfmWaTSKV5teZWR8REeWPcA96257/oKmnzskcjU7bf+GcbG4LvfherpV+dd22XjzGv/aeoEOHkS4nHIy8v+rFsHWy6fmJtrPZKknMwpWMRiMcrLy29ULXOTHoGxPsgrgbwCCDIQiRAUVPBBX5zmrmZWR1fTM9TDrtpdrCpax9mzUFYGJSXZIQoLYWAAXn8d6upgU/YEMiUlV71xXv7mdZsaHBtkfGIcgMriSkaG8zh4EJYuhYcfhqKi7NnLu++eeZyR9Ahj6THyInmUF5YzNhbhRz/KHvMvfAGKi2F8HLZvn3mcm9KOHdnfzc3X7NJ5qZPUhylK1pfQ0tICQHWymiNnjtCX7GNl9Uo6jmeXONUO19L0hyZaNrRcOUhLC71DvST/kqSk5pNxlvcvp+lsExdGLvD4Zx/nnsw9AGxJb+F37/2O7sFu9pXuo/1odo9HzWAN77a/S8uWq8YP61rHIYfjc0PMdr8t13j8k+3JnXD6X2Hkn6CwCv73NYgsgZ13zamMiyMX+cXxXxDriVFdXs3w+DBfv/frpNIpnv6fp9lZu5O78u/ihV++QPvRdg587gBdyS66kl1Ei6KMpkepX1lPUX7RJ+2FUUYnRqlbUUf3YDc/OPwDypeUc9+a+zj4+kH2b9/PY9sfI++zD
Z8uKNfn4ePj0NCQXfb04IPw+c9DKgUbN8LfPDRBrDtGa3cr1dFq+kf62bN+D2sq1kx/PHN1nf2bm+HJJ+GJJ2Dfx5N9Z85k652ufyIBb7+dDR/V1dmX+ZISWLZsbnefkx075vb/P9f+knQDDA4O5tw3EgSzn7YZGBigsrIyVFGSJEmSbk2XLl2ioqJixj5zmrF46623bp4ZC+VkfGKcVDrFkvwlFBcUL3Y5i2PyjPXlrnUWMAig9x0YTUBxDeTlZ9sq67OnMofPQWYMIh9fqblsPRSU3LDSb7ggA80HIJ2EDf8AI12Q6oGl90L78xDdCPf+C/T9AYY/hJI7oPrB+bnvU6/CQAd8Zi+kh2BiBCq3kgh2cPRodvaqtDQ7E7Z5M5Q9MM3zCAR//CMvvv8iTR81UVVSRXlhOc/tfY7CgsL5qVOLLjGc4NCRQywtWkpBfgHf/qtvL/rzmxxNMjQ2RFlhGdGi6KLVMT4Ohw9DMglr1kB+PpSXX7X0K0fNXc0cePMA37v/e7zc/DL779nP1+752vwXfZXeoV6e+u+nqF9Rz7ud7/L0557m4TsfnttrdyYN3b+BsQEoq4VIAQQTTFRs59DLpVy4ADt3wpIl2eVxe/fCm29CZ2d2tULhx/9O998P/f3ZWa78fCgoyL4FTF6kIAiyxzyTyf5N3uUX7b9qdvRS6hJP/ddT3Ln8To6fP87utbv5x79+Ej56A0Z7s6+vkSVAABX1UDR/02SZIMPF1EUiRFhWciOm33Q7GRwcZM+ePTn1ndOMRS5JRdItaGIs+0Y3kYKCciheCelB6P4tDHdm+0TyoWYflG9Y3Fqv4cNLH5IJMqyrXHdd+xMkZa8m19jZyNrKtXz57i8v2P2m0il6BnsoKyxjRemKBbvfG21wbJA/nfsTxQXFNNwxzZJE6RYwlxxgsJAkSZI0rbnkAL95W5IkSVJoBgtJkiRJoRksJEmSJIVmsJAkSZIUmsFCkiRJUmgGC0mSJEmh5fQFeZNXpB0YGLihxUiSJEm6eUx+/s/hGypyCxbJZBKA2traEGVJkiRJuhUlk0kqKytn7JPTF+RlMhm6urqIRqN+o60kSZJ0mwiCgGQySU1NDXl5M++iyClYSJIkSdJM3LwtSZIkKTSDhSRJkqTQDBaSJEmSQjNYSJIkSQrNYCFJkiQpNIOFJEmSpNAMFpIkSZJC+z/+jRR7kl0JDQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Mut ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize gradient contribution scores (128 bp window centered on variant)\n", + "\n", + "visualize_input_gradient_pair(\n", + " pred_grad_wt,\n", + " pred_grad_mut,\n", + " plot_start=(prox_pas_pos - start) + 3 - 64,\n", + " plot_end=(prox_pas_pos - start) + 3 + 64,\n", + " save_figs=False,\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d1128450", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_ix = 0\n", + "example_ix = 1\n", + "CPU times: user 16min 8s, sys: 1min 21s, total: 17min 30s\n", + "Wall time: 33min 52s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Get contribution scores (ISM) for pooled GTEX tracks\n", + "# (not feasible to run on CPU; ~30 minutes on GPU)\n", + "\n", + "[pred_ism_wt, pred_ism_mut] = get_ism(\n", + " models,\n", + " [sequence_one_hot_wt, sequence_one_hot_mut],\n", + " (prox_pas_pos - start) + 3 - 64,\n", + " (prox_pas_pos - start) + 3 + 64,\n", + " dist_pas_bin-5,\n", + " dist_pas_bin,\n", + " prox_pas_bin-5,\n", + " prox_pas_bin,\n", + " np.arange(0, 89).tolist(),\n", + " 0.01,\n", + " 3./4.,\n", + " 384.,\n", + " use_mean=False,\n", + " use_ratio=True,\n", + " use_logodds=False,\n", + " untransform_old=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6c3bb68d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- WT ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Mut ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize ISM contribution scores (128 bp window centered on variant)\n", + "\n", + "visualize_input_gradient_pair(\n", + " pred_ism_wt,\n", + " pred_ism_mut,\n", + " plot_start=(prox_pas_pos - start) + 3 - 64,\n", + " plot_end=(prox_pas_pos - start) + 3 + 64,\n", + " save_figs=False,\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1bd0ce10", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "example_ix = 0\n", + "example_ix = 1\n", + "CPU times: user 1h 4min 15s, sys: 5min 30s, total: 1h 9min 46s\n", + "Wall time: 2h 15min 18s\n" + ] + } + ], + "source": [ + "%%time\n", + "#Get contribution scores (ISM shuffle) for pooled GTEX tracks\n", + "# (not feasible to run on CPU; ~135 minutes on GPU)\n", + "\n", + "[pred_ism_wt, pred_ism_mut] = get_ism_shuffle(\n", + " models,\n", + " [sequence_one_hot_wt, sequence_one_hot_mut],\n", + " (prox_pas_pos - start) + 3 - 64,\n", + " (prox_pas_pos - start) + 3 + 64,\n", + " dist_pas_bin-5,\n", + " dist_pas_bin,\n", + " prox_pas_bin-5,\n", + " prox_pas_bin,\n", + " np.arange(0, 89).tolist(),\n", + " 0.01,\n", + " 3./4.,\n", + " 384.,\n", + " window_size=5,\n", + " n_samples=12,\n", + " use_mean=False,\n", + " use_ratio=True,\n", + " use_logodds=False,\n", + " untransform_old=True,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d3d9dde9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- WT ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--- Mut ---\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize ISM Shuffle contribution scores (128 bp window centered on variant)\n", + "\n", + "visualize_input_gradient_pair(\n", + " pred_ism_wt,\n", + " pred_ism_mut,\n", + " plot_start=(prox_pas_pos - start) + 3 - 64,\n", + " plot_end=(prox_pas_pos - start) + 3 + 64,\n", + " save_figs=False,\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "63e636fe", + "metadata": {}, + "outputs": [], + "source": [ + "#Load samples with reference- and alternate alleles respectively\n", + "\n", + "#These files are protected by dbGaP - email to request access\n", + "\n", + "cov_files_wt = pd.read_csv(\"gtex_ref_chr1_236763042_A_G.txt\", sep='\\t', names=['file'])['file'].values.tolist()\n", + "cov_files_mut = pd.read_csv(\"gtex_alt_chr1_236763042_A_G.txt\", sep='\\t', names=['file'])['file'].values.tolist()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ec6fc12b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gtex_targets_wt.shape = (16384, 6)\n", + "gtex_targets_mut.shape = (16384, 6)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#Visualize measured coverage tracks for chr10_116952944_T_C\n", + "\n", + "save_figs = False\n", + "save_suffix = '_chr1_236763042_A_G_meas_2_subjects'\n", + "\n", + "center_pos = 236763042\n", + "\n", + "chrom = 'chr1'\n", + "poses = [236763042]\n", + "alts = ['G']\n", + "\n", + "start = center_pos - seq_len // 2\n", + "end = center_pos + seq_len // 2\n", + "\n", + "blacklist_bed = \"hg38/blacklist/blacklist_hg38_all.bed\"\n", + "\n", + "read_coverage_func_wt, close_coverage_func_wt = get_coverage_reader(cov_files_wt, 16384, 0, blacklist_bed)\n", + "read_coverage_func_mut, close_coverage_func_mut = get_coverage_reader(cov_files_mut, 16384, 0, blacklist_bed)\n", + "\n", + "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "\n", + "close_coverage_func_wt()\n", + "close_coverage_func_mut()\n", + "\n", + "print(\"gtex_targets_wt.shape = \" + str(gtex_targets_wt.shape))\n", + "print(\"gtex_targets_mut.shape = \" + str(gtex_targets_mut.shape))\n", + "\n", + "#Visualize coverage tracks\n", + "plot_start = seq_len // 2 - 4096 // 2\n", + "plot_end = seq_len // 2 + 4096 // 2\n", + "bin_size = 32\n", + "pad = 0\n", + "\n", + "highlight_covr_poses_rel = [236763033 - start, 236764539 - start]\n", + "covr_orientation = 'before'\n", + "covr_agg = 'mean'\n", + "covr_width = 5\n", + "\n", + "#Tracks\n", + "track_indices = [\n", + " np.arange(gtex_targets_wt.shape[1], dtype='int32').tolist(),\n", + "]\n", + "\n", + "track_names = [\n", + " 'GTEx Blood',\n", + "]\n", + "\n", + "track_colors = [\n", + " ['deepskyblue', 'red'],\n", + "]\n", + "\n", + "track_labels = [\n", + " ['Ref', 'Alt'],\n", + "]\n", + "\n", + "track_scale = 0.01\n", + "track_transform = 3./4.\n", + "soft_clip = 384.\n", + "\n", + 
"untransform_old = True\n", + "\n", + "#Plot coverage\n", + "plot_coverage_tracks(\n", + " gtex_targets_wt[None, None, ...],\n", + " track_indices,\n", + " track_names,\n", + " track_colors,\n", + " track_labels,\n", + " track_scale,\n", + " track_transform,\n", + " soft_clip,\n", + " start,\n", + " y_2_in=gtex_targets_mut[None, None, ...],\n", + " plot_pair=True,\n", + " pair_order=[1, 0],\n", + " pair_alpha=0.7,\n", + " log_scale=False,\n", + " same_scale=True,\n", + " plot_start_rel=plot_start,\n", + " plot_end_rel=plot_end,\n", + " normalize_counts=True,\n", + " normalize_start_rel=524288 // 2 - 2048 * 8,\n", + " normalize_end_rel=524288 // 2 + 2048 * 8,\n", + " highlight_covr_poses_rel=highlight_covr_poses_rel,\n", + " covr_orientation=covr_orientation,\n", + " covr_agg=covr_agg,\n", + " covr_width=covr_width,\n", + " bin_size=bin_size,\n", + " pad=pad,\n", + " save_figs=save_figs,\n", + " save_suffix=save_suffix,\n", + " gene_slice=gene_slice,\n", + " gene_slices=gene_slices,\n", + " isoform_slices=isoform_slices,\n", + " gene_strand=gene_strand,\n", + " chrom=chrom,\n", + " search_gene=search_gene,\n", + " gene_strands=gene_strands,\n", + " apa_df_gene_utr=apa_df_gene_utr,\n", + " apa_df_gene_intron=apa_df_gene_intron,\n", + " tss_df_gene=tss_df_gene,\n", + " annotate_utr_apa=True,\n", + " annotate_intron_apa=False,\n", + " annotate_tss=False,\n", + " plot_strands=True,\n", + " plot_other_genes=False,\n", + " plot_other_gene_strands=False,\n", + " plot_isoforms=False,\n", + " plot_isoform_strands=False,\n", + " gene_color='black',\n", + " isoform_color='dimgray',\n", + " other_gene_color='black',\n", + " max_isoforms=5,\n", + " isoform_height_frac=0.,\n", + " plot_as_bars=False,\n", + " fig_size=(10, 1.5),\n", + " untransform_old=untransform_old,\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5d2a60f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 
(ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb b/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb index c6ef04b..83b916c 100644 --- a/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb +++ b/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb @@ -10,12 +10,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-26 18:01:12.492280: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-09-26 18:01:12.493991: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-09-26 18:01:12.495545: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-09-26 18:01:12.508859: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-10-06 10:06:41.610527: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", + "2024-10-06 10:06:41.610584: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", + 
"2024-10-06 10:06:41.611618: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "2024-10-06 10:06:41.618418: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-09-26 18:01:14.676533: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-10-06 10:06:43.358245: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -170,7 +170,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-25 10:53:57.292179: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10232 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" + "2024-10-06 10:07:00.831023: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10520 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1\n" ] } ], @@ -284,9 +284,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-09-25 10:54:21.155675: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", - "2024-09-25 10:54:21.243945: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", - "2024-09-25 10:54:21.567826: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" + "2024-10-06 10:08:03.084851: I 
external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907\n", + "2024-10-06 10:08:03.154520: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n", + "2024-10-06 10:08:03.472584: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory\n" ] }, { @@ -332,8 +332,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 19.5 s, sys: 437 ms, total: 20 s\n", - "Wall time: 22.8 s\n" + "CPU times: user 17.6 s, sys: 593 ms, total: 18.2 s\n", + "Wall time: 21.1 s\n" ] } ], @@ -383,7 +383,7 @@ "bin_size = 32\n", "pad = 16\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = False\n", "\n", "anno_df = splice_df\n", @@ -420,11 +420,11 @@ " normalize_window=8*plot_window,\n", " bin_size=bin_size,\n", " pad=pad,\n", - " rescale_tracks=rescale_tracks,\n", " normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", - " anno_df=anno_df\n", + " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, @@ -542,6 +542,7 @@ " use_logodds=False,\n", " subtract_avg=True,\n", " fold_index=np.arange(n_reps).tolist(),\n", + " untransform_old=True,\n", ")\n" ] }, @@ -595,7 +596,7 @@ " plot_start=(poses[0] - start) - 64,\n", " plot_end=(poses[0] - start) + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_snp_4_reps_gtex_cov_testis_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_grad_gtex_testis'\n", ")\n" ] }, @@ -636,7 +637,8 @@ " 384.,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -690,7 +692,7 @@ " plot_start=(poses[0] - start) - 64,\n", " plot_end=(poses[0] - start) + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + 
'_prediction_ism_gtex_snp_4_reps_gtex_cov_testis_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_gtex_testis'\n", ")\n" ] }, @@ -733,7 +735,8 @@ " n_samples=12,\n", " use_mean=False,\n", " use_ratio=True,\n", - " use_logodds=False\n", + " use_logodds=False,\n", + " untransform_old=True,\n", ")\n" ] }, @@ -787,7 +790,7 @@ " plot_start=(poses[0] - start) - 64,\n", " plot_end=(poses[0] - start) + 64,\n", " save_figs=False,\n", - " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_snp_4_reps_gtex_cov_testis_undo_clip'\n", + " fig_name=chrom + '_' + str(poses[0]) + '_prediction_ism_shuffle_gtex_testis'\n", ")\n" ] }, @@ -850,13 +853,13 @@ "start = center_pos - seq_len // 2\n", "end = center_pos + seq_len // 2\n", "\n", - "blacklist_bed = \"/home/drk/common/data/genomes/hg38/blacklist/blacklist_hg38_all.bed\"\n", + "blacklist_bed = \"hg38/blacklist/blacklist_hg38_all.bed\"\n", "\n", "read_coverage_func_wt, close_coverage_func_wt = get_coverage_reader(cov_files_wt, 16384, 16, blacklist_bed)\n", "read_coverage_func_mut, close_coverage_func_mut = get_coverage_reader(cov_files_mut, 16384, 16, blacklist_bed)\n", "\n", - "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", - "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01)\n", + "gtex_targets_wt = read_coverage_func_wt(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", + "gtex_targets_mut = read_coverage_func_mut(chrom, start, end, clip_soft=384., clip=768., scale=0.01, transform_old=True)\n", "\n", "close_coverage_func_wt()\n", "close_coverage_func_mut()\n", @@ -870,7 +873,7 @@ "\n", "anno_df = splice_df\n", "\n", - "rescale_tracks = True\n", + "untransform_old = True\n", "normalize_counts = True\n", "\n", "#Tracks\n", @@ -911,11 +914,11 @@ " normalize_window=8*plot_window,\n", " bin_size=bin_size,\n", " pad=pad,\n", - " 
rescale_tracks=rescale_tracks,\n", " normalize_counts=normalize_counts,\n", " save_figs=save_figs,\n", " save_suffix=save_suffix,\n", - " anno_df=anno_df\n", + " anno_df=anno_df,\n", + " untransform_old=untransform_old,\n", ")\n" ] }, diff --git a/examples/borzoi_helpers.py b/examples/borzoi_helpers.py index bbd3d67..25235bd 100644 --- a/examples/borzoi_helpers.py +++ b/examples/borzoi_helpers.py @@ -31,7 +31,7 @@ import gc -# Helper functions (prediction, attribution, visualization) +# Helper functions for prediction, attribution, and visualization # Make one-hot coded sequence def make_seq_1hot(genome_open, chrm, start, end, seq_len): @@ -48,18 +48,22 @@ def make_seq_1hot(genome_open, chrm, start, end, seq_len): return seq_1hot -# Predict tracks +# Predict coverage tracks def predict_tracks(models, sequence_one_hot): predicted_tracks = [] - for fold_ix in range(len(models)): + + #Loop over model replicates + for rep_ix in range(len(models)): - yh = models[fold_ix](sequence_one_hot[None, ...])[:, None, ...].astype( + #Predict coverage and store as float16 + yh = models[rep_ix](sequence_one_hot[None, ...])[:, None, ...].astype( "float16" ) predicted_tracks.append(yh) + #Concatenate across replicates predicted_tracks = np.concatenate(predicted_tracks, axis=1) return predicted_tracks @@ -80,12 +84,14 @@ def process_sequence(fasta_open, chrom, start, end, seq_len=524288): return sequence_one_hot.astype("float32") +#Function to plot a DNA letter at a specified coordinate in a subplot axis def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): fp = FontProperties(family="DejaVu Sans", weight="bold") globscale = 1.35 + #Letter graphics parameters LETTERS = { "T": TextPath((-0.305, 0), "T", size=1, prop=fp), "G": TextPath((-0.384, 0), "G", size=1, prop=fp), @@ -98,6 +104,7 @@ def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): ")": TextPath((-0.1, 0), ")", size=1, prop=fp), } + #Letter colors COLOR_SCHEME = { "G": "orange", "A": 
"green", @@ -112,22 +119,29 @@ def dna_letter_at(letter, x, y, yscale=1, ax=None, color=None, alpha=1.0): text = LETTERS[letter] + #Optionally override default color chosen_color = COLOR_SCHEME[letter] if color is not None: chosen_color = color + #Calculate transformed coordinates t = ( mpl.transforms.Affine2D().scale(1 * globscale, yscale * globscale) + mpl.transforms.Affine2D().translate(x, y) + ax.transData ) + + #Draw patch p = PathPatch(text, lw=0, fc=chosen_color, alpha=alpha, transform=t) + #Add patch into axis subplot if ax != None: ax.add_artist(p) + return p +#Tensorflow helper function to compute gradient of a given statistic predicted by the model def _prediction_input_grad( input_sequence, model, @@ -145,13 +159,14 @@ def _prediction_input_grad( subtract_avg, prox_bin_index, dist_bin_index, + untransform_old, ): mean_dist_prox_ratio = None with tf.GradientTape() as tape: tape.watch(input_sequence) - # predict + #Predict coverage for chosen tracks preds = tf.gather( model(input_sequence, training=False), tf.tile( @@ -162,28 +177,48 @@ def _prediction_input_grad( batch_dims=1, ) - # undo scale - preds = preds / track_scale + #Undo transformations + if untransform_old : + + #Undo scale + preds = preds / track_scale - # undo soft_clip - if clip_soft is not None: - preds = tf.where( - preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds - ) + #Undo clip-soft + if clip_soft is not None: + preds = tf.where( + preds > clip_soft, (preds - clip_soft) ** 2 + clip_soft, preds + ) + + #Undo sqrt + preds = preds ** (1. / track_transform) + else : + + #Undo clip-soft + if clip_soft is not None : + preds = tf.where( + preds > clip_soft, (preds - clip_soft + 1)**2 + clip_soft - 1, preds + ) + + #Undo sqrt + preds = (preds + 1)**(1. 
/ track_transform) - 1 - # undo sqrt - preds = preds ** (1.0 / track_transform) + #Undo scale + preds = preds / track_scale - # aggregate over tracks (average) + #Aggregate over tracks (average) pred = tf.reduce_mean(preds, axis=2) + #Aggregate coverage across positions if not use_mean: + #Sum over a range or an array of bins (distal) if dist_bin_index is None: mean_dist = tf.reduce_sum(pred[:, dist_bin_start:dist_bin_end], axis=1) else: mean_dist = tf.reduce_sum( tf.gather(pred, dist_bin_index, axis=1), axis=1 ) + + #Sum over a range or an array of bins (proximal) if prox_bin_index is None: mean_prox = tf.reduce_sum(pred[:, prox_bin_start:prox_bin_end], axis=1) else: @@ -191,28 +226,35 @@ def _prediction_input_grad( tf.gather(pred, prox_bin_index, axis=1), axis=1 ) else: + #Average over a range or an array of bins (distal) if dist_bin_index is None: mean_dist = tf.reduce_mean(pred[:, dist_bin_start:dist_bin_end], axis=1) else: mean_dist = tf.reduce_mean( tf.gather(pred, dist_bin_index, axis=1), axis=1 ) + + #Average over a range or an array of bins (proximal) if prox_bin_index is None: mean_prox = tf.reduce_mean(pred[:, prox_bin_start:prox_bin_end], axis=1) else: mean_prox = tf.reduce_mean( tf.gather(pred, prox_bin_index, axis=1), axis=1 ) + + #Apply a log transform (or a log ratio transform) if not use_ratio: mean_dist_prox_ratio = tf.math.log(mean_dist + 1e-6) else: + #Apply a log ratio or log odds ratio transform if not use_logodds: mean_dist_prox_ratio = tf.math.log(mean_dist / mean_prox + 1e-6) else: mean_dist_prox_ratio = tf.math.log( - (mean_dist / mean_prox) / (1.0 - (mean_dist / mean_prox)) + 1e-6 + (mean_dist / mean_prox) / (1. 
- (mean_dist / mean_prox)) + 1e-6 ) + #Get the gradient and mean-subtract the result input_grad = tape.gradient(mean_dist_prox_ratio, input_sequence) if subtract_avg: input_grad = input_grad - tf.reduce_mean(input_grad, axis=-1, keepdims=True) @@ -222,6 +264,7 @@ def _prediction_input_grad( return input_grad +#Function to compute the average input gradient for the sequence and its reverse-complement def get_prediction_gradient_w_rc( models, sequence_one_hots, @@ -240,9 +283,10 @@ def get_prediction_gradient_w_rc( use_logodds=False, subtract_avg=False, fold_index=[0, 1, 2, 3], + untransform_old=False, ): - # Get gradients for fwd + #Get gradients for sequence pred_grads = get_prediction_gradient( models, sequence_one_hots, @@ -261,32 +305,37 @@ def get_prediction_gradient_w_rc( use_logodds, subtract_avg, fold_index, + untransform_old, ) - # Get gradients for rev + #Get reverse-complemented sequence sequence_one_hots_rc = [ sequence_one_hots[example_ix][::-1, ::-1] for example_ix in range(len(sequence_one_hots)) ] + #Get reverse-complemented positions prox_bin_start_rc = models[0].target_lengths[0] - prox_bin_start - 1 prox_bin_end_rc = models[0].target_lengths[0] - prox_bin_end - 1 dist_bin_start_rc = models[0].target_lengths[0] - dist_bin_start - 1 dist_bin_end_rc = models[0].target_lengths[0] - dist_bin_end - 1 + #Reverse-complement position indices (if they are given as arguments); proximal prox_bin_index_rc = None if prox_bin_index is not None: prox_bin_index_rc = [ models[0].target_lengths[0] - prox_bin - 1 for prox_bin in prox_bin_index ] + #Reverse-complement position indices (if they are given as arguments); distal dist_bin_index_rc = None if dist_bin_index is not None: dist_bin_index_rc = [ models[0].target_lengths[0] - dist_bin - 1 for dist_bin in dist_bin_index ] + #Get gradients for reverse-complemented sequence pred_grads_rc = get_prediction_gradient( models, sequence_one_hots_rc, @@ -305,16 +354,19 @@ def get_prediction_gradient_w_rc( use_logodds, 
subtract_avg, fold_index, + untransform_old, ) + #Average gradient saliencies pred_grads_avg = [ - (pred_grads[example_ix] + pred_grads_rc[example_ix][::-1, ::-1]) / 2.0 + (pred_grads[example_ix] + pred_grads_rc[example_ix][::-1, ::-1]) / 2. for example_ix in range(len(sequence_one_hots)) ] return pred_grads, pred_grads_rc, pred_grads_avg +#Function to compute input-gated, mean-subtracted gradient saliencies for a list of sequences def get_prediction_gradient( models, sequence_one_hots, @@ -333,16 +385,22 @@ def get_prediction_gradient( use_logodds=False, subtract_avg=False, fold_index=[0, 1, 2, 3], + untransform_old=False, ): + #Initialize structure to record gradients for multiple model replicates pred_grads = np.zeros((len(sequence_one_hots), len(fold_index), 524288, 4)) - for fold_i, fold_ix in enumerate(fold_index): + #Loop over model replicates + for fold_i, fold_ix in enumerate(fold_index) : + #Get model prediction_model = models[fold_ix].model.layers[1] + #Initialize new keras input layer input_sequence = tf.keras.layers.Input(shape=(524288, 4), name="sequence") + #Make a lambda layer with the gradient statistic tensorflow function input_grad = tf.keras.layers.Lambda( lambda x: _prediction_input_grad( x, @@ -361,14 +419,21 @@ def get_prediction_gradient( subtract_avg, prox_bin_index, dist_bin_index, + untransform_old, ), name="inp_grad", )(input_sequence) + #Compile a new model to calculate the gradient grad_model = tf.keras.models.Model(input_sequence, input_grad) + #Run gradient calculation on CPU with tf.device("/cpu:0"): - for example_ix in range(len(sequence_one_hots)): + + #Loop over sequences + for example_ix in range(len(sequence_one_hots)) : + + #Calculate and store input-gated gradient pred_grads[example_ix, fold_i, ...] = ( sequence_one_hots[example_ix] * grad_model.predict( @@ -378,216 +443,14 @@ def get_prediction_gradient( )[0, ...] 
) - # Run garbage collection before next fold - prediction_model = None - gc.collect() - - pred_grads = np.mean(pred_grads, axis=1) - pred_grads = [ - np.sum(pred_grads[example_ix, ...], axis=-1, keepdims=True) - * sequence_one_hots[example_ix] - for example_ix in range(len(sequence_one_hots)) - ] - - return pred_grads - - -def get_prediction_gradient_noisy_w_rc( - models, - sequence_one_hots, - prox_bin_start, - prox_bin_end, - dist_bin_start, - dist_bin_end, - track_index, - track_scale, - track_transform, - clip_soft=None, - prox_bin_index=None, - dist_bin_index=None, - use_mean=False, - use_ratio=True, - use_logodds=False, - subtract_avg=False, - fold_index=[0, 1, 2, 3], - n_samples=5, - sample_prob=0.75, -): - - # Get gradients for fwd - pred_grads = get_prediction_gradient_noisy( - models, - sequence_one_hots, - prox_bin_start, - prox_bin_end, - dist_bin_start, - dist_bin_end, - track_index, - track_scale, - track_transform, - clip_soft, - prox_bin_index, - dist_bin_index, - use_mean, - use_ratio, - use_logodds, - subtract_avg, - fold_index, - n_samples, - sample_prob, - ) - - # Get gradients for rev - sequence_one_hots_rc = [ - sequence_one_hots[example_ix][::-1, ::-1] - for example_ix in range(len(sequence_one_hots)) - ] - - prox_bin_start_rc = models[0].target_lengths[0] - prox_bin_start - 1 - prox_bin_end_rc = models[0].target_lengths[0] - prox_bin_end - 1 - - dist_bin_start_rc = models[0].target_lengths[0] - dist_bin_start - 1 - dist_bin_end_rc = models[0].target_lengths[0] - dist_bin_end - 1 - - prox_bin_index_rc = None - if prox_bin_index is not None: - prox_bin_index_rc = [ - models[0].target_lengths[0] - prox_bin - 1 for prox_bin in prox_bin_index - ] - - dist_bin_index_rc = None - if dist_bin_index is not None: - dist_bin_index_rc = [ - models[0].target_lengths[0] - dist_bin - 1 for dist_bin in dist_bin_index - ] - - pred_grads_rc = get_prediction_gradient_noisy( - models, - sequence_one_hots_rc, - prox_bin_end_rc, - prox_bin_start_rc, - 
dist_bin_end_rc, - dist_bin_start_rc, - track_index, - track_scale, - track_transform, - clip_soft, - prox_bin_index_rc, - dist_bin_index_rc, - use_mean, - use_ratio, - use_logodds, - subtract_avg, - fold_index, - n_samples, - sample_prob, - ) - - pred_grads_avg = [ - (pred_grads[example_ix] + pred_grads_rc[example_ix][::-1, ::-1]) / 2.0 - for example_ix in range(len(sequence_one_hots)) - ] - - return pred_grads, pred_grads_rc, pred_grads_avg - - -def get_prediction_gradient_noisy( - models, - sequence_one_hots, - prox_bin_start, - prox_bin_end, - dist_bin_start, - dist_bin_end, - track_index, - track_scale, - track_transform, - clip_soft=None, - prox_bin_index=None, - dist_bin_index=None, - use_mean=False, - use_ratio=True, - use_logodds=False, - subtract_avg=False, - fold_index=[0, 1, 2, 3], - n_samples=5, - sample_prob=0.75, -): - - pred_grads = np.zeros((len(sequence_one_hots), len(fold_index), 524288, 4)) - - for fold_i, fold_ix in enumerate(fold_index): - - print("fold_ix = " + str(fold_ix)) - - prediction_model = models[fold_ix].model.layers[1] - - input_sequence = tf.keras.layers.Input(shape=(524288, 4), name="sequence") - - input_grad = tf.keras.layers.Lambda( - lambda x: _prediction_input_grad( - x, - prediction_model, - prox_bin_start, - prox_bin_end, - dist_bin_start, - dist_bin_end, - track_index, - track_scale, - track_transform, - clip_soft, - use_mean, - use_ratio, - use_logodds, - subtract_avg, - prox_bin_index, - dist_bin_index, - ), - name="inp_grad", - )(input_sequence) - - grad_model = tf.keras.models.Model(input_sequence, input_grad) - - with tf.device("/cpu:0"): - for example_ix in range(len(sequence_one_hots)): - - print("example_ix = " + str(example_ix)) - - inp = sequence_one_hots[example_ix][None, ...] 
- - for sample_ix in range(n_samples): - - print("sample_ix = " + str(sample_ix)) - - inp_corrupted = np.copy(inp) - - corrupt_index = np.nonzero( - np.random.rand(inp.shape[1]) >= sample_prob - )[0] - - rand_nt_index = np.random.choice( - [0, 1, 2, 3], size=(corrupt_index.shape[0],) - ) - - inp_corrupted[0, corrupt_index, :] = 0.0 - inp_corrupted[0, corrupt_index, rand_nt_index] = 1.0 - - pred_grads[example_ix, fold_i, ...] += ( - sequence_one_hots[example_ix] - * grad_model.predict( - x=[inp_corrupted], batch_size=1, verbose=True - )[0, ...] - ) - - pred_grads[example_ix, fold_i, ...] /= float(n_samples) - - # Run garbage collection before next example - gc.collect() - - # Run garbage collection before next fold + #Run garbage collection before next gradient computation prediction_model = None gc.collect() + #Average across model replications pred_grads = np.mean(pred_grads, axis=1) + + #Project to nucleotides again pred_grads = [ np.sum(pred_grads[example_ix, ...], axis=-1, keepdims=True) * sequence_one_hots[example_ix] @@ -597,6 +460,7 @@ def get_prediction_gradient_noisy( return pred_grads +#Helper function to compute summary statistic from predicted coverage track def _prediction_ism_score( pred, prox_bin_start, @@ -610,38 +474,50 @@ def _prediction_ism_score( dist_bin_index, ): + #Aggregate across positions if not use_mean: + #Sum over a range or an array of positions (distal) if dist_bin_index is None: mean_dist = np.sum(pred[:, dist_bin_start:dist_bin_end], axis=1) else: mean_dist = np.sum(pred[:, dist_bin_index], axis=1) + + #Sum over a range or an array of positions (proximal) if prox_bin_index is None: mean_prox = np.sum(pred[:, prox_bin_start:prox_bin_end], axis=1) else: mean_prox = np.sum(pred[:, prox_bin_index], axis=1) else: + + #Average over a range or an array of positions (distal) if dist_bin_index is None: mean_dist = np.mean(pred[:, dist_bin_start:dist_bin_end], axis=1) else: mean_dist = np.mean(pred[:, dist_bin_index], axis=1) + + + #Average 
over a range or an array of positions (proximal) if prox_bin_index is None: mean_prox = np.mean(pred[:, prox_bin_start:prox_bin_end], axis=1) else: mean_prox = np.mean(pred[:, prox_bin_index], axis=1) + #Apply a log transform (or a log ratio transform) if not use_ratio: mean_dist_prox_ratio = np.log(mean_dist + 1e-6) else: + #Apply a log ratio or log odds ratio transform if not use_logodds: mean_dist_prox_ratio = np.log(mean_dist / mean_prox + 1e-6) else: mean_dist_prox_ratio = np.log( - (mean_dist / mean_prox) / (1.0 - (mean_dist / mean_prox)) + 1e-6 + (mean_dist / mean_prox) / (1. - (mean_dist / mean_prox)) + 1e-6 ) return mean_dist_prox_ratio +#Function to compute ISM maps for a list of sequences def get_ism( models, sequence_one_hots, @@ -660,39 +536,61 @@ def get_ism( use_mean=False, use_ratio=True, use_logodds=False, + untransform_old=False, ): + #Initialize array to store ISM results across model replicates pred_ism = np.zeros((len(sequence_one_hots), len(models), 524288, 4)) bases = [0, 1, 2, 3] + #Loop over sequences for example_ix in range(len(sequence_one_hots)): print("example_ix = " + str(example_ix)) sequence_one_hot_wt = sequence_one_hots[example_ix] - # get pred + #Get pred y_wt = predict_tracks(models, sequence_one_hot_wt)[0, ...][ ..., track_index ].astype("float32") - # undo scale - y_wt /= track_scale + #Undo transforms + + if untransform_old : + + #Undo scale + y_wt /= track_scale + + #Undo clip-soft + if clip_soft is not None: + y_wt_unclipped = (y_wt - clip_soft) ** 2 + clip_soft + unclip_mask_wt = y_wt > clip_soft + + y_wt[unclip_mask_wt] = y_wt_unclipped[unclip_mask_wt] - # undo soft_clip - if clip_soft is not None: - y_wt_unclipped = (y_wt - clip_soft) ** 2 + clip_soft - unclip_mask_wt = y_wt > clip_soft + #Undo sqrt + y_wt = y_wt ** (1. 
/ track_transform) + else : + + #Undo clip-soft + if clip_soft is not None : + y_wt_unclipped = (y_wt - clip_soft + 1)**2 + clip_soft - 1 + unclip_mask_wt = (y_wt > clip_soft) - y_wt[unclip_mask_wt] = y_wt_unclipped[unclip_mask_wt] + y_wt[unclip_mask_wt] = y_wt_unclipped[unclip_mask_wt] - # undo sqrt - y_wt = y_wt ** (1.0 / track_transform) + #Undo sqrt + y_wt = (y_wt + 1)**(1. / track_transform) - 1 - # aggregate over tracks (average) + #Undo scale + y_wt /= track_scale + + #Aggregate over tracks (average) y_wt = np.mean(y_wt, axis=-1) + #Calculate reference statistic score_wt = _prediction_ism_score( y_wt, prox_bin_start, @@ -706,34 +604,58 @@ def get_ism( dist_bin_index, ) + #Loop over ISM positions for j in range(ism_start, ism_end): + + #Loop over nucleotides for b in bases: - if sequence_one_hot_wt[j, b] != 1.0: + + #Calculate ISM score if nucleotide is different from reference + if sequence_one_hot_wt[j, b] != 1.: + + #Copy sequence and induce mutation sequence_one_hot_mut = np.copy(sequence_one_hot_wt) - sequence_one_hot_mut[j, :] = 0.0 - sequence_one_hot_mut[j, b] = 1.0 + sequence_one_hot_mut[j, :] = 0. + sequence_one_hot_mut[j, b] = 1. - # get pred + #Get pred y_mut = predict_tracks(models, sequence_one_hot_mut)[0, ...][ ..., track_index ].astype("float32") - # undo scale - y_mut /= track_scale + #Undo transforms + + if untransform_old : + #Undo scale + y_mut /= track_scale - # undo soft_clip - if clip_soft is not None: - y_mut_unclipped = (y_mut - clip_soft) ** 2 + clip_soft - unclip_mask_mut = y_mut > clip_soft + #Undo clip-soft + if clip_soft is not None: + y_mut_unclipped = (y_mut - clip_soft) ** 2 + clip_soft + unclip_mask_mut = y_mut > clip_soft - y_mut[unclip_mask_mut] = y_mut_unclipped[unclip_mask_mut] + y_mut[unclip_mask_mut] = y_mut_unclipped[unclip_mask_mut] + + #Undo sqrt + y_mut = y_mut ** (1. 
/ track_transform) + else : + #Undo clip-soft + if clip_soft is not None : + y_mut_unclipped = (y_mut - clip_soft + 1)**2 + clip_soft - 1 + unclip_mask_mut = (y_mut > clip_soft) + + y_mut[unclip_mask_mut] = y_mut_unclipped[unclip_mask_mut] - # undo sqrt - y_mut = y_mut ** (1.0 / track_transform) + #Undo sqrt + y_mut = (y_mut + 1)**(1. / track_transform) - 1 - # aggregate over tracks (average) + #Undo scale + y_mut /= track_scale + + #Aggregate over tracks (average) y_mut = np.mean(y_mut, axis=-1) + #Calculate variant statistic score_mut = _prediction_ism_score( y_mut, prox_bin_start, @@ -749,11 +671,13 @@ def get_ism( pred_ism[example_ix, :, j, b] = score_wt - score_mut + #Average across mutations per positions and broadcast back to nucleotides pred_ism[example_ix, ...] = ( np.tile(np.mean(pred_ism[example_ix, ...], axis=-1)[..., None], (1, 1, 4)) * sequence_one_hots[example_ix][None, ...] ) + #Average across model replicates pred_ism = np.mean(pred_ism, axis=1) pred_ism = [ pred_ism[example_ix, ...] 
for example_ix in range(len(sequence_one_hots)) @@ -762,6 +686,7 @@ def get_ism( return pred_ism +#Function to compute ISM Shuffle maps for a list of sequences def get_ism_shuffle( models, sequence_one_hots, @@ -784,40 +709,62 @@ def get_ism_shuffle( use_mean=False, use_ratio=True, use_logodds=False, + untransform_old=False, ): + #Initialize array to store shuffle results across model replicates pred_shuffle = np.zeros((len(sequence_one_hots), len(models), 524288, n_samples)) pred_ism = np.zeros((len(sequence_one_hots), len(models), 524288, 4)) bases = [0, 1, 2, 3] + #Loop over sequences for example_ix in range(len(sequence_one_hots)): print("example_ix = " + str(example_ix)) sequence_one_hot_wt = sequence_one_hots[example_ix] - # get pred + #Get pred y_wt = predict_tracks(models, sequence_one_hot_wt)[0, ...][ ..., track_index ].astype("float32") - # undo scale - y_wt /= track_scale + #Undo transforms + + if untransform_old : + + #Undo scale + y_wt /= track_scale + + #Undo clip-soft + if clip_soft is not None: + y_wt_unclipped = (y_wt - clip_soft) ** 2 + clip_soft + unclip_mask_wt = y_wt > clip_soft + + y_wt[unclip_mask_wt] = y_wt_unclipped[unclip_mask_wt] - # undo soft_clip - if clip_soft is not None: - y_wt_unclipped = (y_wt - clip_soft) ** 2 + clip_soft - unclip_mask_wt = y_wt > clip_soft + #Undo sqrt + y_wt = y_wt ** (1. / track_transform) + else : + + #Undo clip-soft + if clip_soft is not None : + y_wt_unclipped = (y_wt - clip_soft + 1)**2 + clip_soft - 1 + unclip_mask_wt = (y_wt > clip_soft) - y_wt[unclip_mask_wt] = y_wt_unclipped[unclip_mask_wt] + y_wt[unclip_mask_wt] = y_wt_unclipped[unclip_mask_wt] - # undo sqrt - y_wt = y_wt ** (1.0 / track_transform) + #Undo sqrt + y_wt = (y_wt + 1)**(1. 
/ track_transform) - 1 - # aggregate over tracks (average) + #Undo scale + y_wt /= track_scale + + #Aggregate over tracks (average) y_wt = np.mean(y_wt, axis=-1) + #Calculate reference statistic score_wt = _prediction_ism_score( y_wt, prox_bin_start, @@ -831,20 +778,24 @@ def get_ism_shuffle( dist_bin_index, ) + #Loop over shuffle positions for j in range(ism_start, ism_end): + #Calculate local window positions (to shuffle) j_start = j - window_size // 2 j_end = j + window_size // 2 + 1 pos_index = np.arange(j_end - j_start) + j_start + #Loop over the number of independent shuffle samples for sample_ix in range(n_samples): sequence_one_hot_mut = np.copy(sequence_one_hot_wt) - sequence_one_hot_mut[j_start:j_end, :] = 0.0 + sequence_one_hot_mut[j_start:j_end, :] = 0. + #Randomly mutate or mono-nucleotide-shuffle if not mononuc_shuffle and not dinuc_shuffle: nt_index = np.random.choice(bases, size=(j_end - j_start,)).tolist() - sequence_one_hot_mut[pos_index, nt_index] = 1.0 + sequence_one_hot_mut[pos_index, nt_index] = 1. 
elif mononuc_shuffle: shuffled_pos_index = np.copy(pos_index) np.random.shuffle(shuffled_pos_index) @@ -852,7 +803,9 @@ def get_ism_shuffle( sequence_one_hot_mut[shuffled_pos_index, :] = sequence_one_hot_wt[ pos_index, : ] - else: # dinuc-shuffle + else: #Or di-nucleotide-shuffle + + #Get a list of shuffled dinucleotides (shift sequence by 1 every other sample) if sample_ix % 2 == 0: shuffled_pos_index = [ [pos_index[pos_j], pos_index[pos_j + 1]] @@ -869,43 +822,63 @@ def get_ism_shuffle( for pos_j in range(0, pos_index_rev.shape[0], 2) ] + #Shuffle list of dinucleotide indices shuffled_shuffle_index = np.arange( len(shuffled_pos_index), dtype="int32" ) np.random.shuffle(shuffled_shuffle_index) + #Reconstruct new list of dinucleotides shuffled_pos_index_new = [] for pos_tuple_i in range(len(shuffled_pos_index)): shuffled_pos_index_new.extend( shuffled_pos_index[shuffled_shuffle_index[pos_tuple_i]] ) + #Reconstruct sequence shuffled_pos_index = np.array(shuffled_pos_index_new, dtype="int32") sequence_one_hot_mut[shuffled_pos_index, :] = sequence_one_hot_wt[ pos_index, : ] - # get pred + #Get pred y_mut = predict_tracks(models, sequence_one_hot_mut)[0, ...][ ..., track_index ].astype("float32") - # undo scale - y_mut /= track_scale + #Undo transforms + + if untransform_old : + #Undo scale + y_mut /= track_scale - # undo soft_clip - if clip_soft is not None: - y_mut_unclipped = (y_mut - clip_soft) ** 2 + clip_soft - unclip_mask_mut = y_mut > clip_soft + #Undo clip-soft + if clip_soft is not None: + y_mut_unclipped = (y_mut - clip_soft) ** 2 + clip_soft + unclip_mask_mut = y_mut > clip_soft + + y_mut[unclip_mask_mut] = y_mut_unclipped[unclip_mask_mut] - y_mut[unclip_mask_mut] = y_mut_unclipped[unclip_mask_mut] + #Undo sqrt + y_mut = y_mut ** (1. 
/ track_transform) + else : + #Undo clip-soft + if clip_soft is not None : + y_mut_unclipped = (y_mut - clip_soft + 1)**2 + clip_soft - 1 + unclip_mask_mut = (y_mut > clip_soft) - # undo sqrt - y_mut = y_mut ** (1.0 / track_transform) + y_mut[unclip_mask_mut] = y_mut_unclipped[unclip_mask_mut] + + #Undo sqrt + y_mut = (y_mut + 1)**(1. / track_transform) - 1 + + #Undo scale + y_mut /= track_scale - # aggregate over tracks (average) + #Aggregate over tracks (average) y_mut = np.mean(y_mut, axis=-1) + #Calculate variant statistic score_mut = _prediction_ism_score( y_mut, prox_bin_start, @@ -921,6 +894,7 @@ def get_ism_shuffle( pred_shuffle[example_ix, :, j, sample_ix] = score_wt - score_mut + #Average across mutations at each position and broadcast back to nucleotides pred_ism[example_ix, ...] = ( np.tile( np.mean(pred_shuffle[example_ix, ...], axis=-1)[..., None], (1, 1, 4) @@ -928,6 +902,7 @@ def get_ism_shuffle( * sequence_one_hots[example_ix][None, ...] ) + #Average across model replicates pred_ism = np.mean(pred_ism, axis=1) pred_ism = [ pred_ism[example_ix, ...] 
for example_ix in range(len(sequence_one_hots)) @@ -935,7 +910,7 @@ def get_ism_shuffle( return pred_ism - +#Function to visualize attribution scores as a sequence logo def plot_seq_scores( importance_scores, figsize=(16, 2), @@ -946,14 +921,17 @@ def plot_seq_scores( fig_name="default", ): + #Transpose score matrix importance_scores = importance_scores.T fig = plt.figure(figsize=figsize) ref_seq = "" + #Loop over one-hot pattern and decode sequence for j in range(importance_scores.shape[1]): argmax_nt = np.argmax(np.abs(importance_scores[:, j])) + #Decode the corresponding nucleotide that was set to 'high' if argmax_nt == 0: ref_seq += "A" elif argmax_nt == 1: @@ -965,6 +943,7 @@ def plot_seq_scores( ax = plt.gca() + #Loop over positions in the sequence and plot a DNA letter for i in range(0, len(ref_seq)): mutability_score = np.sum(importance_scores[:, i]) color = None @@ -978,11 +957,13 @@ def plot_seq_scores( # plt.axis('off') + #Remove y ticks by default if plot_y_ticks: plt.yticks(fontsize=12) else: plt.yticks([], []) + #Set logo height if y_min is not None and y_max is not None: plt.ylim(y_min, y_max) elif y_min is not None: @@ -993,6 +974,7 @@ def plot_seq_scores( np.max(importance_scores) + 0.1 * np.max(np.abs(importance_scores)), ) + #Plot bottom line in the logo plt.axhline(y=0.0, color="black", linestyle="-", linewidth=1) # for axis in fig.axes : @@ -1001,28 +983,35 @@ def plot_seq_scores( plt.tight_layout() + #Optionally save figure if save_figs: plt.savefig(fig_name + ".png", transparent=True, dpi=300) plt.savefig(fig_name + ".eps") plt.show() - +#Function to visualize a pair of sequence logos with matched scales def visualize_input_gradient_pair( - att_grad_wt, att_grad_mut, plot_start=0, plot_end=100, save_figs=False, fig_name="" + grad_wt, grad_mut, plot_start=0, plot_end=100, save_figs=False, fig_name="" ): - scores_wt = att_grad_wt[plot_start:plot_end, :] - scores_mut = att_grad_mut[plot_start:plot_end, :] + #Slice out sequence logo subplot + 
scores_wt = grad_wt[plot_start:plot_end, :] + scores_mut = grad_mut[plot_start:plot_end, :] + #Calculate min/max range y_min = min(np.min(scores_wt), np.min(scores_mut)) y_max = max(np.max(scores_wt), np.max(scores_mut)) + #Calculate absolute-valued max y_max_abs = max(np.abs(y_min), np.abs(y_max)) + #Add symmetric amount of padding to logos y_min = y_min - 0.05 * y_max_abs y_max = y_max + 0.05 * y_max_abs + #Plot ref logo + print("--- WT ---") plot_seq_scores( scores_wt, @@ -1034,6 +1023,8 @@ def visualize_input_gradient_pair( fig_name=fig_name + "_wt", ) + #Plot alt logo + print("--- Mut ---") plot_seq_scores( scores_mut, @@ -1045,7 +1036,644 @@ def visualize_input_gradient_pair( fig_name=fig_name + "_mut", ) +#Function to visualize coverage tracks and gene annotations +def plot_coverage_tracks( + y_1_in, + track_indices, + track_names, + track_colors, + track_labels, + track_scale, + track_transform, + clip_soft, + start, + y_2_in=None, + log_scale=False, + plot_pair=True, + pair_alpha=0.5, + pair_order=[0, 1], + plot_start_rel=512, + plot_end_rel=524288-512, + normalize_start_rel=512, + normalize_end_rel=524288-512, + normalize_counts=False, + highlight_pos_rel=None, + highlight_covr_poses_rel=None, + covr_orientation='before', + covr_agg='mean', + covr_width=4, + bin_size=32, + pad=16, + same_scale=True, + save_figs=False, + save_suffix='default', + fig_size=(12, 2), + gene_slice=None, + gene_slices=None, + isoform_slices=None, + gene_strand=None, + chrom=None, + search_gene=None, + gene_strands=None, + apa_df_gene_utr=None, + apa_df_gene_intron=None, + tss_df_gene=None, + only_count_within_range=True, + plot_other_genes=False, + plot_other_gene_strands=False, + plot_isoforms=False, + plot_isoform_strands=False, + max_isoforms=5, + isoform_height_frac=0., + plot_strands=True, + gene_color='black', + isoform_color='black', + other_gene_color='black', + plot_as_bars=False, + annotate_utr_apa=False, + annotate_intron_apa=False, + annotate_tss=False, + 
untransform_old=False +) : + + #Calculate plot start and end bin positions + plot_start = start + plot_start_rel + plot_end = start + plot_end_rel + + plot_start_bin = plot_start_rel // bin_size - pad + plot_end_bin = plot_end_rel // bin_size - pad + + #Calculate coverage normalization start and end bin positions + normalize_start = start + normalize_start_rel + normalize_end = start + normalize_end_rel + + normalize_start_bin = normalize_start_rel // bin_size - pad + normalize_end_bin = normalize_end_rel // bin_size - pad + + #Calculate highlight coverage bin for optional annotation + highlight_bin = None + if highlight_pos_rel is not None : + highlight_bin = highlight_pos_rel // bin_size - pad + + #Calculate highlight coverage bins for coverage ratio annotations + highlight_covr_bins_rel = None + if highlight_covr_poses_rel is not None : + highlight_covr_bins_rel = [ + highlight_covr_poses_rel[0] // bin_size - pad, + highlight_covr_poses_rel[1] // bin_size - pad, + ] + + #Get gene exons + gene_exons = [] + + gene_exon = [] + for exon_ix in gene_slice.tolist() : + if len(gene_exon) == 0 or gene_exon[-1] == exon_ix - 1 : + gene_exon.append(exon_ix) + else : + gene_exons.append(gene_exon) + gene_exon = [exon_ix] + + if len(gene_exon) > 0 : + gene_exons.append(gene_exon) + + #Get exons from other genes + other_exons = [] + for other_ix in range(len(gene_slices)) : + other_gene_exons = [] + + other_gene_exon = [] + for exon_ix in gene_slices[other_ix].tolist() : + if len(other_gene_exon) == 0 or other_gene_exon[-1] == exon_ix - 1 : + other_gene_exon.append(exon_ix) + else : + other_gene_exons.append(other_gene_exon) + other_gene_exon = [exon_ix] + + if len(other_gene_exon) > 0 : + other_gene_exons.append(other_gene_exon) + + other_exons.append(other_gene_exons) + + #Get isoform exons + isoform_exons = [] + for other_ix in range(min(len(isoform_slices), max_isoforms)) : + other_isoform_exons = [] + + other_isoform_exon = [] + for exon_ix in 
isoform_slices[other_ix].tolist() : + if len(other_isoform_exon) == 0 or other_isoform_exon[-1] == exon_ix - 1 : + other_isoform_exon.append(exon_ix) + else : + other_isoform_exons.append(other_isoform_exon) + other_isoform_exon = [exon_ix] + + if len(other_isoform_exon) > 0 : + other_isoform_exons.append(other_isoform_exon) + + isoform_exons.append(other_isoform_exons) + + if y_2_in is None : + y_2_in = np.zeros(y_1_in.shape, dtype='float32') + + #Copy coverage tensors + y_1 = np.array(np.copy(y_1_in), dtype=np.float32) + y_2 = np.array(np.copy(y_2_in), dtype=np.float32) + + #Broadcast data transformation parameters + track_scales = None + clip_softs = None + track_transforms = None + + if not isinstance(track_scale, np.ndarray) : + track_scales = np.array([track_scale] if not isinstance(track_scale, list) else track_scale, dtype='float32') + else : + track_scales = track_scale + + if not isinstance(clip_soft, np.ndarray) : + clip_softs = np.array([clip_soft] if not isinstance(clip_soft, list) else clip_soft, dtype='float32') + else : + clip_softs = clip_soft + + if not isinstance(track_transform, np.ndarray) : + track_transforms = np.array([track_transform] if not isinstance(track_transform, list) else track_transform, dtype='float32') + else : + track_transforms = track_transform + + track_scales = track_scales[None, None, None, :] + clip_softs = clip_softs[None, None, None, :] + track_transforms = track_transforms[None, None, None, :] + + #Undo transformations + + if untransform_old : + + #Undo scale + y_1 /= track_scales + y_2 /= track_scales + + #Undo clip-soft + if clip_soft is not None : + y_1_unclipped = (y_1 - clip_softs)**2 + clip_softs + y_2_unclipped = (y_2 - clip_softs)**2 + clip_softs + + unclip_mask_1 = (y_1 > clip_softs) + unclip_mask_2 = (y_2 > clip_softs) + + y_1[unclip_mask_1] = y_1_unclipped[unclip_mask_1] + y_2[unclip_mask_2] = y_2_unclipped[unclip_mask_2] + + #Undo sqrt + y_1 = y_1**(1. / track_transforms) + y_2 = y_2**(1. 
/ track_transforms) + else : + + #Undo clip-soft + if clip_soft is not None : + y_1_unclipped = (y_1 - clip_softs + 1)**2 + clip_softs - 1 + y_2_unclipped = (y_2 - clip_softs + 1)**2 + clip_softs - 1 + + unclip_mask_1 = (y_1 > clip_softs) + unclip_mask_2 = (y_2 > clip_softs) + + y_1[unclip_mask_1] = y_1_unclipped[unclip_mask_1] + y_2[unclip_mask_2] = y_2_unclipped[unclip_mask_2] + + #Undo sqrt + y_1 = (y_1 + 1)**(1. / track_transforms) - 1 + y_2 = (y_2 + 1)**(1. / track_transforms) - 1 + + #Undo scale + y_1 /= track_scales + y_2 /= track_scales + + #Pool replicate tracks + y_1_pooled = [] + y_2_pooled = [] + for track_index in track_indices : + y_1_pooled.append(np.mean(y_1[..., track_index], axis=(0, 1, 3))[:, None]) + y_2_pooled.append(np.mean(y_2[..., track_index], axis=(0, 1, 3))[:, None]) + + y_1 = np.concatenate(y_1_pooled, axis=-1) + y_2 = np.concatenate(y_2_pooled, axis=-1) + + #Optionally normalize coverage track pair counts + if normalize_counts : + c_1 = np.sum(y_1[normalize_start_bin:normalize_end_bin, :], axis=0)[None, :] + c_2 = np.sum(y_2[normalize_start_bin:normalize_end_bin, :], axis=0)[None, :] + + #Normalize to densities + y_1 /= c_1 + y_2 /= c_2 + + #Bring back to count space (same reference) + y_1 *= c_1 + y_2 *= c_1 + + #Calculate globally largest value among track pair + max_y = 0. 
+ if same_scale : + if not log_scale : + max_y = np.max(y_1[plot_start_bin:plot_end_bin, :]) + if plot_pair : + max_y = max(np.max(y_1[plot_start_bin:plot_end_bin, :]), np.max(y_2[plot_start_bin:plot_end_bin, :])) + else: + max_y = np.max(np.log2(y_1[plot_start_bin:plot_end_bin, :] + 1.)) + if plot_pair : + max_y = max(np.log2(y_1[plot_start_bin:plot_end_bin, :] + 1.), np.log2(y_2[plot_start_bin:plot_end_bin, :] + 1.)) + + #Plot track densities as vertical-layout subplots + f, ax = plt.subplots(len(track_labels), 1, figsize=(fig_size[0], fig_size[1] * len(track_labels)), dpi=600) + if len(track_labels) == 1 : + ax = [ax] + + #Loop over tracks + for track_i, [track_name, track_color, track_label] in enumerate(zip(track_names, track_colors, track_labels)) : + + #Get coverage tracks for current target index + y_1_i = y_1[..., track_i] + y_2_i = y_2[..., track_i] + + #Aggregate coverage across target gene + sum_1_i = 0. + sum_2_i = 0. + if gene_slice is not None : + if not only_count_within_range : + sum_1_i = np.sum(y_1_i[gene_slice]) + sum_2_i = np.sum(y_2_i[gene_slice]) + else : + sum_1_i = np.sum(y_1_i[gene_slice[(gene_slice >= plot_start_bin) & (gene_slice < plot_end_bin)]]) + sum_2_i = np.sum(y_2_i[gene_slice[(gene_slice >= plot_start_bin) & (gene_slice < plot_end_bin)]]) + + #Save a copy of the raw coverage tracks + y_1_i_raw = np.copy(y_1_i) + y_2_i_raw = np.copy(y_2_i) + + #Slice out position interval + y_1_i = y_1_i[plot_start_bin:plot_end_bin] + y_2_i = y_2_i[plot_start_bin:plot_end_bin] + + #Optional log+1 transform + if log_scale : + y_1_i = np.log2(y_1_i + 1.) + y_2_i = np.log2(y_2_i + 1.) 
+ + #Calculate max values per track + max_1_i = np.max(y_1_i) + max_2_i = np.max(y_2_i) + + if plot_pair : + max_y_i = max(max_1_i, max_2_i) + else : + max_y_i = max_1_i + + if same_scale : + max_y_i = max_y + + plt.sca(ax[track_i]) + + legend_handles = [] + + #Plot tracks as colored curve areas + if not plot_as_bars : + h1 = ax[track_i].fill_between( + np.arange(plot_end_bin - plot_start_bin) + plot_start_bin, + y_1_i, + color=track_color[0], + alpha=pair_alpha, + label=track_label[0] + " - " + track_name, + zorder=pair_order[0], + rasterized=True + ) + legend_handles.append(h1) + + if plot_pair : + h2 = ax[track_i].fill_between( + np.arange(plot_end_bin - plot_start_bin) + plot_start_bin, + y_2_i, + color=track_color[1], + alpha=pair_alpha, + label=track_label[1] + " - " + track_name, + zorder=pair_order[1], + rasterized=True + ) + legend_handles.append(h2) + else : #Or plot tracks as bars (non-rasterized) + plt.bar( + np.arange(plot_end_bin - plot_start_bin) + plot_start_bin, + y_1_i, + width=1, + color=track_color[0], + alpha=pair_alpha, + label=track_label[0] + " - " + track_name, + zorder=pair_order[0] + ) + + if plot_pair : + plt.bar( + np.arange(plot_end_bin - plot_start_bin) + plot_start_bin, + y_2_i, + width=1, + color=track_color[1], + alpha=pair_alpha, + label=track_label[1] + " - " + track_name, + zorder=pair_order[1] + ) + #Annotate gene exons + for gene_exon_i, gene_exon in enumerate(gene_exons) : + exon_start_bin = gene_exon[0] - 0.5 + exon_end_bin = gene_exon[-1] + 0.5 + + #Plot shaded blue area around exon coverage peaks + if (gene_exon[-1] >= plot_start_bin and gene_exon[0] < plot_end_bin) : + ax[track_i].fill_between([exon_start_bin, exon_end_bin], max_y_i * 0.9995, color='deepskyblue', alpha=0.1, zorder=3) + + plt.sca(ax[track_i]) + + #Plot annotation graphics for the current gene (union of exons) + if gene_slice is not None : + #Plot entire gene span as line + plt.plot([gene_exons[0][0], gene_exons[-1][-1]], [-0.075 * max_y_i, -0.075 * 
max_y_i], zorder=5, color=gene_color, linewidth=0.5, linestyle='--') + + #Loop over exon starts and ends + for gene_exon_i, gene_exon in enumerate(gene_exons) : + exon_start_bin = gene_exon[0] - 0.5 + exon_end_bin = gene_exon[-1] + 0.5#1.5 + + #Plot exon as rectangle within gene span + rect = patches.Rectangle((exon_start_bin, -0.10 * max_y_i), (exon_end_bin - exon_start_bin), 0.05 * max_y_i, linewidth=0.5, edgecolor=gene_color, facecolor=gene_color, zorder=6) + ax[track_i].add_patch(rect) + + #Optionally plot gene strandedness as arrows along introns in gene span + if plot_strands and gene_exon_i < len(gene_exons) - 1 : + next_exon_start_bin = gene_exons[gene_exon_i+1][0] - 0.5 + intron_mid = (exon_end_bin + next_exon_start_bin) / 2. + + arrow_len = 0.004 * (plot_end_bin - plot_start_bin) + intron_len = next_exon_start_bin - exon_end_bin + + #Only plot if the arrow fits neatly within the intron + if intron_len >= 2 * arrow_len : + strand_sign = -1. if gene_strand == '-' else 1. + strand_arrow = patches.FancyArrow(intron_mid - (arrow_len/2.) 
* strand_sign, -0.075 * max_y_i, arrow_len * strand_sign, 0., length_includes_head=True, width=0., head_width=0.04 * max_y_i, head_length=arrow_len, zorder=7, color=gene_color) + ax[track_i].add_patch(strand_arrow) + + #Optionally highlight bin of interest + if highlight_bin is not None : + l1 = plt.plot([highlight_bin, highlight_bin], [0., max_y * 0.9995], color='black', linewidth=0.5, linestyle='--', alpha=0.5, zorder=10, label='Highlight') + #legend_handles.append(l1[0]) + + #Optionally annotate pA sites (3' UTR) + if annotate_utr_apa : + site_poses = apa_df_gene_utr.query("chrom == '" + chrom + "' and position_hg38 >= " + str(plot_start) + " and position_hg38 < " + str(plot_end))['position_hg38'].values.tolist() + + #Loop over pA sites + for site_ix, site_pos in enumerate(site_poses) : + site_bin = int((site_pos - start) // bin_size) - pad + #site_bin = int(np.round((site_pos - start) / bin_size)) - pad + + l1 = plt.plot([site_bin, site_bin], [0., max_y_i * 0.9995], color='maroon', linewidth=0.5, alpha=0.5, linestyle='--', zorder=10, label='PAS') + #if site_ix == 0 : + # legend_handles.append(l1[0]) + + #Optionally annotate intronic pA sites + if annotate_intron_apa : + site_poses = apa_df_gene_intron.query("chrom == '" + chrom + "' and position_hg38 >= " + str(plot_start) + " and position_hg38 < " + str(plot_end))['position_hg38'].values.tolist() + + #Loop over intronic pA sites + for site_ix, site_pos in enumerate(site_poses) : + site_bin = int((site_pos - start) // bin_size) - pad + #site_bin = int(np.round((site_pos - start) / bin_size)) - pad + + plt.plot([site_bin, site_bin], [0., max_y_i * 0.9995], color='maroon', linewidth=0.5, alpha=0.5, linestyle='--', zorder=10) + + #Optionally annotate TSS positions + if annotate_tss : + site_poses = tss_df_gene.query("chrom == '" + chrom + "' and position_hg38 >= " + str(plot_start) + " and position_hg38 < " + str(plot_end))['position_hg38'].values.tolist() + + #Loop over TSS positions + for site_ix, site_pos in 
enumerate(site_poses) : + site_bin = int((site_pos - start) // bin_size) - pad + #site_bin = int(np.round((site_pos - start) / bin_size)) - pad + + l1 = plt.plot([site_bin, site_bin], [0., max_y_i * 0.9995], color='darkgreen', linewidth=0.5, alpha=0.5, linestyle='--', zorder=10, label='TSS') + #if site_ix == 0 : + # legend_handles.append(l1[0]) + + #Optionally annotate regions used to estimate coverage ratios + y_1_site_1_cov = 0. + y_2_site_1_cov = 0. + y_1_site_2_cov = 0. + y_2_site_2_cov = 0. + if highlight_covr_bins_rel is not None : + + site_1_bin = highlight_covr_bins_rel[0] + site_2_bin = highlight_covr_bins_rel[1] + + bin_1_start = None + bin_1_end = None + bin_2_start = None + bin_2_end = None + if covr_orientation == 'before' : + if gene_strand == '+' : + bin_1_end = site_1_bin + 1 + bin_1_start = bin_1_end - covr_width + bin_2_end = site_2_bin + 1 + bin_2_start = bin_2_end - covr_width + else : + bin_1_start = site_1_bin + bin_1_end = bin_1_start + covr_width + bin_2_start = site_2_bin + bin_2_end = bin_2_start + covr_width + else : + if gene_strand == '+' : + bin_1_start = site_1_bin + bin_1_end = bin_1_start + covr_width + bin_2_start = site_2_bin + bin_2_end = bin_2_start + covr_width + else : + bin_1_end = site_1_bin + 1 + bin_1_start = bin_1_end - covr_width + bin_2_end = site_2_bin + 1 + bin_2_start = bin_2_end - covr_width + + if covr_agg == 'mean' : + y_1_site_1_cov = np.mean(y_1_i_raw[bin_1_start:bin_1_end]) + y_1_site_2_cov = np.mean(y_1_i_raw[bin_2_start:bin_2_end]) + y_2_site_1_cov = np.mean(y_2_i_raw[bin_1_start:bin_1_end]) + y_2_site_2_cov = np.mean(y_2_i_raw[bin_2_start:bin_2_end]) + elif covr_agg == 'max' : + y_1_site_1_cov = np.max(y_1_i_raw[bin_1_start:bin_1_end]) + y_1_site_2_cov = np.max(y_1_i_raw[bin_2_start:bin_2_end]) + y_2_site_1_cov = np.max(y_2_i_raw[bin_1_start:bin_1_end]) + y_2_site_2_cov = np.max(y_2_i_raw[bin_2_start:bin_2_end]) + + plt.plot([bin_1_start-0.5, bin_1_end-1+0.5], [0.99 * max_y_i, 0.99 * max_y_i], linewidth=0.5, 
linestyle='-', color='black', zorder=11) + plt.plot([bin_1_start-0.5, bin_1_start-0.5], [0.95 * max_y_i, 0.99 * max_y_i], linewidth=0.5, linestyle='-', color='black', zorder=11) + plt.plot([bin_1_end-1+0.5, bin_1_end-1+0.5], [0.95 * max_y_i, 0.99 * max_y_i], linewidth=0.5, linestyle='-', color='black', zorder=11) + + plt.plot([bin_2_start-0.5, bin_2_end-1+0.5], [0.99 * max_y_i, 0.99 * max_y_i], linewidth=0.5, linestyle='-', color='black', zorder=11) + plt.plot([bin_2_start-0.5, bin_2_start-0.5], [0.95 * max_y_i, 0.99 * max_y_i], linewidth=0.5, linestyle='-', color='black', zorder=11) + plt.plot([bin_2_end-1+0.5, bin_2_end-1+0.5], [0.95 * max_y_i, 0.99 * max_y_i], linewidth=0.5, linestyle='-', color='black', zorder=11) + + rect_1 = patches.Rectangle((bin_1_start-0.5, 0.975 * max_y_i), (bin_1_end - bin_1_start), (0.99 - 0.975) * max_y_i, linewidth=0., facecolor='lightcoral', alpha=0.35, zorder=11) + rect_2 = patches.Rectangle((bin_2_start-0.5, 0.975 * max_y_i), (bin_2_end - bin_2_start), (0.99 - 0.975) * max_y_i, linewidth=0., facecolor='lightcoral', alpha=0.35, zorder=11) + ax[track_i].add_patch(rect_1) + ax[track_i].add_patch(rect_2) + + #Optionally plot the union of exons of other genes + if plot_other_genes : + + #Loop over other genes + for other_ix in range(len(other_exons)) : + plt.plot([other_exons[other_ix][0][0], other_exons[other_ix][-1][-1]], [(-0.075 - 0.10 - isoform_height_frac) * max_y_i, (-0.075 - 0.10 - isoform_height_frac) * max_y_i], zorder=5, color=other_gene_color, linewidth=0.5, linestyle='--') + + #Loop over the exons of the current other gene + for gene_exon_i, gene_exon in enumerate(other_exons[other_ix]) : + exon_start_bin = gene_exon[0] - 0.5 + exon_end_bin = gene_exon[-1] + 0.5 + + #Plot exon graphic + rect = patches.Rectangle((exon_start_bin, (-0.10 - 0.10 - isoform_height_frac) * max_y_i), (exon_end_bin - exon_start_bin), 0.05 * max_y_i, linewidth=0.5, edgecolor=other_gene_color, facecolor=other_gene_color, zorder=6) + 
ax[track_i].add_patch(rect) + + #Plot gene strandedness of other genes + if plot_other_gene_strands and gene_exon_i < len(other_exons[other_ix]) - 1 : + next_exon_start_bin = other_exons[other_ix][gene_exon_i+1][0] - 0.5 + intron_mid = (exon_end_bin + next_exon_start_bin) / 2. + + arrow_len = 0.004 * (plot_end_bin - plot_start_bin) + intron_len = next_exon_start_bin - exon_end_bin + + #Plot arrow only if intron is wide enough + if intron_len >= 2 * arrow_len : + strand_sign = -1. if gene_strands[other_ix] == '-' else 1. + strand_arrow = patches.FancyArrow(intron_mid - (arrow_len/2.) * strand_sign, (-0.075 - 0.10 - isoform_height_frac) * max_y_i, arrow_len * strand_sign, 0., length_includes_head=True, width=0., head_width=0.04 * max_y_i, head_length=arrow_len, zorder=7, color=other_gene_color) + ax[track_i].add_patch(strand_arrow) + + #Annotate a selection of isoforms of the target gene + if plot_isoforms : + + #Loop over isoforms + for isoform_ix in range(len(isoform_exons)) : + isoform_offset = (isoform_ix + 1) * 0.10 + next_isoform_offset = (isoform_ix + 2) * 0.10 + + #Plot only if isoform will fit in alloted relative area within subplot + if isoform_ix == len(isoform_exons) - 1 or next_isoform_offset <= isoform_height_frac : + plt.plot([isoform_exons[isoform_ix][0][0], isoform_exons[isoform_ix][-1][-1]], [(-0.075 - isoform_offset) * max_y_i, (-0.075 - isoform_offset) * max_y_i], zorder=5, color=isoform_color, linewidth=0.5, linestyle='--') + + #Loop over the exons of the current isoform + for gene_exon_i, gene_exon in enumerate(isoform_exons[isoform_ix]) : + exon_start_bin = gene_exon[0] - 0.5 + exon_end_bin = gene_exon[-1] + 0.5 + + rect = patches.Rectangle((exon_start_bin, (-0.10 - isoform_offset) * max_y_i), (exon_end_bin - exon_start_bin), 0.05 * max_y_i, linewidth=0.5, edgecolor=isoform_color, facecolor=isoform_color, zorder=6) + ax[track_i].add_patch(rect) + + #Plot gene strandedness along isoform + if plot_isoform_strands and gene_exon_i < 
len(isoform_exons[isoform_ix]) - 1 : + next_exon_start_bin = isoform_exons[isoform_ix][gene_exon_i+1][0] - 0.5 + intron_mid = (exon_end_bin + next_exon_start_bin) / 2. + + arrow_len = 0.004 * (plot_end_bin - plot_start_bin) + intron_len = next_exon_start_bin - exon_end_bin + + #Plot arrow only if intron is wide enough + if intron_len >= 2 * arrow_len : + strand_sign = -1. if gene_strand == '-' else 1. + strand_arrow = patches.FancyArrow(intron_mid - (arrow_len/2.) * strand_sign, (-0.075 - isoform_offset) * max_y_i, arrow_len * strand_sign, 0., length_includes_head=True, width=0., head_width=0.04 * max_y_i, head_length=arrow_len, zorder=7, color=isoform_color) + ax[track_i].add_patch(strand_arrow) + + #Plot text if there are too many isoforms to show + else : + missing_isoforms = len(isoform_slices) - isoform_ix + plt.text((plot_start_bin + (plot_end_bin-1)) / 2., (-0.075 - isoform_offset) * max_y_i, "(+" + str(missing_isoforms) + " not shown...)", horizontalalignment='center', verticalalignment='center', fontsize=6, zorder=10) + break + + plt.axvline(x=plot_start_bin, linewidth=1, linestyle='-', color='black') + + #Apply subplot limits + plt.xlim(plot_start_bin, plot_end_bin-1) + if gene_slice is not None : + if plot_other_genes : + plt.ylim((-0.25 - isoform_height_frac) * max_y_i, max_y_i) + else : + plt.ylim((-0.15 - isoform_height_frac) * max_y_i, max_y_i) + else : + plt.ylim(0., max_y_i) + + plt.xticks([], []) + plt.yticks([], []) + + plt.axis('off') + + #Annotate chromosome and coordinates plotted + if track_i == len(track_labels) - 1 : + text_str = chrom + ":" + str(plot_start) + "-" + str(plot_end) + " (" + str(int(plot_end-plot_start)) + "bp) - " + "'" + search_gene + "' (" + gene_strand + ")" + plt.text(0.0, -0.14 / float(fig_size[1]), text_str, horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, fontsize=8, zorder=6) + + #Annotate metrics derived from the coverage tracks + if gene_slice is not None : + tr_label_0_str = ' 
(' + track_label[0] + ')' if plot_pair else '' + tr_label_1_str = ' (' + track_label[1] + ')' if plot_pair else '' + + #Max coverage + y_max_str = 'Max' + tr_label_0_str + ' = ' + str(round(max_1_i if not log_scale else 2**max_1_i - 1, 2)) + if plot_pair : + y_max_str += ',' + tr_label_1_str + ' = ' + str(round(max_2_i if not log_scale else 2**max_2_i - 1, 2)) + + #Sum of coverage + y_sum_str = 'Sum' + tr_label_0_str + ' = ' + str(round(sum_1_i, 2)) + if plot_pair : + y_sum_str += ',' + tr_label_1_str + ' = ' + str(round(sum_2_i, 2)) + + plt.text(0.005, 0.94, y_max_str, fontname='monospace', horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, fontsize=6, zorder=6) + plt.text(0.005, 0.82, y_sum_str, fontname='monospace', horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, fontsize=6, zorder=6) + + #Annotate log fold change (if plotting a pair of coverage tracks) + if plot_pair and track_label[0].lower() in ['ref', 'wt'] and track_label[1].lower() in ['alt', 'var', 'mut'] : + log_ratio_str = 'Log ratio (' + track_label[1] + ' / ' + track_label[0] + ') = ' + str(round(np.log2(sum_2_i / sum_1_i), 3)) + plt.text(0.005, 0.70, log_ratio_str, fontname='monospace', horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, fontsize=6, zorder=6) + + #Optionally annotate coverage ratio metrics computed from the tracks + if highlight_covr_bins_rel is not None : + + covr_1_i = 0. + covr_2_i = 0. 
+ if gene_strand == '-' : + covr_1_i = (y_1_site_1_cov + 1e-6) / (y_1_site_2_cov + 1e-6) + covr_2_i = (y_2_site_1_cov + 1e-6) / (y_2_site_2_cov + 1e-6) + else : + covr_1_i = (y_1_site_2_cov + 1e-6) / (y_1_site_1_cov + 1e-6) + covr_2_i = (y_2_site_2_cov + 1e-6) / (y_2_site_1_cov + 1e-6) + + #Coverage ratio + covr_str = 'COVR' + tr_label_0_str + ' = ' + str(round(covr_1_i, 3)) + if plot_pair : + covr_str += ',' + tr_label_1_str + ' = ' + str(round(covr_2_i, 3)) + + plt.text(0.005, 0.58, covr_str, fontname='monospace', horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, fontsize=6, zorder=6) + + #Annotate log fold change (if plotting a pair of coverage tracks) + if plot_pair and track_label[0].lower() in ['ref', 'wt'] and track_label[1].lower() in ['alt', 'var', 'mut'] : + log_ratio_str = 'Log COVR ratio (' + track_label[1] + ' / ' + track_label[0] + ') = ' + str(round(np.log2(covr_2_i / covr_1_i), 3)) + plt.text(0.005, 0.46, log_ratio_str, fontname='monospace', horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, fontsize=6, zorder=6) + + plt.legend(handles=legend_handles, loc='upper right', fontsize=6) + + plt.tight_layout() + + #Optionally save figure + if save_figs : + plt.savefig("borzoi" + save_suffix + ".png", dpi=300, transparent=False) + plt.savefig("borzoi" + save_suffix + ".pdf") + + plt.show() + + +#Function to visualize coverage tracks def plot_coverage_track_pair_bins( y_wt, y_mut, @@ -1065,20 +1693,22 @@ def plot_coverage_track_pair_bins( normalize_window=4096, bin_size=32, pad=16, - rescale_tracks=True, normalize_counts=False, save_figs=False, save_suffix="default", gene_slice=None, anno_df=None, + untransform_old=False ): + #Calculate plot start and end bin positions plot_start = center_pos - plot_window // 2 plot_end = center_pos + plot_window // 2 plot_start_bin = (plot_start - start) // bin_size - pad plot_end_bin = (plot_end - start) // bin_size - pad + #Calculate coverage 
normalization start and end bin positions normalize_start = center_pos - normalize_window // 2 normalize_end = center_pos + normalize_window // 2 @@ -1109,12 +1739,14 @@ def plot_coverage_track_pair_bins( y_wt_curr = np.array(np.copy(y_wt), dtype=np.float32) y_mut_curr = np.array(np.copy(y_mut), dtype=np.float32) - if rescale_tracks: - # undo scale + #Undo transformations + if untransform_old : + + #Undo scale y_wt_curr /= track_scale y_mut_curr /= track_scale - # undo soft_clip + #Undo clip-soft if clip_soft is not None: y_wt_curr_unclipped = (y_wt_curr - clip_soft) ** 2 + clip_soft y_mut_curr_unclipped = (y_mut_curr - clip_soft) ** 2 + clip_soft @@ -1125,13 +1757,35 @@ def plot_coverage_track_pair_bins( y_wt_curr[unclip_mask_wt] = y_wt_curr_unclipped[unclip_mask_wt] y_mut_curr[unclip_mask_mut] = y_mut_curr_unclipped[unclip_mask_mut] - # undo sqrt - y_wt_curr = y_wt_curr ** (1.0 / track_transform) - y_mut_curr = y_mut_curr ** (1.0 / track_transform) + #Undo sqrt + y_wt_curr = y_wt_curr ** (1. / track_transform) + y_mut_curr = y_mut_curr ** (1. / track_transform) + else : + + #Undo clip-soft + if clip_soft is not None: + y_wt_curr_unclipped = (y_wt_curr - clip_soft + 1) ** 2 + clip_soft - 1 + y_mut_curr_unclipped = (y_mut_curr - clip_soft + 1) ** 2 + clip_soft - 1 + + unclip_mask_wt = y_wt_curr > clip_soft + unclip_mask_mut = y_mut_curr > clip_soft + y_wt_curr[unclip_mask_wt] = y_wt_curr_unclipped[unclip_mask_wt] + y_mut_curr[unclip_mask_mut] = y_mut_curr_unclipped[unclip_mask_mut] + + #Undo sqrt + y_wt_curr = (y_wt_curr + 1) ** (1. / track_transform) - 1 + y_mut_curr = (y_mut_curr + 1) ** (1. 
/ track_transform) - 1 + + #Undo scale + y_wt_curr /= track_scale + y_mut_curr /= track_scale + + #Average across replicate tracks y_wt_curr = np.mean(y_wt_curr[..., track_index], axis=(0, 1, 3)) y_mut_curr = np.mean(y_mut_curr[..., track_index], axis=(0, 1, 3)) + #Normalize reference/alternate coverage track counts if normalize_counts: wt_count = np.sum(y_wt_curr[normalize_start_bin:normalize_end_bin]) mut_count = np.sum(y_mut_curr[normalize_start_bin:normalize_end_bin]) @@ -1144,6 +1798,7 @@ def plot_coverage_track_pair_bins( y_wt_curr *= wt_count y_mut_curr *= wt_count + #Print aggregated exon coverage for target gene if gene_slice is not None: sum_wt = np.sum(y_wt_curr[gene_slice]) sum_mut = np.sum(y_mut_curr[gene_slice]) @@ -1154,13 +1809,15 @@ def plot_coverage_track_pair_bins( y_wt_curr = y_wt_curr[plot_start_bin:plot_end_bin] y_mut_curr = y_mut_curr[plot_start_bin:plot_end_bin] + #Apply log+1 or sqrt+1 transform if log_scale: - y_wt_curr = np.log2(y_wt_curr + 1.0) - y_mut_curr = np.log2(y_mut_curr + 1.0) + y_wt_curr = np.log2(y_wt_curr + 1.) + y_mut_curr = np.log2(y_mut_curr + 1.) elif sqrt_scale: - y_wt_curr = np.sqrt(y_wt_curr + 1.0) - y_mut_curr = np.sqrt(y_mut_curr + 1.0) + y_wt_curr = np.sqrt(y_wt_curr + 1.) + y_mut_curr = np.sqrt(y_mut_curr + 1.) 
+ #Calculate global coverage peak max and print values max_y_wt = np.max(y_wt_curr) max_y_mut = np.max(y_mut_curr) @@ -1175,6 +1832,7 @@ def plot_coverage_track_pair_bins( f = plt.figure(figsize=(12, 2)) + #Plot coverage tracks as bins plt.bar( np.arange(plot_end_bin - plot_start_bin) + plot_start_bin, y_wt_curr, @@ -1184,6 +1842,7 @@ def plot_coverage_track_pair_bins( label="Ref", ) + #Plot variant coverage tracks if plot_mut: plt.bar( np.arange(plot_end_bin - plot_start_bin) + plot_start_bin, @@ -1196,17 +1855,18 @@ def plot_coverage_track_pair_bins( xtick_vals = [] - for pas_ix, anno_pos in enumerate(anno_poses): + #Annotate sites from a list of positions (draw as vertical lines) + for _, anno_pos in enumerate(anno_poses): - pas_bin = int((anno_pos - start) // 32) - 16 + anno_bin = int((anno_pos - start) // 32) - 16 - xtick_vals.append(pas_bin) + xtick_vals.append(anno_bin) - bin_end = pas_bin + 3 - 0.5 + bin_end = anno_bin + 3 - 0.5 bin_start = bin_end - 5 plt.axvline( - x=pas_bin, + x=anno_bin, color="cyan", linewidth=2, alpha=0.5, @@ -1214,6 +1874,7 @@ def plot_coverage_track_pair_bins( zorder=-1, ) + #Annotate variant position plt.scatter( [mut_bin], [0.075 * max_y], @@ -1229,6 +1890,7 @@ def plot_coverage_track_pair_bins( plt.xticks([], []) plt.yticks([], []) + #Annotate the plotted coordinates plt.xlabel( chrom + ":" @@ -1240,7 +1902,7 @@ def plot_coverage_track_pair_bins( + "bp window)", fontsize=8, ) - plt.ylabel("Signal (log)" if not rescale_tracks else "Signal", fontsize=8) + plt.ylabel("Signal", fontsize=8) plt.title("Track(s): " + str(track_name), fontsize=8) @@ -1248,9 +1910,10 @@ def plot_coverage_track_pair_bins( plt.tight_layout() + #Optionally save figures if save_figs: plt.savefig( - "borzoi_" + "borzoi" + save_suffix + "_track_" + str(track_index[0]) @@ -1261,7 +1924,7 @@ def plot_coverage_track_pair_bins( transparent=False, ) plt.savefig( - "borzoi_" + "borzoi" + save_suffix + "_track_" + str(track_index[0]) @@ -1275,17 +1938,18 @@ def 
plot_coverage_track_pair_bins( # Helper functions (measured RNA-seq coverage loader) - +#Function that opens coverage files and returns read and close functions def get_coverage_reader( cov_files, target_length, crop_length, blacklist_bed, blacklist_pct=0.5 ): - # open genome coverage files + #Open genome coverage files cov_opens = [CovFace(cov_file) for cov_file in cov_files] - # read blacklist regions + #Read blacklist regions black_chr_trees = read_blacklist(blacklist_bed) + #Function to read coverage def _read_coverage( chrom, start, @@ -1298,16 +1962,17 @@ def _read_coverage( target_length=target_length, crop_length=crop_length, black_chr_trees=black_chr_trees, + transform_old=False, ): n_targets = len(cov_opens) targets = [] - # for each targets + #Loop over targets for target_i in range(n_targets): - # extract sequence as BED style + #Extract sequence as BED style if start < 0: seq_cov_nt = np.concatenate( [np.zeros(-start), cov_opens[target_i].read(chrom, 0, end)], axis=0 @@ -1315,26 +1980,27 @@ def _read_coverage( else: seq_cov_nt = cov_opens[target_i].read(chrom, start, end) # start - 1 - # extend to full length + #Extend to full length if seq_cov_nt.shape[0] < end - start: seq_cov_nt = np.concatenate( [seq_cov_nt, np.zeros((end - start) - seq_cov_nt.shape[0])], axis=0 ) - # read coverage + #Read coverage seq_cov_nt = cov_opens[target_i].read(chrom, start, end) - # determine baseline coverage + #Determine baseline coverage if target_length >= 8: baseline_cov = np.percentile(seq_cov_nt, 100 * blacklist_pct) baseline_cov = np.nan_to_num(baseline_cov) else: baseline_cov = 0 - # set blacklist to baseline + #Set blacklist to baseline if chrom in black_chr_trees: for black_interval in black_chr_trees[chrom][start:end]: - # adjust for sequence indexes + + #Adjust for sequence indexes black_seq_start = black_interval.begin - start black_seq_end = black_interval.end - start black_seq_values = seq_cov_nt[black_seq_start:black_seq_end] @@ -1342,59 +2008,81 @@ def 
_read_coverage( black_seq_values, -baseline_cov, baseline_cov ) - # set NaN's to baseline + #Set NaN's to baseline nan_mask = np.isnan(seq_cov_nt) seq_cov_nt[nan_mask] = baseline_cov - # sum pool - seq_cov = ( - seq_cov_nt.reshape(target_length, -1).sum(axis=1, dtype="float32") - ** 0.75 - ) + #Apply original transform (from borzoi manuscript) + if transform_old: + + #Sum pool + seq_cov = ( + seq_cov_nt.reshape(target_length, -1).sum(axis=1, dtype="float32") + ** 0.75 + ) - # crop - seq_cov = seq_cov[crop_length:-crop_length] + #Crop + if crop_length > 0 : + seq_cov = seq_cov[crop_length:-crop_length] - # clip - if clip_soft is not None: - clip_mask = seq_cov > clip_soft - seq_cov[clip_mask] = clip_soft + np.sqrt(seq_cov[clip_mask] - clip_soft) - if clip is not None: - seq_cov = np.clip(seq_cov, -clip, clip) + #Clip + if clip_soft is not None: + clip_mask = seq_cov > clip_soft + seq_cov[clip_mask] = clip_soft + np.sqrt(seq_cov[clip_mask] - clip_soft) + if clip is not None: + seq_cov = np.clip(seq_cov, -clip, clip) - # scale - seq_cov = scale * seq_cov + #Scale + seq_cov = scale * seq_cov + else: + + #Scale + seq_cov_nt = scale * seq_cov_nt - # clip float16 min/max + #Sum pool + seq_cov = -1 + np.sqrt( + 1 + seq_cov_nt.reshape(target_length, -1).sum(axis=1, dtype="float32") + ) + + #Clip + if clip_soft is not None: + clip_mask = seq_cov > clip_soft + seq_cov[clip_mask] = clip_soft - 1 + np.sqrt(seq_cov[clip_mask] - clip_soft + 1) + if clip is not None: + seq_cov = np.clip(seq_cov, -clip, clip) + + #Clip float16 min/max seq_cov = np.clip( seq_cov, np.finfo(np.float16).min, np.finfo(np.float16).max ) - # append to targets + #Append to targets targets.append(seq_cov.astype("float16")[:, None]) return np.concatenate(targets, axis=-1) + #Function to close coverage files def _close_coverage(cov_opens=cov_opens): - # close genome coverage files + #Loop over coverage files and close them for cov_open in cov_opens: cov_open.close() return _read_coverage, _close_coverage - 
+#Function to read genome blacklist coordinates and construct interval trees def read_blacklist(blacklist_bed, black_buffer=20): - """Construct interval trees of blacklist - regions for each chromosome.""" black_chr_trees = {} if blacklist_bed is not None and os.path.isfile(blacklist_bed): + + #Loop over blacklist for line in open(blacklist_bed): a = line.split() chrm = a[0] start = max(0, int(a[1]) - black_buffer) end = int(a[2]) + black_buffer + #Initialize new interval tree for chromosome if chrm not in black_chr_trees: black_chr_trees[chrm] = intervaltree.IntervalTree() @@ -1402,13 +2090,14 @@ def read_blacklist(blacklist_bed, black_buffer=20): return black_chr_trees - +#Coverage reader interface class CovFace: def __init__(self, cov_file): self.cov_file = cov_file self.bigwig = False self.bed = False + #Parse coverage file type and open the file cov_ext = os.path.splitext(self.cov_file)[1].lower() if cov_ext == ".gz": cov_ext = os.path.splitext(self.cov_file[:-3])[1].lower() @@ -1431,32 +2120,36 @@ def __init__(self, cov_file): ) exit(1) + #Function to read bed file with coordinates def preprocess_bed(self): - # read BED + #Read bed bed_df = pd.read_csv( self.cov_file, sep="\t", usecols=range(3), names=["chr", "start", "end"] ) - # for each chromosome + #Loop over chromosomes self.cov_open = {} for chrm in bed_df.chr.unique(): bed_chr_df = bed_df[bed_df.chr == chrm] - # find max pos + #Find max pos pos_max = bed_chr_df.end.max() - # initialize array + #Initialize array self.cov_open[chrm] = np.zeros(pos_max, dtype="bool") - # set peaks + #Set peaks for peak in bed_chr_df.itertuples(): self.cov_open[peak.chr][peak.start : peak.end] = 1 + #Function to read coverage values def read(self, chrm, start, end): + #Read from bigwig if self.bigwig: cov = self.cov_open.values(chrm, start, end, numpy=True).astype("float16") else: + #Read from non-bigwig source if chrm in self.cov_open: cov = self.cov_open[chrm][start:end] pad_zeros = end - start - len(cov) @@ -1464,15 
+2157,18 @@ def read(self, chrm, start, end): cov_pad = np.zeros(pad_zeros, dtype="bool") cov = np.concatenate([cov, cov_pad]) else: + #Error finding coordinates print( "WARNING: %s doesn't see %s:%d-%d. Setting to all zeros." % (self.cov_file, chrm, start, end), file=sys.stderr, ) + #Return zeros cov = np.zeros(end - start, dtype="float16") return cov + #Function to close coverage file handle def close(self): if not self.bed: self.cov_open.close() From 9b1d23af346a4f95f4487902091483a9ee184754 Mon Sep 17 00:00:00 2001 From: johli Date: Sun, 6 Oct 2024 20:34:04 -0700 Subject: [PATCH 28/32] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c89042a..e16d50e 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ The following directories contain *minimal* tutorials regarding model training, ### Example Notebooks The following notebooks contain example code for predicting and interpreting genetic variants. -[Notebook 1a: Interpret eQTL SNP (expression)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb)
-[Notebook 1b: Interpret sQTL SNP (splicing)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb)
-[Notebook 1c: Interpret paQTL SNP (polyadenylation)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb)
+[Notebook 1a: Interpret eQTL SNP (expression)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_eqtl_chr10_116952944_T_C.ipynb) [(fancy)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_eqtl_chr10_116952944_T_C_fancy.ipynb)
+[Notebook 1b: Interpret paQTL SNP (polyadenylation)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_paqtl_chr1_236763042_A_G.ipynb) [(fancy)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_paqtl_chr1_236763042_A_G_fancy.ipynb)
+[Notebook 1c: Interpret sQTL SNP (splicing)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_sqtl_chr9_135548708_G_C.ipynb)
[Notebook 1d: Interpret ipaQTL SNP (splicing and polya)](https://github.com/calico/borzoi/blob/main/examples/borzoi_example_ipaqtl_chr10_116664061_G_A.ipynb)
From ac6dabf3f1c27f89d0e8f53b120d54efc0eb4d9f Mon Sep 17 00:00:00 2001 From: johli Date: Mon, 7 Oct 2024 08:59:30 -0700 Subject: [PATCH 29/32] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e16d50e..8dec173 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,10 @@ The training data for Borzoi can be downloaded from the following URL: [Borzoi Training Data](https://storage.googleapis.com/borzoi-paper/data/)
-*Note*: This data bucket is very large and thus set to "Requester Pays". +*Note*: This data bucket is very large (multiple TB) and thus set to "Requester Pays". To access the bucket, you must have a billable user project set up on the Google Cloud Platform (GCP) which is specified with the "-u" flag when issuing gsutil commands. For example, to list the contents of "gs://borzoi-paper/data", issue the following command: +```sh +gsutil -u ls gs://borzoi-paper/data +``` ### QTL Availability The curated e-/s-/pa-/ipaQTL benchmarking data can be downloaded from the following URLs: From ac341e4ff60c778311b2767e2c5640600005de5b Mon Sep 17 00:00:00 2001 From: johli Date: Mon, 7 Oct 2024 09:00:58 -0700 Subject: [PATCH 30/32] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8dec173..634c2e3 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ The training data for Borzoi can be downloaded from the following URL: [Borzoi Training Data](https://storage.googleapis.com/borzoi-paper/data/)
-*Note*: This data bucket is very large (multiple TB) and thus set to "Requester Pays". To access the bucket, you must have a billable user project set up on the Google Cloud Platform (GCP) which is specified with the "-u" flag when issuing gsutil commands. For example, to list the contents of "gs://borzoi-paper/data", issue the following command: +*Note*: This data bucket is large (multiple TB) and thus set to "Requester Pays". To access the bucket, you must have a billable user project set up on the Google Cloud Platform (GCP) and included with the "-u" flag when issuing gsutil commands. For example, to list the contents of "gs://borzoi-paper/data", issue this command: ```sh gsutil -u ls gs://borzoi-paper/data ``` From c0c6bed5aadc987a8c7dc7bea37c39e8cb9c2ed0 Mon Sep 17 00:00:00 2001 From: Johannes Linder Date: Mon, 7 Oct 2024 19:46:10 -0700 Subject: [PATCH 31/32] Cleaned env_vars shell script. --- README.md | 3 ++- env_vars.sh | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 634c2e3..c0eeacf 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ These repositories further depend on a number of python packages (which are auto A new conda environment can be created with `conda create -n borzoi_py310 python=3.10`.
Some of the scripts in this repository start multi-process jobs and require [slurm](https://slurm.schedmd.com/). -Finally, the code base relies on a number of environment variables. For convenience, these can be configured in the active conda environment with the 'env_vars.sh' script. First, open up 'env_vars.sh' in each repository folder and change the two lines of code at the top to your username and local path. Then, issue these commands: +Finally, the code base relies on a number of environment variables. For convenience, these can be configured in the active conda environment with the 'env_vars.sh' script. First, open up 'env_vars.sh' in each repository folder and change the few lines of code at the top to your local paths. Then, issue these commands: ```sh cd borzoi conda activate borzoi_py310 @@ -73,6 +73,7 @@ export PYTHONPATH=$WESTMINSTER_DIR/src/westminster/scripts:$PYTHONPATH export BORZOI_CONDA=/home//anaconda3/etc/profile.d/conda.sh export BORZOI_HG38=$BORZOI_DIR/examples/hg38 export BORZOI_MM10=$BORZOI_DIR/examples/mm10 +export BASKERVILLE_CONDA=$BORZOI_CONDA ``` *Note*: The *baskerville* and *westminster* variables are only required for data processing and model training. 
diff --git a/env_vars.sh b/env_vars.sh index 4719e52..d6a9890 100755 --- a/env_vars.sh +++ b/env_vars.sh @@ -2,7 +2,7 @@ # set these variables before running the script LOCAL_BORZOI_PATH="/home/jlinder/borzoi" -LOCAL_USER="jlinder" +LOCAL_CONDA_PATH="/home/jlinder/anaconda3/etc/profile.d/conda.sh" # create env_vars sh scripts in local conda env mkdir -p "$CONDA_PREFIX/etc/conda/activate.d" @@ -26,7 +26,7 @@ echo 'export PYTHONPATH=$BORZOI_DIR/src/scripts:$PYTHONPATH' >> $file_vars_act echo 'export BORZOI_HG38=$BORZOI_DIR/examples/hg38' >> $file_vars_act echo 'export BORZOI_MM10=$BORZOI_DIR/examples/mm10' >> $file_vars_act -echo "export BORZOI_CONDA=/home/$LOCAL_USER/anaconda3/etc/profile.d/conda.sh" >> $file_vars_act +echo "export BORZOI_CONDA=$LOCAL_CONDA_PATH" >> $file_vars_act # append env variable unsets to /deactivate.d/env_vars.sh echo 'unset BORZOI_DIR' >> $file_vars_deact From b900127f1da1f60c46dec9f51313d1339778ca84 Mon Sep 17 00:00:00 2001 From: johli Date: Tue, 8 Oct 2024 09:39:48 -0700 Subject: [PATCH 32/32] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c0eeacf..7b93f37 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ cd borzoi pip install -e . ``` -To train new models, the [westminster repository](https://github.com/calico/westminster.git) is also required and can be installed with these commands: +To train new models, the [westminster repository](https://github.com/calico/westminster.git) is also required and can be installed with these commands (*this repo is not yet available, but will be made public soon*): ```sh git clone https://github.com/calico/westminster.git cd westminster