-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcommon.h
191 lines (161 loc) · 6.28 KB
/
common.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#ifndef __COMMON
#define __COMMON
#include <stdio.h>
#include <htslib/sam.h>
#include <htslib/hts.h>
#include <zlib.h>
#include <stdbool.h>
#include "sonic/sonic.h"
#include <htslib/faidx.h>
/*
 * Optional compile-time define, currently disabled. Its effect is decided by
 * the code that tests it, which is not part of this header.
 */
//#define MAIN_DELETION_CLUSTER

/*
 * Single-character type codes. Going by the macro names they cover
 * inversions, insertions, deletions, tandem/inverted/interspersed
 * duplications, NUMT events and MEI events.
 */
#define INVERSION      'V'
#define INSERTION      'I'
#define DELETION       'D'
#define TANDEMDUP      'E'
#define NUMTFORWARD    'N'
#define NUMTREVERSE    'O'
#define INVDUP         'W'
#define INVDUPLEFT     'S'
#define INVDUPRIGHT    'T'
#define INTERDUP       'X'
#define INTERDUPLEFT   'H'
#define INTERDUPRIGHT  'M'
#define MEIFORWARD     'A'
#define MEIREVERSE     'B'
/*
 * Split-read orientation codes.
 * FFAB means the reads are in Forward-Forward orientation and the position
 * of A is smaller than the position of B; the remaining names presumably
 * follow the same <orientation><order> scheme (TODO confirm).
 *
 * NOTE(review): some characters are shared by several macros
 * (RFAB and LEFT are both 'L'; RFBA, NONE and NUMTFORWARD are all 'N';
 * RIGHT and REVERSE are both 'R'). That is only safe if the colliding codes
 * are never compared in the same context - confirm against the callers.
 */
#define FFAB     'C'
#define FFBA     'G'
#define FRAB     'J'
#define FRBA     'K'
#define RFAB     'L'
#define RFBA     'N'

/* Side codes */
#define LEFT     'L'
#define RIGHT    'R'
#define NONE     'N'

/* Strand / direction codes */
#define FORWARD  'F'
#define REVERSE  'R'
/*
 * Exit Codes
 *
 * EXIT_SUCCESS is a standard macro supplied by <stdlib.h>. It is defined
 * here only when the standard header has not already provided it, so this
 * header never redefines a standard-library macro.
 */
#ifndef EXIT_SUCCESS
#define EXIT_SUCCESS             0
#endif
#define EXIT_COMMON              1
#define EXIT_MAXBAMS             2
#define EXIT_PARAM_ERROR         3
#define EXIT_EXTERNAL_PROG_ERROR 4
#define EXIT_FILE_OPEN_ERROR     5
#define EXIT_READGROUP           6
#define EXIT_SONIC               7
#define EXIT_BAM_INDEX           8
#define EXIT_ITERATOR_LOAD       9
#define EXIT_BAM_CLOSE           10
#define EXIT_WRONG_SONIC         11
/*
 * Return Codes
 * Note the polarity: success is 1 and error is 0, unlike the exit codes
 * where EXIT_SUCCESS is 0.
 */
#define RETURN_SUCCESS       1
#define RETURN_ERROR         0

/*
 * Fixed upper bounds. Judging by the names these cap the number of BAM
 * files, break-point list entries and samples - TODO confirm at the use sites.
 */
#define MAX_BAMS             100
#define MAXLISTBRKPOINTINTR  10000000
#define MAX_SAMPLES          100

/* Maximum filename length */
#define MAX_LENGTH           1024
/*
 * MAPPING INFO
 *
 * Integer codes RP*. Every macro expands to a bare integer constant with no
 * trailing ';', so each one can be used inside expressions, comparisons,
 * case labels and argument lists.
 */
#define RPUNMAPPED  0
#define RPCONC      1
#define RPDEL       2
#define RPINV       3
#define RPINS       4
#define RPTDUP      5
#define RPMEI       6
#define RPINTERCHR  7
#define RPNUMT      8
/* Running mode of tardis */
#define QUICK      0
#define SENSITIVE  1

/*
 * Global state, declared here and defined in a source file.
 * running_mode presumably holds QUICK or SENSITIVE - TODO confirm.
 */
extern int running_mode;
extern int ten_x_flag;
extern int output_hs_flag;
/* boolean stand-in to work in debug mode - .name and .clusters are created */
extern int debug_mode;
extern int cluster_of_reads;

/* Track memory usage */
extern long long memUsage;

/* Defined in tardis.c */
extern FILE *logFile;
/*
 * Run parameters: input/output paths, feature switches, thresholds and the
 * SONIC handle. Member order and types are part of the layout and must not
 * be rearranged.
 */
typedef struct _params {
    char *ref_genome;       /* reference genome path (fasta) */
    char *reps;             /* repeatmasker file path (*rm.out) */
    char *dups;             /* segmental duplications file path (bed) */
    char *bam_files;        /* comma separated input BAM paths, as one string, before tokenization */
    char *bam_list_path;    /* path to a file that lists the BAM file paths */
    char **bam_file_list;   /* all BAM file paths after tokenization */
    char *gaps;             /* assembly gaps file path (bed) */
    char *mei;              /* regular expression-like MEI list */
    char *outprefix;        /* prefix for the output files */
    char *outdir;           /* output directory */
    int force_read_length;  /* force the read length to this value and discard shorter reads; hidden feature due to GIAB */
    char no_soft_clip;      /* boolean stand-in: skip soft clip */
    int alt_mapping;        /* check the alternative mapping locations from the xa field in bwa */
    int seq_resolved;       /* whether to output sequence resolved calls */
    int interdup;           /* whether to cluster interspersed duplications */
    int no_mei;             /* original note: "whether to cluster mobile element insertions"; polarity vs. the name - TODO confirm */
    /* Disabled members, kept for reference: */
    //char skip_mrfast; /* boolean stand-in to skip mrFast mapping (If you already have the divet file created) */
    //int threads; /* number of threads to use for parallel mrFAST, and maybe future parallelization of TARDIS */
    int num_bams;           /* number of input BAM files */
    int num_samples;        /* number of samples */
    char **sample_names;    /* sample names -- maybe first count through a linked list, then convert to this array */
    int size_samples_array; /* current size of the samples array */
    int quick;              /* boolean stand-in: bam-only mode (no divet) */
    //int sensitive; /* boolean stand-in to work in sensitive mode (divet) */
    int ten_x;              /* boolean: 10x data is being used */
    int output_hs;          /* boolean: record the homogeneity score (HS) in the VCF whether or not HS is used in set cover */
    int make_sonic;         /* make the SONIC file and exit */
    int load_sonic;         /* load the SONIC file */
    char *sonic_info;       /* SONIC reference information string for building */
    int first_chr;          /* first chromosome as indexed in sonic; 0 by default */
    int last_chr;           /* last chromosome as indexed in sonic; chrom_count by default */
    int mq_threshold;       /* minimum mapping quality */
    int rp_threshold;       /* minimum read pair support */
    int number_of_different_mei_types; /* number of distinct MEI types, e.g. ALU:L1:SVA has three */
    int cluster_of_read;    /* number of clusters a read can be involved in; 10 by default */
    char *sonic_file;       /* SONIC file name */
    sonic *this_sonic;      /* SONIC */
    char *ref_seq;          /* reference sequence per chromosome */
    int hash_size;          /* size of the hash table for split read mapping */
    char histogram_only;    /* boolean: generate the histogram only */
} parameters;
/*
 * Parameter related TARDIS functions.
 * init_params receives the address of a parameters pointer; print_params
 * receives the parameters object itself.
 */
void init_params(parameters **params);
void print_params(parameters *params);
/*
 * Error printing plus "safe" open helpers for regular files, gzip files and
 * BAM/SAM files. Each open helper takes a path and a mode string and returns
 * the corresponding handle type.
 */
void print_error(char *msg);
FILE *safe_fopen(char *path, char *mode);
gzFile safe_fopen_gz(char *path, char *mode);
htsFile *safe_hts_open(char *path, char *mode);
/*
 * General BAM processing functions.
 * Implementations live outside this header; see the defining source file
 * for the exact semantics of each helper.
 */
int is_proper(int flag);
int is_concordant(bam1_core_t bam_alignment_core, int min, int max);
char base_as_char(int base_as_int);
char complement_char(char base);
void qual_to_ascii(char *qual);
/* String functions */

/* Even safer than strncpy */
void set_str(char **target, char *source);
void reverse_string(char *str);
int is_dna_letter(char base);
/*
 * Misc. Utility
 * compare_size_int and vh_cmprReadNameStr have the
 * int (const void *, const void *) signature used by qsort/bsearch
 * comparators.
 */
int compare_size_int(const void *p, const void *q);
void print_quote(void);
int find_chr_index_bam(char *chromosome_name, bam_hdr_t *bam_header);
int max(int x, int y);
int min(int x, int y);
int hammingDistance(char *str1, char *str2, int len);
int vh_cmprReadNameStr(const void *a, const void *b);
int32_t calculateInsertSize(int32_t pos_left, int32_t pos_right,
                            uint16_t flag, int read_length);
/*
 * Memory allocation/tracking functions.
 * The size arguments of reallocMem and freeMem presumably feed the memUsage
 * counter - TODO confirm in the implementation.
 *
 * getMemUsage is declared with an explicit (void) parameter list so the
 * compiler rejects calls that pass arguments; an empty "()" would leave the
 * arguments unchecked before C23.
 */
void *getMem(size_t size);
void *reallocMem(void *ptr, size_t old_size, size_t new_size);
void freeMem(void *ptr, size_t size);
double getMemUsage(void);
/* 10x barcode encoding: takes the barcode string, returns an unsigned long. */
unsigned long encode_ten_x_barcode(char *source);

/* Helpers operating on the run parameters and on file names. */
void get_working_directory(parameters *params);
void clean_up_temp_files(parameters *params);
char *get_file_name(char *path);
int compare_sonic_ref(parameters *params);
#endif