-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlps.h
273 lines (250 loc) · 10.5 KB
/
lps.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
/**
* @file lps.h
* @brief Defines the lps struct and its associated methods for handling locally consistent
* parsing (LCP) of strings.
*
* The lps struct is responsible for performing LCP operations on input strings, constructing
* cores, and supporting functionalities like parsing, compression (deepening), and memory
* usage calculations. It includes methods for reading and writing the data to files as well
* as deepening the LCP to higher levels of compression.
*
* The lps struct leverages various helper classes like core, encoding, and hash to manage the
* string data and its compressed forms. Additionally, it supports both standard and
* reverse-complement parsing for specialized string handling in bioinformatics and other fields.
*
* Key functionalities include:
* - Parsing an input string or file to extract LCP cores.
* - Performing multi-level compression of LCP cores (deepening).
* - Saving and loading LCP cores from files.
* - Calculating memory usage of the constructed LCP structure.
*
* Dependencies:
* - Requires core.h, encoding.h, hash.h, and constant.h for auxiliary data structures and utilities.
*
* Example usage:
* @code
* std::string sequence = "AGCTAGCTAG";
* lcp::lps parser(sequence);
* parser.deepen();
* parser.write("output.lps");
* @endcode
*
* @see core.h
* @see encoding.h
* @see hash.h
* @see constant.h
*
* @namespace lcp
* @struct lps
*
* @note Destructor handles clean-up of allocated memory for cores.
*
* @author Akmuhammet Ashyralyyev
* @version 1.0
* @date 2024-09-14
*
*/
#ifndef LPS_H
#define LPS_H
#ifdef __cplusplus
extern "C" {
#endif
#include "core.h"
#include "encoding.h"
#include <stdio.h>
#include <math.h>
#define CONSTANT_FACTOR 1.5
struct lps {
int level;
int size;
struct core *cores;
};
/**
* @brief Reverses a string and stores the result in a dynamically allocated buffer.
*
* @param str The input string to reverse (must be valid for the given length).
* @param len The length of the input string (excluding null terminator, if any).
* @param rev Pointer to a char pointer where the reversed string will be stored.
* Memory is dynamically allocated and must be freed by the caller.
*
* @note The reversed string does not include a null terminator.
*/
void reverse(const char *str, int len, char **rev);
/**
* @brief Constructs an lps object from a string.
*
* @param lps_ptr The `lps` object that will be initialized
* @param str The input string to be parsed.
* @param len The length of the string to be parsed.
*/
void init_lps(struct lps *lps_ptr, const char *str, int len);
/**
* @brief Constructs an lps object from a string.
*
* @param lps_ptr The `lps` object that will be initialized
* @param str The input string to be parsed.
* @param len The length of the string to be parsed.
* @param offset The length of the offset in which each index will be shifted.
*/
void init_lps_offset(struct lps *lps_ptr, const char *str, int len, uint64_t offset);
/**
* @brief Constructs an lps object from a string, with reverse complement
* transformation.
*
* @param lps_ptr The `lps` object that will be initialized
* @param str The input string to be parsed.
* @param len The length of the string to be parsed.
*/
void init_lps2(struct lps *lps_ptr, const char *str, int len);
/**
* @brief Initializes an lps object by reading its contents from a binary file.
*
* This function reads the level and size of the `lps` object from the provided file,
* allocates memory for the cores array if necessary, and then reads each core's data.
* If any error occurs during reading, it will print an error message and terminate
* the program or return control, depending on error-handling strategy.
*
* @param lps_ptr The `lps` object that will be initialized
* @param in File pointer to the binary file containing the serialized lps data.
*
* @note The caller must ensure that the `FILE *in` is a valid and open binary file
* for reading. The function will allocate memory for `lps_ptr->cores` if
* `lps_ptr->size > 0`. The caller is responsible for freeing this memory later.
*/
void init_lps3(struct lps *lps_ptr, FILE *in);
/**
* @brief Constructs an lps object from a string, using split and merge paradigm.
* The give string will be roughly split into the length of `chunk_size`
* and processed independently. The final cores will be merged into a single array.
*
* @param lps_ptr The `lps` object that will be initialized
* @param str The input string to be parsed.
* @param len The length of the string to be parsed.
* @param chunk_size The length of the chunks to be processed.
*/
void init_lps4(struct lps *lps_ptr, const char *str, int len, int lcp_level, int chunk_size);
/**
* @brief Destructor for the lps object. Frees dynamically allocated memory for cores.
*/
void free_lps(struct lps *lps_ptr);
/**
* @brief Serializes and writes an lps object to a binary file.
*
* This function writes the level, size, and all core objects of the `lps` object
* to the specified binary file. Each core's data, including its bit representation,
* is written sequentially to the file. The resulting file can later be read to
* reconstruct the `lps` object.
*
* @param lps_ptr The `lps` object that will be initialized
* @param out File pointer to the binary file where the lps data will be written.
*
* @note The caller must ensure that the `FILE *out` is a valid and open binary file
* for writing. If the file cannot be written to, the function behavior is undefined.
*/
void write_lps(struct lps *lps_ptr, FILE *out);
/**
* @brief Parses a sequence to extract Locally Consisted Parsing (LCP) cores and stores them in a
* array of cores.
*
* This function iterates over a sequence defined by iterators `begin` and `end` and identifies key
* segments, called "cores," that represent the (LCP) regions. By analyzing
* character relationships in the sequence (such as equality or relative order), it builds and stores
* these cores for further processing in the LCP framework.
*
* @param begin Iterator pointing to the beginning of the sequence to parse.
* @param end Iterator pointing to the end of the sequence to parse.
* @param cores Pointer to a array where the identified LCP cores will be stored.
* @param offset The distance measure where the indecies of the core will be shifted by.
* @return Size of the cores identified in the given string.
*/
int parse1(const char *begin, const char *end, struct core *cores, uint64_t offset);
/**
* @brief Parses a sequence to extract Locally Consisted Parsing (LCP) cores and stores them in a
* array of cores using complement alphabet.
*
* This function iterates over a sequence defined by iterators `begin` and `end` and identifies key
* segments, called "cores," that represent the (LCP) regions. By analyzing
* character relationships in the sequence (such as equality or relative order based on complement),
* it builds and stores these cores for further processing in the LCP framework.
*
* @param begin Iterator pointing to the beginning of the sequence to parse.
* @param end Iterator pointing to the end of the sequence to parse.
* @param cores Pointer to a array where the identified LCP cores will be stored.
* @param offset The distance measure where the indecies of the core will be shifted by.
* @return Size of the cores identified in the given string.
*/
int parse2(const char *begin, const char *end, struct core *cores, uint64_t offset);
/**
* @brief Parses a array of cores to extract Locally Consisted Parsing (LCP) cores and stores them in a
* array of cores.
*
* This function iterates over a array of `core` structures defined by iterators `begin` and `end` and
* identifies key segments, called "cores," that represent the (LCP) regions. By analyzing
* `core` structure relationships in the array (such as equality or relative order), it builds and stores
* these cores for further processing in the LCP framework.
*
* @param begin Iterator pointing to the beginning of the `core` array to parse.
* @param end Iterator pointing to the end of the `core` array to parse.
* @param cores Pointer to a array where the identified LCP cores will be stored.
* @return Size of the cores identified in the given string.
*/
int parse3(struct core *begin, struct core *end, struct core *cores);
/**
* @brief Calculates and returns the memory size used by the `lps` structure.
*
* @return The memory size (in bytes) used by the `lps` structure.
*/
int64_t lps_memsize(const struct lps *lps_ptr);
/**
* @brief Deepens the compression level of the LCP structure. This method compresses the
* existing cores and finds new cores.
*
* @param lps_ptr The `lps` object that will be parsed over.
* @return 1 if successful in deepening the structure, 0 otherwise.
*/
int lps_deepen1(struct lps *lps_ptr);
/**
* @brief Deepens the compression level of the LCP structure to a specific level.
*
* @param lps_ptr The `lps` object that will be parsed over.
* @param lcp_level The target compression level to deepen to.
* @return 1 if deepening was successful, 0 otherwise.
*/
int lps_deepen(struct lps *lps_ptr, int lcp_level);
/**
* @brief Outputs the representation of a `lcp` pointer.
*
* This function iterates over the cores in the `cores` array and outputs them.
* It prints each cores as bit representation, after initially printing the LCP level.
*
* @param lps_ptr A pointer to the `lps` object to be output.
*/
void print_lps(const struct lps *lps_ptr);
/**
* @brief Equality operator for comparing two lcp::lps objects.
*
* This function compares the sizes of the core arrays of both objects, and then
* compares each core element by dereferencing the core pointers. If all elements match,
* the function returns true; otherwise, it returns false.
*
* @param lhs The left-hand side lps object to compare.
* @param rhs The right-hand side lps object to compare.
* @return 1 if both lps objects are equal, 0 otherwise.
*/
int lps_eq(const struct lps *lhs, const struct lps *rhs);
/**
* @brief Inequality operator for comparing two lcp::lps objects.
*
* This function first checks if the sizes of the core arrays are different.
* If they are, the objects are not equal. It then checks each core element.
* If any element is different, the function returns true; otherwise, it returns false.
*
* @param lhs The left-hand side lps object to compare.
* @param rhs The right-hand side lps object to compare.
* @return 1 if the lps objects are not equal, 0 otherwise.
*/
int lps_neq(const struct lps *lhs, const struct lps *rhs);
#ifdef __cplusplus
}
#endif
#endif