-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
restructure project - files for data processing in cli
- Loading branch information
Showing
10 changed files
with
236 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#ifndef GENOGROVE_BEDFILEVALIDATOR_HPP | ||
#define GENOGROVE_BEDFILEVALIDATOR_HPP | ||
|
||
// Class | ||
#include "FileFormatValidator.hpp" | ||
|
||
// Standard | ||
#include <string> | ||
#include <sstream> | ||
#include <fstream> | ||
#include <algorithm> | ||
|
||
namespace genogrove { | ||
class BEDfileValidator : public FileFormatValidator { | ||
public: | ||
BEDfileValidator(); | ||
bool validate(std::string& filepath) override; | ||
std::string getErrorMessage() override; | ||
|
||
bool isValidLine(std::string& line, int lineNum); | ||
bool isValidInteger(std::string& str); | ||
|
||
private: | ||
std::string errorMessage; | ||
}; | ||
} | ||
|
||
#endif //GENOGROVE_BEDFILEVALIDATOR_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#ifndef GENOGROVE_FILEFORMATVALIDATOR_HPP | ||
#define GENOGROVE_FILEFORMATVALIDATOR_HPP | ||
|
||
#include <string> | ||
|
||
namespace genogrove {
    /**
     * Abstract interface implemented by the format-specific file validators.
     */
    class FileFormatValidator {
        public:
            /**
             * Validates the file located at `filepath`.
             * @return true when the file is accepted, false otherwise
             */
            virtual bool validate(std::string& filepath) = 0;

            /// Returns the validator's error message.
            virtual std::string getErrorMessage() = 0;

            /// Virtual so that implementations can be destroyed through a base pointer.
            virtual ~FileFormatValidator() = default;
    };
} // namespace genogrove
|
||
#endif //GENOGROVE_FILEFORMATVALIDATOR_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#ifndef GENOGROVE_FILETYPEDETECTOR_HPP | ||
#define GENOGROVE_FILETYPEDETECTOR_HPP | ||
|
||
// Standard | ||
#include <string> | ||
#include <fstream> | ||
#include <iostream> | ||
#include <bitset> | ||
#include <unordered_map> | ||
#include <filesystem> | ||
|
||
namespace genogrove { | ||
/// File formats the detector can report; UNKNOWN is used when no known format matches.
enum class FileType { BED, BEDGRAPH, GFF, GTF, VCF, UNKNOWN };
|
||
class FileTypeDetector { | ||
public: | ||
std::tuple<FileType, std::bitset<1>> detectFileType(const std::filesystem::path& filepath); | ||
}; | ||
} | ||
|
||
|
||
#endif //GENOGROVE_FILETYPEDETECTOR_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#ifndef GENOGROVE_FILEVALIDATOR_HPP | ||
#define GENOGROVE_FILEVALIDATOR_HPP | ||
|
||
// Standard | ||
#include <filesystem> | ||
|
||
// Class | ||
#include "FileFormatValidator.hpp" | ||
#include "FileTypeDetector.hpp" | ||
#include "BEDfileValidator.hpp" | ||
|
||
namespace genogrove { | ||
class FileValidatorFactory { | ||
public: | ||
static bool validate(const std::filesystem::path& filepath, | ||
FileType filetype, | ||
std::bitset<1> gzipped, | ||
std::string& errorMessage) { | ||
std::unique_ptr<FileFormatValidator> validator; | ||
switch(filetype) { | ||
case FileType::BED: | ||
validator = std::make_unique<BEDfileValidator>(); | ||
break; | ||
default: | ||
errorMessage = "Unsupported file type"; | ||
return false; | ||
} | ||
return true; | ||
} | ||
}; | ||
} | ||
|
||
#endif //GENOGROVE_FILEVALIDATOR_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#include "BEDfileValidator.hpp" | ||
|
||
namespace genogrove { | ||
// Starts out with an empty error message.
BEDfileValidator::BEDfileValidator() : errorMessage() {}
|
||
bool BEDfileValidator::validate(std::string& filepath) { | ||
std::ifstream infile(filepath); | ||
if(!infile.is_open()) { | ||
errorMessage = "Failed to open file: " + filepath; | ||
return false; | ||
} | ||
|
||
std::string line; | ||
int lineNum = 1; | ||
while(std::getline(infile, line)) { | ||
if(!isValidLine(line, lineNum)) { | ||
return false; | ||
} | ||
lineNum++; | ||
} | ||
return true; | ||
} | ||
|
||
std::string BEDfileValidator::getErrorMessage() { | ||
return errorMessage; | ||
} | ||
|
||
bool BEDfileValidator::isValidLine(std::string& line, int lineNum) { | ||
std::stringstream ss(line); | ||
std::string chrom, start, end; | ||
|
||
if(!(ss >> chrom >> start >> end)) { // check if first three columns are valid | ||
errorMessage = "Invalid line in file (line " + std::to_string(lineNum) + "): " + line; | ||
return false; | ||
} | ||
|
||
if(!isValidInteger(start) || !isValidInteger(end)) { // check if start and end are valid integers | ||
errorMessage = "Invalid start or end coordinate in file (line " + std::to_string(lineNum) + "): " + line; | ||
return false; | ||
} | ||
|
||
if(std::stoi(start) >= std::stoi(end)) { // check if start is less than or equal to end | ||
errorMessage = "Start coordinate is greater than end coordinate in file (line " + std::to_string(lineNum) + "): " + line; | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
// Returns true when `str` is a non-empty sequence of the decimal digits 0-9.
// Signs, whitespace and any other characters make the string invalid.
bool BEDfileValidator::isValidInteger(std::string& str) {
    if(str.empty()) {
        return false; // std::all_of is true for an empty range, but "" is not a number
    }
    // Explicit range check instead of ::isdigit, whose behaviour is undefined
    // for negative char values (e.g. non-ASCII bytes where char is signed).
    return std::all_of(str.begin(), str.end(),
                       [](char ch) { return ch >= '0' && ch <= '9'; });
}
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#include "FileTypeDetector.hpp" | ||
|
||
namespace genogrove { | ||
|
||
static const std::unordered_map<std::string, FileType> extensions = { | ||
{".bed", FileType::BED}, | ||
{".bedgraph", FileType::BEDGRAPH}, | ||
{".gff", FileType::GFF}, | ||
{".gtf", FileType::GTF}, | ||
{".vcf", FileType::VCF} | ||
}; | ||
|
||
std::tuple<FileType, std::bitset<1>> FileTypeDetector::detectFileType(const std::filesystem::path& filepath) { | ||
std::ifstream file(filepath, std::ios::binary); | ||
if(!file) { throw std::runtime_error("Failed to open file: " + filepath.string()); } | ||
|
||
// check if the file is gzipped | ||
char buffer[2]; | ||
file.read(buffer, 2); | ||
std::bitset<1> gzipped(0); | ||
if(file.gcount() == 2 && buffer[0] == 0x1f && buffer[1] == 0x8b) { gzipped = std::bitset<1>(1); } | ||
|
||
// extract the file extension | ||
std::string extension = filepath.extension().string(); | ||
if(gzipped == std::bitset<1>(1) && extension == ".gz") { | ||
extension = filepath.stem().extension().string(); | ||
} | ||
|
||
// look up the file type | ||
auto it = extensions.find(extension); | ||
FileType filetype = (it != extensions.end()) ? it->second : FileType::UNKNOWN; | ||
|
||
return std::make_tuple(filetype, gzipped); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.