Skip to content

Commit

Permalink
restructured File Detector and Parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
riasc committed Nov 18, 2024
1 parent a3034c1 commit 61a82eb
Show file tree
Hide file tree
Showing 11 changed files with 163 additions and 189 deletions.
12 changes: 9 additions & 3 deletions cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,20 @@ CPMAddPackage(
"CXXOPTS_BUILD_EXAMPLES OFF"
"CXXOPTS_BUILD_TESTS OFF"
)
# Fetch zlib (madler/zlib, pinned to tag v1.3.1) through CPM.
CPMAddPackage(
NAME zlib
GITHUB_REPOSITORY madler/zlib
GIT_TAG v1.3.1
)

# Gather every .cpp file of the library (../src) and of the CLI (src).
file(GLOB GG_LIB_SOURCES "../src/*.cpp")
file(GLOB GG_CLI_SOURCES "src/*.cpp")

# Build the CLI executable from both source lists.
add_executable(genogrove_cli ${GG_LIB_SOURCES} ${GG_CLI_SOURCES})
target_include_directories(genogrove_cli PUBLIC "${cxxopts_SOURCE_DIR}/include/")
target_include_directories(genogrove_cli PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include/")
target_link_libraries(genogrove_cli cxxopts)
# Header search paths for the CLI target.
target_include_directories(genogrove_cli PUBLIC "${cxxopts_SOURCE_DIR}/include/") # cxxopts header files
target_include_directories(genogrove_cli PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include/") # genogrove header files
target_include_directories(genogrove_cli PUBLIC include) # cli header files
# Link the CLI against cxxopts and zlibstatic.
# NOTE(review): zlibstatic is presumably the static-library target exported by the zlib package fetched via CPM - confirm.
target_link_libraries(genogrove_cli cxxopts zlibstatic)

# set the output directory for the executable
set_target_properties(genogrove_cli PROPERTIES
Expand Down
28 changes: 24 additions & 4 deletions cli/include/BEDReader.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
//
// Created by Richard Albin Schaefer on 11/17/24.
//

#ifndef GENOGROVE_BEDREADER_HPP
#define GENOGROVE_BEDREADER_HPP

// Standard
#include <string>
#include <filesystem>
#include <istream>
#include <algorithm>

// Class
#include "FileReader.hpp"
#include "genogrove/Interval.hpp"
#include "FileEntry.hpp"

/**
 * @brief FileReader implementation for BED files.
 *
 * Lines are pulled one at a time from an owned input stream; the first three
 * whitespace-separated columns (chrom, start, end) of each line are validated.
 */
class BEDReader : public FileReader {
  public:
    /**
     * @brief Creates a reader for a BED file.
     * @param filepath Path of the file to read.
     * @param gzipped  Whether the file is gzip-compressed.
     */
    BEDReader(const std::filesystem::path& filepath, bool gzipped);

    /// Reads and validates the next line; returns false on failure (see getErrorMessage()).
    bool readNext(FileEntry& entry) override;

    /// Reports whether the input stream exists and can still deliver data.
    bool hasNext() override;

    /// Returns the message recorded by the most recent failure.
    std::string getErrorMessage() override;

  private:
    std::unique_ptr<std::istream> inputStream; ///< owned source of the BED lines; null when no stream was opened
    std::string errorMessage;                  ///< last recorded error, empty if none
    size_t lineNum{1};                         ///< line counter used in error messages, starts at 1
};

#endif //GENOGROVE_BEDREADER_HPP
28 changes: 0 additions & 28 deletions cli/include/BEDfileValidator.hpp

This file was deleted.

16 changes: 0 additions & 16 deletions cli/include/FileFormatValidator.hpp

This file was deleted.

16 changes: 12 additions & 4 deletions cli/include/FileReader.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
//
// Created by Richard Albin Schaefer on 11/17/24.
//

#ifndef GENOGROVE_FILEREADER_HPP
#define GENOGROVE_FILEREADER_HPP

// Class
#include "genogrove/Interval.hpp"
#include "FileEntry.hpp"

/**
 * @brief Abstract interface for readers that hand out file records one by one.
 */
class FileReader {
  public:
    /// Virtual destructor so concrete readers can be destroyed through a FileReader pointer.
    virtual ~FileReader() = default;

    /// Reads the next record into @p entry; the return value tells whether that worked.
    virtual bool readNext(FileEntry& entry) = 0;

    /// Tells whether the reader may still have data to deliver.
    virtual bool hasNext() = 0;

    /// Returns the reader's current error message.
    virtual std::string getErrorMessage() = 0;
};

#endif //GENOGROVE_FILEREADER_HPP
28 changes: 24 additions & 4 deletions cli/include/FileReaderFactory.hpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
//
// Created by Richard Albin Schaefer on 11/17/24.
//

#ifndef GENOGROVE_FILEREADERFACTORY_HPP
#define GENOGROVE_FILEREADERFACTORY_HPP

// Standard
#include <filesystem>

// Class
#include "FileReader.hpp"
#include "BEDReader.hpp"
#include "FileTypeDetector.hpp"

/**
 * @brief Factory that maps a file type to the matching FileReader implementation.
 */
class FileReaderFactory {
  public:
    /**
     * @brief Builds the reader responsible for @p filetype.
     * @param filepath Path forwarded to the reader's constructor.
     * @param filetype Type of the file to be read.
     * @param gzipped  Compression flag forwarded to the reader's constructor.
     * @return A BEDReader for FileType::BED, nullptr for every other type.
     */
    static std::unique_ptr<FileReader> create(
        const std::filesystem::path& filepath,
        FileType filetype,
        bool gzipped
    ) {
        // BED is the only format with a reader so far; everything else yields no reader.
        if (filetype != FileType::BED) {
            return nullptr;
        }
        return std::make_unique<BEDReader>(filepath, gzipped);
    }
};

#endif //GENOGROVE_FILEREADERFACTORY_HPP
26 changes: 12 additions & 14 deletions cli/include/FileTypeDetector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,19 @@
#include <unordered_map>
#include <filesystem>

namespace genogrove {
enum class FileType {
BED,
BEDGRAPH,
GFF,
GTF,
VCF,
UNKNOWN
};
// File formats known to the CLI.
// NOTE(review): UNKNOWN presumably marks a file whose format was not recognised - confirm in FileTypeDetector.cpp.
enum class FileType {
BED,
BEDGRAPH,
GFF,
GTF,
VCF,
UNKNOWN
};

class FileTypeDetector {
public:
std::tuple<FileType, bool> detectFileType(const std::filesystem::path& filepath);
};
}
// Determines the FileType of a file given its path.
class FileTypeDetector {
public:
// Returns the detected FileType together with a bool flag.
// NOTE(review): FileReaderFactory::create takes a separate `gzipped` flag, so the bool presumably reports gzip compression - confirm against the implementation.
std::tuple<FileType, bool> detectFileType(const std::filesystem::path& filepath);
};


#endif //GENOGROVE_FILETYPEDETECTOR_HPP
33 changes: 0 additions & 33 deletions cli/include/FileValidatorFactory.hpp

This file was deleted.

62 changes: 59 additions & 3 deletions cli/src/BEDReader.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,59 @@
//
// Created by Richard Albin Schaefer on 11/17/24.
//
#include "BEDReader.hpp"

// Standard
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>


/**
 * @brief Opens the BED file behind @p filepath for line-wise reading.
 *
 * Plain-text files are opened through std::ifstream. Gzipped input is not
 * implemented yet; in that case, and when the file cannot be opened, the
 * stream stays null and an error message is recorded, so hasNext() reports
 * false and getErrorMessage() explains why.
 *
 * @param filepath Path of the file to read.
 * @param gzipped  Whether the file is gzip-compressed.
 */
BEDReader::BEDReader(const std::filesystem::path& filepath, bool gzipped)
    : lineNum(1) {
    if (gzipped) {
        // TODO: wrap the file in a decompressing stream once gzip support is wired in.
        errorMessage = "Gzipped input is not supported yet: " + filepath.string();
        return;
    }

    auto file = std::make_unique<std::ifstream>(filepath);
    if (!file->is_open()) {
        errorMessage = "Failed to open file: " + filepath.string();
        return;
    }
    inputStream = std::move(file);
}

/**
 * @brief Reads the next line and parses its first three columns as a BED record.
 *
 * The line must start with three whitespace-separated fields: chrom, start
 * and end. start and end must consist of decimal digits only and start must
 * be smaller than end. On success @p entry is overwritten with a FileEntry
 * built from chrom and the start/end interval.
 *
 * @param entry Receives the parsed record; left untouched on failure.
 * @return true when a record was parsed, false otherwise (the reason is
 *         available through getErrorMessage()).
 */
bool BEDReader::readNext(FileEntry& entry) {
    // Without a stream there is nothing to read; keep an earlier message if one exists.
    if (!inputStream) {
        if (errorMessage.empty()) {
            errorMessage = "No input stream available";
        }
        return false;
    }

    std::string line;
    if (!std::getline(*inputStream, line)) {
        errorMessage = "Failed to read line at line " + std::to_string(lineNum);
        return false; // do not go on to parse an empty line
    }

    // Number of the line just consumed. The counter advances here so that
    // rejected lines are counted too and later messages stay accurate.
    const size_t currentLine = lineNum++;

    std::istringstream fields(line);
    std::string chrom, start, end;
    if (!(fields >> chrom >> start >> end)) {
        errorMessage = "Invalid line format at line " + std::to_string(currentLine);
        return false;
    }

    // Plain range check: locale-independent and well-defined for every char
    // value (::isdigit is undefined for negative char values).
    const auto isDecimalDigit = [](char c) { return c >= '0' && c <= '9'; };
    if (!std::all_of(start.begin(), start.end(), isDecimalDigit) ||
        !std::all_of(end.begin(), end.end(), isDecimalDigit)) {
        errorMessage = "Invalid coordinate format at line " + std::to_string(currentLine);
        return false;
    }

    try {
        // std::stoul throws std::out_of_range for values that do not fit.
        const size_t startNum = std::stoul(start);
        const size_t endNum = std::stoul(end);
        if (startNum >= endNum) {
            errorMessage = "Start coordinate must be less than end coordinate at line " + std::to_string(currentLine);
            return false;
        }

        // Assign to the out-parameter; declaring a new local named `entry`
        // here would shadow it and the caller would never see the record.
        entry = FileEntry(chrom, genogrove::Interval{startNum, endNum}, '\0');
        return true;
    } catch (const std::exception&) {
        errorMessage = "Failed to parse line at line " + std::to_string(currentLine) + ": " + line;
        return false;
    }
}

/**
 * @brief Reports whether another read can deliver data.
 *
 * @return false when no stream is open, when the stream is in an error or
 *         end-of-file state, or when no character is left to read;
 *         true otherwise.
 */
bool BEDReader::hasNext() {
    if (!inputStream || !inputStream->good()) {
        return false;
    }
    // eof() only becomes true after a read has already run into the end, so
    // look ahead with peek() instead; it does not consume the character.
    return inputStream->peek() != std::istream::traits_type::eof();
}

std::string BEDReader::getErrorMessage() {
return errorMessage;
}

54 changes: 0 additions & 54 deletions cli/src/BEDfileValidator.cpp

This file was deleted.

Loading

0 comments on commit 61a82eb

Please sign in to comment.