Skip to content

Commit

Permalink
Merge from main KMC repository
Browse files Browse the repository at this point in the history
  • Loading branch information
jnalanko committed Apr 21, 2023
2 parents f959ba5 + 25d29e6 commit dabf7b5
Show file tree
Hide file tree
Showing 141 changed files with 701 additions and 6,603 deletions.
13 changes: 10 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ jobs:
KMC_SINGLE_READ: ./tests/kmc_CLI/data/single_read.fq
DATA_DIR: ./tests/kmc_CLI/data/
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive
- name: make
run: |
g++ -v
Expand Down Expand Up @@ -51,8 +53,13 @@ jobs:
runs-on: macOS-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
submodules: recursive
- uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: '14.1'
- name: make
run: make -j12 kmc



13 changes: 2 additions & 11 deletions .github/workflows/self-hosted.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ jobs:
run: |
chmod +x $SCRIPT
/usr/bin/time -v $SCRIPT $DATA_DIR
prepare-small-fastq-tests:
name: Prepare small fastq tests
runs-on: [self-hosted, kmc]
Expand All @@ -35,7 +34,6 @@ jobs:
run: |
chmod +x $SCRIPT
/usr/bin/time -v $SCRIPT $DATA_DIR $TRIVIAL_COUNTER $FILE_TO_GET_PART_FROM 400000
make-tests:
name: Make tests
runs-on: [self-hosted, kmc]
Expand All @@ -46,10 +44,11 @@ jobs:
KMC_TOOLS: ./bin/kmc_tools
steps:
- uses: actions/checkout@v2
with:
submodules: recursive
- name: make (default)
run: |
/usr/bin/time -v make -j32
small-fastq-tests:
name: Small fastq tests
runs-on: [self-hosted, kmc]
Expand All @@ -66,11 +65,3 @@ jobs:
run: |
chmod +x $SCRIPT
/usr/bin/time -v $SCRIPT $DATA_DIR $KMC_EXE $KMC_TOOLS $KMC_DUMP_EXE







24 changes: 24 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,27 @@ bin/

# Editor backup files
*~
/.vs/kmc/v17
/.vs/kmc/FileContentIndex
/.vs
/Debug
/kmc_CLI/Debug
/kmc_CLI/x64/Debug/kmc_CLI.tlog
/kmc_CLI/x64/Debug
/kmc_core/Debug
/kmc_core/x64/Debug
/kmc_dump/Debug/kmc_dump.tlog
/kmc_dump/Debug
/kmc_dump/x64/Debug
/kmc_dump_sample/Debug
/kmc_dump_sample/x64/Debug
/kmc_tools/Debug
/py_kmc_api/Debug
/py_kmc_api/x64/Debug
/x64/Debug
/kmc_CLI/x64/Release
/kmc_core/x64/Release
/kmc_dump/x64/Release
/kmc_tools/x64/Release
/py_kmc_api/x64/Release
/x64/Release
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "3rd_party/cloudflare"]
path = 3rd_party/cloudflare
url = https://github.com/refresh-bio/dependencies-zlib
1 change: 1 addition & 0 deletions 3rd_party/cloudflare
Submodule cloudflare added at 3cccc9
97 changes: 74 additions & 23 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
all: kmc kmc_dump kmc_tools py_kmc_api

UNAME_S := $(shell uname -s)
UNAME_M := $(shell uname -m)
UNAME_P := $(shell uname -p)

KMC_MAIN_DIR = kmc_core
KMC_CLI_DIR = kmc_CLI
Expand All @@ -14,20 +16,64 @@ OUT_INCLUDE_DIR = include

ifeq ($(UNAME_S),Darwin)
CC = g++

D_OS =
D_ARCH =

CFLAGS = -Wall -O3 -m64 -static-libgcc -static-libstdc++ -pthread -std=c++14
CLINK = -lm -static-libgcc -static-libstdc++ -O3 -pthread -std=c++14
ifeq ($(UNAME_S),Darwin)
D_OS=MACOS
ifeq ($(UNAME_M),arm64)
D_ARCH=ARM64
else
D_ARCH=X64
endif
else
D_OS=LINUX
D_ARCH=X64
ifeq ($(UNAME_M),arm64)
D_ARCH=ARM64
endif
ifeq ($(UNAME_M),aarch64)
D_ARCH=ARM64
endif
endif

PY_KMC_API_CFLAGS = -Wl,-undefined,dynamic_lookup -fPIC -Wall -shared -std=c++14 -O3
CPU_FLAGS =
STATIC_CFLAGS =
STATIC_LFLAGS =
PY_FLAGS =

ifeq ($(D_OS),MACOS)
CC = g++-11

ifeq ($(D_ARCH),ARM64)
CPU_FLAGS = -march=armv8.4-a
else
CPU_FLAGS = -m64
endif
STATIC_CFLAGS = -static-libgcc -static-libstdc++ -pthread
STATIC_LFLAGS = -static-libgcc -static-libstdc++ -pthread
PY_FLAGS = -Wl,-undefined,dynamic_lookup -fPIC
else
CC = g++

CFLAGS = -Wall -O3 -m64 -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -std=c++14
CLINK = -lm -static -O3 -Wl,--whole-archive -lpthread -Wl,--no-whole-archive -std=c++14

PY_KMC_API_CFLAGS = -fPIC -Wall -shared -std=c++14 -O3
ifeq ($(D_ARCH),ARM64)
CPU_FLAGS = -march=armv8-a
STATIC_CFLAGS =
STATIC_LFLAGS = -static-libgcc -static-libstdc++ -pthread
else
CPU_FLAGS = -m64
STATIC_CFLAGS = -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
STATIC_LFLAGS = -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
endif
PY_FLAGS = -fPIC
endif


CFLAGS = -Wall -O3 -fsigned-char $(CPU_FLAGS) $(STATIC_CFLAGS) -std=c++14
CLINK = -lm $(STATIC_LFLAGS) -O3 -std=c++14
PY_KMC_API_CFLAGS = $(PY_FLAGS) -Wall -shared -std=c++14 -O3

KMC_CLI_OBJS = \
$(KMC_CLI_DIR)/kmc.o

Expand All @@ -51,6 +97,10 @@ $(KMC_MAIN_DIR)/kb_collector.o \
$(KMC_MAIN_DIR)/kmc_runner.o

ifeq ($(UNAME_S),Darwin)
ifeq ($(D_ARCH),ARM64)
RADULS_OBJS = \
$(KMC_MAIN_DIR)/raduls_neon.o
else
RADULS_OBJS =

KMC_LIBS = \
Expand All @@ -68,18 +118,12 @@ else
$(KMC_MAIN_DIR)/raduls_sse41.o \
$(KMC_MAIN_DIR)/raduls_avx2.o \
$(KMC_MAIN_DIR)/raduls_avx.o

KMC_LIBS = \
$(KMC_MAIN_DIR)/libs/libz.a \
$(KMC_MAIN_DIR)/libs/libbz2.a

KMC_TOOLS_LIBS = \
$(KMC_TOOLS_DIR)/libs/libz.a \
$(KMC_TOOLS_DIR)/libs/libbz2.a

LIB_KMC_CORE = $(OUT_BIN_DIR)/libkmc_core.a
endif
endif

LIB_ZLIB=3rd_party/cloudflare/libz.a
LIB_KMC_CORE = $(OUT_BIN_DIR)/libkmc_core.a


KMC_DUMP_OBJS = \
$(KMC_DUMP_DIR)/nc_utils.o \
Expand All @@ -106,9 +150,11 @@ $(KMC_TOOLS_DIR)/fastq_writer.o \
$(KMC_TOOLS_DIR)/percent_progress.o \
$(KMC_TOOLS_DIR)/kff_info_reader.o

# Build the static zlib archive from the sources in 3rd_party/cloudflare.
# The steps are chained with `&&` so that a failed `cd` or `./configure`
# aborts the recipe instead of running the next command anyway (possibly in
# the wrong directory). $(MAKE) is used instead of a literal `make` so the
# sub-make shares the parent's jobserver (-j) and honours -n.
$(LIB_ZLIB):
	cd 3rd_party/cloudflare && ./configure && $(MAKE) libz.a

$(KMC_CLI_OBJS) $(KMC_CORE_OBJS) $(KMC_DUMP_OBJS) $(KMC_API_OBJS) $(KFF_OBJS) $(KMC_TOOLS_OBJS): %.o: %.cpp
$(CC) $(CFLAGS) -c $< -o $@
$(CC) $(CFLAGS) -I 3rd_party/cloudflare -c $< -o $@

$(KMC_MAIN_DIR)/raduls_sse2.o: $(KMC_MAIN_DIR)/raduls_sse2.cpp
$(CC) $(CFLAGS) -msse2 -c $< -o $@
Expand All @@ -119,26 +165,30 @@ $(KMC_MAIN_DIR)/raduls_avx.o: $(KMC_MAIN_DIR)/raduls_avx.cpp
$(KMC_MAIN_DIR)/raduls_avx2.o: $(KMC_MAIN_DIR)/raduls_avx2.cpp
$(CC) $(CFLAGS) -mavx2 -c $< -o $@

# Compile raduls_neon.cpp into its object file using $(CFLAGS) only; unlike
# the raduls_sse2/avx2 rules, no extra instruction-set flag is appended here.
$(KMC_MAIN_DIR)/raduls_neon.o: $(KMC_MAIN_DIR)/raduls_neon.cpp
$(CC) $(CFLAGS) -c $< -o $@


$(LIB_KMC_CORE): $(KMC_CORE_OBJS) $(RADULS_OBJS) $(KMC_API_OBJS) $(KFF_OBJS)
-mkdir -p $(OUT_INCLUDE_DIR)
cp $(KMC_MAIN_DIR)/kmc_runner.h $(OUT_INCLUDE_DIR)/kmc_runner.h
-mkdir -p $(OUT_BIN_DIR)
ar rcs $@ $^

kmc: $(KMC_CLI_OBJS) $(LIB_KMC_CORE)
kmc: $(KMC_CLI_OBJS) $(LIB_KMC_CORE) $(LIB_ZLIB)
-mkdir -p $(OUT_BIN_DIR)
$(CC) $(CLINK) -o $(OUT_BIN_DIR)/$@ $^ $(KMC_LIBS)
$(CC) $(CLINK) -o $(OUT_BIN_DIR)/$@ $^ $(LIB_ZLIB)

kmc_dump: $(KMC_DUMP_OBJS) $(KMC_API_OBJS)
-mkdir -p $(OUT_BIN_DIR)
$(CC) $(CLINK) -o $(OUT_BIN_DIR)/$@ $^

kmc_tools: $(KMC_TOOLS_OBJS) $(KMC_API_OBJS) $(KFF_OBJS)
kmc_tools: $(KMC_TOOLS_OBJS) $(KMC_API_OBJS) $(KFF_OBJS) $(LIB_ZLIB)
-mkdir -p $(OUT_BIN_DIR)
$(CC) $(CLINK) -o $(OUT_BIN_DIR)/$@ $^ $(KMC_TOOLS_LIBS)
$(CC) $(CLINK) -I 3rd_party/cloudflare -o $(OUT_BIN_DIR)/$@ $^ $(LIB_ZLIB)

$(PY_KMC_API_DIR)/%.o: $(KMC_API_DIR)/%.cpp
$(CC) -c -fPIC -Wall -O3 -m64 -std=c++14 $^ -o $@
$(CC) -c -fPIC -Wall -O3 $(CPU_FLAGS) -std=c++14 $^ -o $@

py_kmc_api: $(PY_KMC_API_OBJS) $(PY_KMC_API_OBJS)
-mkdir -p $(OUT_BIN_DIR)
Expand All @@ -157,3 +207,4 @@ clean:
-rm -f $(PY_KMC_API_DIR)/*.so
-rm -rf $(OUT_BIN_DIR)
-rm -rf $(OUT_INCLUDE_DIR)
cd 3rd_party/cloudflare; make clean;
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ KMC
=
[![GitHub downloads](https://img.shields.io/github/downloads/refresh-bio/kmc/total.svg?style=flag&label=GitHub%20downloads)](https://github.com/refresh-bio/KMC/releases)
[![Bioconda downloads](https://img.shields.io/conda/dn/bioconda/kmc.svg?style=flag&label=Bioconda%20downloads)](https://anaconda.org/bioconda/kmc)
[![GitHub Actions CI](../../actions/workflows/main.yml/badge.svg)](../../actions/workflows/main.yml)
[![Biocontainer downloads](https://img.shields.io/endpoint?url=https%3A%2F%2Fmmseqs.com%2Fbiocontainer.php%3Fcontainer%3Dkmc)](https://biocontainers.pro/tools/kmc)
[![GitHub Actions CI](../../actions/workflows/main.yml/badge.svg)](../../actions/workflows/main.yml) [![Join the chat at https://gitter.im/refresh-bio/KMC](https://badges.gitter.im/refresh-bio/KMC.svg)](https://gitter.im/refresh-bio/KMC?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

KMC is a disk-based program for counting k-mers from (possibly gzipped) FASTQ/FASTA files.
KMC is one of many projects developed by [REFRESH Bioinformatics Group](http://sun.aei.polsl.pl/REFRESH/).
Expand Down Expand Up @@ -32,11 +33,16 @@ Having the k-mers counted it is possible to dump KMC binary database to textual
```

Installation details
##### Compile from sources
```
git clone --recurse-submodules https://github.com/refresh-bio/kmc.git
cd kmc
make -j32
```
=
The following libraries come with KMC in a binary (64-bit compiled for x86 platform) form.
If your system needs other binary formats, you should put the following libraries in kmc_core/libs:
* libbzip2 - for support for bzip2-compressed input FASTQ/FASTA files (http://www.bzip.org/)
* zlib - for support for gzip-compressed input FASTQ/FASTA files (http://www.zlib.net/)
* zlib - for support for gzip-compressed input FASTQ/FASTA files

The following libraries come with KMC in a source code form.
* pybind11 - used to create python wrapper of KMC API (https://github.com/pybind/pybind11)
Expand All @@ -46,7 +52,7 @@ If needed, you can also redefine maximal length of k-mer, which is 256 in the cu
Note: KMC is highly optimized and spends only as many bytes for k-mer (rounded up to 8) as
necessary, so using large values of MAX_K does not affect the KMC performance for short k-mers.

Some parts of KMC use C++14 features, so you need a compatible C++ compiler, e.g., gcc 4.9+ or clang 3.4+
Some parts of KMC use C++17 features, so you need a compatible C++ compiler

After that, you can run make to compile kmc and kmc_dump applications.

Expand Down Expand Up @@ -122,8 +128,6 @@ License
=
* KMC software is distributed under the GNU GPL 3 licence.

* libbzip2 is open-source (BSD-style license)

* gzip is free, open-source

* pybind11 (https://github.com/pybind/pybind11) is open-source (BSD-style license)
Expand Down
Loading

0 comments on commit dabf7b5

Please sign in to comment.