Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
grtheod committed Mar 16, 2020
1 parent bbf9dfe commit 97d7293
Show file tree
Hide file tree
Showing 121 changed files with 107,932 additions and 1 deletion.
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# IDE project files and CMake build trees
.idea/*
cmake-build-*/*
cmake-build-debug/*

# Files produced under scripts/ (results, binaries, generated sources)
scripts/*.txt
scripts/*.out
scripts/*.cpp
scripts/tempRes/*.txt
scripts/tempRes2/*.txt
scripts/microbenchmarks/*.txt
scripts/benchmarks/*.txt
scripts/microbenchmarks/*.dat
scripts/benchmarks/*.dat

# Data files and figures, anywhere in the tree
*.dat
*.pdf
*.jpg
# Was "*eps", which matched every name ending in "eps", not just the .eps extension.
*.eps

# Specific dataset files
resources/datasets/google-cluster-data/saber-debs-demo.data
resources/datasets/lrb/datafile3hours.dat
95 changes: 95 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# CMake 3.8 is the first release that accepts 17 as a value for
# CMAKE_CXX_STANDARD / CXX_STANDARD, which this project requests below.
cmake_minimum_required(VERSION 3.8)

# Emit compile_commands.json into the build tree for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

project(LightSaber)

# Must be set before any add_subdirectory() call: CMAKE_CXX_STANDARD only
# initializes the CXX_STANDARD property of targets created after it is set,
# and a subdirectory receives a copy of the variables as they are at the time
# add_subdirectory() is called. Setting it afterwards had no effect on the
# targets defined under src/ and test/.
set(CMAKE_CXX_STANDARD 17)

# Make headers under src/ and test/ reachable from every target below.
include_directories(src)
include_directories(test)

add_subdirectory(src)
add_subdirectory(test)

# Headers shared across the project, relative to this directory.
set(HEADERS
    src/utils/Utils.h
    test/benchmarks/queries/queries.h
    test/benchmarks/queries/queryUtils.h
    test/aggregation/windows/windowFunctions/aggregateFunctions.h
    test/aggregation/windows/windowTypes/Windows.h
)

# Placeholder: no entries, so the variable is left unset.
set(HEADERS_DATA_STRUCTURES
)

# Common sources shared between targets; every entry is currently disabled,
# so this variable is left unset as well.
set(SRC_WINSUM
    #WinSum/WinSumBase.cpp
    #WinSum/WinSum_addlong.cpp
)

# Aggregate source list; at present it contains only the shared headers.
set(SOURCES
    ${HEADERS}
    #benchmarks/query.cpp
    #${SRC_WINSUM}
)

# -- apply to all configurations
#SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g -fcilkplus -Wall" )
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include utils.h -fdiagnostics-color=auto -D_GLIBCXX_USE_CXX11_ABI=0")
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include utils.h -fdiagnostics-color=auto")

#SET(CMAKE_CXX_FLAGS_DEBUG
# "${CMAKE_CXX_FLAGS_DEBUG} -std=c++17 -g -O0 -Wall -Wfatal-errors") #-DHAVE_NUMA
#SET( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -g -fcilkplus -Wall -O2" )
#SET(CMAKE_CXX_FLAGS_RELEASE "-std=c++17 -g -Wall -O3") #-DHAVE_NUMA
#SET(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -std=c++11 -g -fcilkplus -Wall")
#if (CMAKE_BUILD_TYPE STREQUAL "Debug")
# add_definitions(-DCONFIG_KAGE_GLOBAL_DEBUG_LEVEL=30)
# # add_definitions(-D_GLIBCXX_DEBUG) # will fail compilation in folly
#endif ()
#if (CMAKE_BUILD_TYPE STREQUAL "Release")
# add_definitions(-DCONFIG_KAGE_GLOBAL_DEBUG_LEVEL=50)
# add_definitions(-DNDEBUG)
#endif ()
#if (CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
# add_definitions(-DCONFIG_KAGE_GLOBAL_DEBUG_LEVEL=20)
#endif ()

# Link every executable with -pthread. Earlier experiments that also linked
# boost_date_time or a statically built Boost from a developer-local path
# have been dropped.
# NOTE(review): this runs after the add_subdirectory() calls above, so it
# presumably does not reach targets defined under src/ and test/ - confirm.
string(APPEND CMAKE_EXE_LINKER_FLAGS " -pthread")


# xzl: tbb will be in the default lib path
#FIND_LIBRARY(tbb NAMES libtbb.so)
#FIND_LIBRARY(tbb NAMES libtbb.so PATHS tbb/lib/intel64/gcc4.7)
#FIND_LIBRARY(tbb_malloc NAMES libtbbmalloc.so PATHS tbb/lib/intel64/gcc4.7)
#TARGET_LINK_LIBRARIES(test.bin boost_date_time boost_system boost_thread numa ${tbb}))


# Boost: ask FindBoost for the shared, multi-threaded variants that link
# against the shared C++ runtime, then look up the needed components.
set(Boost_USE_MULTITHREADED  ON)
set(Boost_USE_STATIC_LIBS    OFF)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost 1.63.0
    COMPONENTS
        filesystem
        regex
        program_options
        iostreams
)


# VTune directory.
# VTUNE_HOME is not set anywhere in this file, so it has to be supplied from
# outside (for example with -DVTUNE_HOME=<path> on the cmake command line).
set(VTUNE_DIR "${VTUNE_HOME}/vtune_amplifier")

# Only add the VTune headers when VTUNE_HOME was actually provided; with an
# empty VTUNE_HOME the path above collapses to "/vtune_amplifier/include".
if(VTUNE_HOME)
  include_directories("${VTUNE_DIR}/include")
endif()

# Locate GTest
#find_package(GTest REQUIRED)
#include_directories(${GTEST_INCLUDE_DIRS})
#target_link_libraries(runTests ${GTEST_LIBRARIES} pthread)

# Locate GBenchmark
#find_package(benchmark REQUIRED)
#include_directories(${benchmark_INCLUDE_DIRS})
#target_link_libraries(runTests ${benchmark_LIBRARIES} pthread)

94 changes: 94 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
FROM ubuntu:bionic

# Install the compiler toolchain and the development libraries used by the
# later build steps. apt-get is used instead of apt because it is the
# script-stable front end, and DEBIAN_FRONTEND=noninteractive keeps any
# package configuration prompt from stalling the non-interactive image build.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        aptitude \
        autotools-dev \
        binutils-dev \
        bison \
        build-essential \
        ccache \
        flex \
        g++ \
        git \
        libboost-all-dev \
        libbz2-dev \
        libdouble-conversion-dev \
        libevent-dev \
        libffi-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libgtest-dev \
        libiberty-dev \
        libicu-dev \
        libjemalloc-dev \
        liblz4-dev \
        liblzma-dev \
        libsnappy-dev \
        libssl-dev \
        libtbb-dev \
        libxml2-dev \
        make \
        pkg-config \
        python-dev \
        zlib1g-dev \
        wget

# Replace any distribution CMake with CMake $version.$build built from source.
# -y is required on the removal: without it a confirmation prompt (e.g. when
# --auto-remove finds packages to delete) aborts the non-interactive build.
# The source tree is deleted after "make install" so it does not stay in the
# image layer.
RUN cd && \
    apt-get remove -y --purge --auto-remove cmake && \
    version=3.16 && \
    build=2 && \
    mkdir -p ~/temp && \
    cd ~/temp && \
    wget https://cmake.org/files/v$version/cmake-$version.$build.tar.gz && \
    tar -xzf cmake-$version.$build.tar.gz && \
    cd cmake-$version.$build/ && \
    ./bootstrap && \
    make -j$(nproc) && \
    make install && \
    cd && \
    rm -rf ~/temp

# Build the GoogleTest sources in /usr/src/gtest, copy the resulting static
# archives into /usr/lib, and expose them under /usr/local/lib/gtest as well.
RUN cd /usr/src/gtest && \
    cmake . && \
    make && \
    cp *.a /usr/lib && \
    mkdir -p /usr/local/lib/gtest && \
    ln -s /usr/lib/libgtest.a /usr/lib/libgtest_main.a /usr/local/lib/gtest/

# Fetch Google Benchmark into the home directory, configure a Release build
# in ~/benchmark/build (letting it download its own dependencies), then build
# and install it.
RUN git clone --depth 1 https://github.com/google/benchmark.git ~/benchmark && \
    cmake -S ~/benchmark -B ~/benchmark/build \
        -DCMAKE_BUILD_TYPE=Release \
        -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON && \
    make -C ~/benchmark/build -j$(nproc) && \
    make -C ~/benchmark/build install

# Build LLVM and clang (X86 target only, Release, shared libraries) from the
# pinned commit e3a94ba4a92 in ~/llvm-project/build and install the result.
RUN cd && \
    git clone https://github.com/llvm/llvm-project.git && \
    cd llvm-project && \
    git checkout e3a94ba4a92 && \
    cmake -S llvm -B build -G "Unix Makefiles" \
        -DLLVM_ENABLE_PROJECTS=clang \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_SHARED_LIBS=ON \
        -DCLANG_INCLUDE_DOCS=OFF \
        -DCLANG_INCLUDE_TESTS=OFF \
        -DCLANG_INSTALL_SCANBUILD=OFF \
        -DCLANG_INSTALL_SCANVIEW=OFF \
        -DCLANG_PLUGIN_SUPPORT=OFF \
        -DLLVM_TARGETS_TO_BUILD=X86 && \
    make -C build -j$(nproc) && \
    make -C build install

# Make the freshly built clang++ and clang reachable under /usr/lib/ccache.
RUN ln -s \
        /root/llvm-project/build/bin/clang++ \
        /root/llvm-project/build/bin/clang \
        /usr/lib/ccache/


# Location of the LLVM build tree produced by the step above.
ENV LLVM_HOME=/root/llvm-project/build
ENV PATH=$LLVM_HOME/bin:$PATH
# No earlier instruction in this Dockerfile defines LIBRARY_PATH or
# LD_LIBRARY_PATH, so appending their old values only produced empty path
# components ("<dir>:" and ":<dir>:"). An empty component is interpreted as
# the current working directory, so the values are now set outright.
ENV LIBRARY_PATH=$LLVM_HOME/lib
ENV LD_LIBRARY_PATH=$LLVM_HOME/lib
# /usr/lib/ccache goes first so its clang/clang++ entries win the lookup.
ENV PATH=/usr/lib/ccache:$PATH

# Clone LightSaber into the home directory and run its build script from the
# repository root.
RUN git clone https://github.com/lsds/LightSaber.git "$HOME/LightSaber" && \
    cd "$HOME/LightSaber" && \
    ./scripts/build.sh
File renamed without changes.
112 changes: 111 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,111 @@
# LightSaber
<div align="center">
<img src="https://github.com/lsds/LightSaber/blob/master/docs/images/logo.png" height="70%" width="70%"></img>
</div>

# LightSaber [![License](https://img.shields.io/github/license/lsds/LightSaber.svg?branch=master)](https://github.com/lsds/LightSaber/blob/master/LICENCE.md) [![Release](https://img.shields.io/github/v/release/lsds/LightSaber.svg?branch=master)](https://github.com/lsds/LightSaber/releases)

LightSaber is a stream processing engine that balances parallelism and incremental processing when
executing window aggregation queries on multi-core CPUs. LightSaber operates on in-order
streams of data and achieves up to an order of magnitude higher throughput compared to existing systems.

### Getting started

The `prepare-software.sh` script will guide you through the installation and compilation process of our system locally.

```
$ git clone https://github.com/lsds/LightSaber.git
$ cd LightSaber
$ ./scripts/prepare-software.sh
$ ./scripts/build.sh
```

Otherwise use the Dockerfile:
```
$ git clone https://github.com/lsds/LightSaber.git
$ cd LightSaber
$ docker build --tag="lightsaber" .
$ docker run -ti lightsaber
```

### Run unit tests
```
$ ./build/test/unit_tests/unit_tests_run
```

### Running a microbenchmark (e.g., Projection)
```
$ ./build/test/benchmarks/microbenchmarks/TestProjection
```

### Running an application benchmark with sample data
```
$ ./build/test/benchmarks/applications/cluster_monitoring
```
### How to cite LightSaber
* **[SIGMOD]** Georgios Theodorakis, Alexandros Koliousis, Peter R. Pietzuch, and Holger Pirk. LightSaber: Efficient Window Aggregation on Multi-core Processors, SIGMOD, 2020
```
@inproceedings{Theodorakis2020,
author = {Georgios Theodorakis and Alexandros Koliousis and Peter R. Pietzuch and Holger Pirk},
title = {{LightSaber: Efficient Window Aggregation on Multi-core Processors}},
booktitle = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data},
series = {SIGMOD '20},
year = {2020},
publisher = {ACM},
address = {Portland, OR, USA},
}
```

#### Other related publications
* **[EDBT]** Georgios Theodorakis, Peter R. Pietzuch, and Holger Pirk. SlideSide: A Fast Incremental Stream Processing Algorithm for Multiple Queries, EDBT, 2020
* **[ADMS]** Georgios Theodorakis, Alexandros Koliousis, Peter R. Pietzuch, and Holger Pirk. Hammer Slide: Work- and CPU-efficient Streaming Window Aggregation, ADMS, 2018
* **[SIGMOD]** Alexandros Koliousis, Matthias Weidlich, Raul Castro Fernandez, Alexander Wolf, Paolo Costa, and Peter Pietzuch. Saber: Window-Based Hybrid Stream Processing for Heterogeneous Architectures, SIGMOD, 2016


### The LightSaber engine
<div align="center">
<img src="https://github.com/lsds/LightSaber/blob/master/docs/images/architecture.png"></img>
</div>

#### LightSaber configuration

Variables in **SystemConf.h** configure the LightSaber runtime. Each of them also corresponds to a command-line argument available to all LightSaber applications:

###### --threads _N_

Sets the number of CPU worker threads (`WORKER_THREADS` variable). The default value is `1`. **CPU worker threads are pinned to physical cores**. The threads are pinned to core IDs based on the underlying hardware (e.g., if there are multiple sockets with n cores each, the first n threads are pinned to the first socket, and so on).

###### --slots _N_

Sets the number of intermediate query result slots (`SLOTS` variable). The default value is `256`.

###### --partial-windows _N_

Sets the maximum number of window fragments in a query task (`PARTIAL_WINDOWS` variable). The default value is `1024`.

###### --circular-size _N_

Sets the circular buffer size in bytes (`CIRCULAR_BUFFER_SIZE` variable). The default value is `4194304`, i.e. 4 MB.

###### --unbounded-size _N_

Sets the intermediate result buffer size in bytes (`UNBOUNDED_BUFFER_SIZE` variable). The default value is `524288`, i.e. 512 KB.

###### --hashtable-size _N_

Sets the hash table size, in number of buckets (`HASH_TABLE_SIZE` variable). Hash tables hold partial window aggregate results. The default value is `512`.

###### --throughput-monitor-interval _N_

Sets the query throughput matrix update interval, in msec (`THROUGHPUT_MONITOR_INTERVAL` variable). The default value is `1000`, i.e. 1 sec.

###### --performance-monitor-interval _N_

Sets the performance monitor interval, in msec (`PERFORMANCE_MONITOR_INTERVAL` variable). The default value is `1000`, i.e. 1 sec. Controls how often LightSaber prints performance statistics, such as throughput and latency, to standard output.

###### --latency `true`|`false`

Determines whether LightSaber should measure task latency or not (`LATENCY_ON` variable). The default value is `false`.

###### To enable NUMA-aware scheduling

Set the `HAVE_NUMA` flag in the respective `CMakeLists.txt` (e.g., in `test/benchmarks/applications/CMakeLists.txt`) and recompile the code.
Loading

0 comments on commit 97d7293

Please sign in to comment.