diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d60eac..d4c6fae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ PROJECT(rmilter C) SET(RMILTER_VERSION_MAJOR 1) -SET(RMILTER_VERSION_MINOR 9) +SET(RMILTER_VERSION_MINOR 10) SET(RMILTER_VERSION_PATCH 0) IF(GIT_ID) @@ -39,7 +39,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0 FATAL_ERROR) OPTION(ENABLE_DEBUG "Enable debug output [default: OFF]" OFF) OPTION(ENABLE_OPTIMIZATION "Enable optimization [default: OFF]" OFF) OPTION(ENABLE_DKIM "Enable dkim signatures [default: ON]" ON) -OPTION(ENABLE_MEMCACHED "Enable memcached storage [default: ON]" ON) +OPTION(ENABLE_MEMCACHED "Enable memcached storage [default: OFF]" OFF) OPTION(WANT_SYSTEMD_UNITS "Install systemd unit files on Linux [default: OFF]" OFF) ############################# INCLUDE SECTION ############################################# @@ -330,7 +330,6 @@ CHECK_INCLUDE_FILES(inttypes.h HAVE_INTTYPES_H) CHECK_INCLUDE_FILES(endian.h HAVE_ENDIAN_H) CHECK_INCLUDE_FILES(sys/endian.h HAVE_SYS_ENDIAN_H) CHECK_INCLUDE_FILES(machine/endian.h HAVE_MACHINE_ENDIAN_H) -CHECK_INCLUDE_FILES(sys/queue.h HAVE_SYS_QUEUE_H) CHECK_INCLUDE_FILES(sys/socket.h HAVE_SYS_SOCKET_H) CHECK_INCLUDE_FILES(sys/mman.h HAVE_SYS_MMAN_H) CHECK_INCLUDE_FILES(sys/un.h HAVE_SYS_UN_H) @@ -353,7 +352,7 @@ IF(HAVE_SYS_SENDFILE_H) CHECK_SYMBOL_EXISTS(sendfile sys/sendfile.h HAVE_SENDFILE) ELSE() # BSD version - CHECK_SYMBOL_EXISTS(sendfile sys/types.h;sys/socket.h;sys/uio.h HAVE_SENDFILE) + CHECK_SYMBOL_EXISTS(sendfile "sys/types.h;sys/socket.h;sys/uio.h" HAVE_SENDFILE) ENDIF() CHECK_SYMBOL_EXISTS(mkstemp unistd.h HAVE_MKSTEMP) CHECK_SYMBOL_EXISTS(PATH_MAX limits.h HAVE_PATH_MAX) @@ -364,10 +363,10 @@ CHECK_SYMBOL_EXISTS(MAP_NOCORE sys/mman.h HAVE_MMAP_NOCORE) ProcessPackage(OPENSSL LIBRARY crypto INCLUDE err.h INCLUDE_SUFFIXES include/openssl ROOT ${OPENSSL_ROOT_DIR} MODULES openssl) -ProcessPackage(PCRE LIBRARY pcre INCLUDE pcre.h INCLUDE_SUFFIXES include/pcre - ROOT ${PCRE_ROOT_DIR} MODULES pcre libpcre pcre3 libpcre3) ProcessPackage(LIBMILTER LIBRARY milter INCLUDE mfapi.h INCLUDE_SUFFIXES include/libmilter LIB_SUFFIXES lib/libmilter ROOT ${LIBMILTER_ROOT_DIR} MODULES libmilter) +ProcessPackage(PCRE LIBRARY pcre INCLUDE pcre.h INCLUDE_SUFFIXES include/pcre + ROOT ${PCRE_ROOT_DIR} MODULES pcre libpcre pcre3 libpcre3) ProcessPackage(GLIB2 LIBRARY glib-2.0 INCLUDE glib.h INCLUDE_SUFFIXES include/glib ROOT ${GLIB_ROOT_DIR} MODULES glib-2.0>=2.28) @@ -391,16 +390,19 @@ ENDIF() ################################ SOURCES SECTION ########################### ADD_SUBDIRECTORY(hiredis) INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/hiredis") +INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/contrib/lc-btrie") +INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/") ADD_SUBDIRECTORY(contrib/libucl) ADD_SUBDIRECTORY(contrib/http-parser) ADD_SUBDIRECTORY(contrib/xxhash) +ADD_SUBDIRECTORY(contrib/lc-btrie) +ADD_SUBDIRECTORY(contrib/zstd) SET(RMILTERSRC compat/blake2b-ref.c src/upstream.c src/util.c src/radix.c - src/regexp.c src/cache.c ${CMAKE_BINARY_DIR}/cfg_lex.c ${CMAKE_BINARY_DIR}/cfg_yacc.c @@ -423,6 +425,8 @@ TARGET_LINK_LIBRARIES(rmilter rmilter-hiredis) TARGET_LINK_LIBRARIES(rmilter ucl) TARGET_LINK_LIBRARIES(rmilter xxhash) TARGET_LINK_LIBRARIES(rmilter rmilter-http-parser) +TARGET_LINK_LIBRARIES(rmilter lcbtrie) +TARGET_LINK_LIBRARIES(rmilter rmilter-zstd) ##################### INSTALLATION ########################################## @@ -437,5 +441,4 @@ ADD_CUSTOM_TARGET(dist ${CMAKE_SOURCE_DIR}/dist.sh # systemd unit IF(WANT_SYSTEMD_UNITS MATCHES "ON") INSTALL(FILES "rmilter.service" DESTINATION ${SYSTEMDDIR}) - INSTALL(FILES "rmilter.socket" DESTINATION ${SYSTEMDDIR}) ENDIF() diff --git a/ChangeLog b/ChangeLog index ae5a7ec..4a05a26 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +1.9.2: + * [Feature] Allow adding/overriding options for rmilter + * [Feature] Allow to disable dkim signing completely + * [Fix] Allow empty sections in the config + * [Fix] Do not fold dkim signature received from Rspamd + * [Fix] Fix adding headers from rspamd + * [Fix] Fix adding of IP addresses to the lists + * [Fix] Treat connections from unix socket as local connections + * [Fix] Unbroke 'spamd_greylist = no' config option + * [Rework] Process all flag config file options in same way + * [Rework] Remove old and deprecated rules + * [Rework] Use lc-btrie code for radix trie + +1.9.1: + * [Feature] Allow to disable greylisting and ratelimit globally + * [Feature] Optimize sending data to rspamd + * [Fix] Avoid using of LTS as it seems to be broken on FreeBSD + * [Fix] Fix detection of sendfile on FreeBSD + * [Fix] Fix probability setup + * [Fix] Fix sdsnewlen usage + * [Fix] Fix usage of magic constants + * [Rework] Remove systemd socket activation + 1.9.0: * [CritFix] Fix spam scan * [Feature] Add .try_include macro diff --git a/centos/80-rmilter.preset b/centos/80-rmilter.preset new file mode 100644 index 0000000..bf45b50 --- /dev/null +++ b/centos/80-rmilter.preset @@ -0,0 +1 @@ +enable rmilter.service diff --git a/centos/rmilter.conf b/centos/rmilter.conf index b631ade..74d3b9c 100644 --- a/centos/rmilter.conf +++ b/centos/rmilter.conf @@ -2,5 +2,4 @@ .include /etc/rmilter/rmilter.conf.common -# rmilter is socket-activated under systemd -bind_socket = fd:3; +bind_socket = unix:/var/run/rmilter/rmilter.sock; diff --git a/centos/rmilter.conf.common b/centos/rmilter.conf.common index 5c7de46..4e5603a 100644 --- a/centos/rmilter.conf.common +++ b/centos/rmilter.conf.common @@ -206,29 +206,6 @@ limits { limit_to_ip_from = 100:0.033333333; }; -beanstalk { - # List of beanstalk servers, random selected - #servers = bot01.example.com:3132; - - # Address of server to which rmilter should send all messages copies - #copy_server = somehost:13333; - - # Address of server to which rmilter should send spam messages copies - #spam_server = otherhost:13333; - - # Beanstalk protocol - protocol = tcp; - # Time to live for task in seconds - lifetime = 172800; - # Regexp that define for which messages we should put the whole message to beanstalk - # now only In-Reply-To headers are checked - id_regexp = "/^SomeID.*$/"; - # Flags for sending beanstalk copies - send_beanstalk_headers = yes; - send_beanstalk_copy = yes; - send_beanstalk_spam = yes; -}; - greylisting { timeout = 300s; expire = 3d; @@ -236,10 +213,6 @@ greylisting { 192.168.1.1, 192.168.2.0/24, [::1]/128; - awl_enable = yes; - awl_pool = 10M; - awl_hits = 10; - awl_ttl = 3600s; }; dkim { diff --git a/centos/rmilter.spec b/centos/rmilter.spec index cdfaa82..939bb58 100644 --- a/centos/rmilter.spec +++ b/centos/rmilter.spec @@ -2,6 +2,10 @@ %define rmilter_group adm %define rmilter_home %{_localstatedir}/run/rmilter +%define has_systemd 1 +%{?el6:%undefine has_systemd} +%{?!suse_version:%define not_suse 1} + Name: rmilter Version: 1.7.0 Release: 1 @@ -17,14 +21,12 @@ License: BSD2c %endif URL: https://github.com/vstakhov/rmilter BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} -%if 0%{?suse_version} -BuildRequires: bison,flex -%else +%if 0%{?not_suse} BuildRequires: sendmail-milter %endif BuildRequires: sendmail-devel,openssl-devel,pcre-devel,glib2-devel -BuildRequires: cmake -%if 0%{?suse_version} || 0%{?el7} || 0%{?fedora} +BuildRequires: cmake,bison,flex,libopendkim-devel +%if 0%{?has_systemd} BuildRequires: systemd Requires(pre): systemd Requires(post): systemd @@ -50,6 +52,7 @@ Source0: https://rspamd.com/downloads/%{name}-%{version}.tar.xz Source1: %{name}.conf Source2: %{name}.conf.common Source3: %{name}.conf.sysvinit +Source5: 80-rmilter.preset %description The rmilter utility is designed to act as milter for sendmail and postfix MTA. @@ -66,11 +69,11 @@ rm -rf %{buildroot} || true -DCMAKE_INSTALL_PREFIX=%{_prefix} \ -DCONFDIR=%{_sysconfdir}/rmilter \ -DMANDIR=%{_mandir} \ -%if 0%{?el6} - -DWANT_SYSTEMD_UNITS=OFF \ -%else +%if 0%{?has_systemd} -DWANT_SYSTEMD_UNITS=ON \ -DSYSTEMDDIR=%{_unitdir} \ +%else + -DWANT_SYSTEMD_UNITS=OFF \ %endif %if 0%{?suse_version} -DCMAKE_SKIP_INSTALL_RPATH=ON \ @@ -91,6 +94,8 @@ rm -rf %{buildroot} || true %if 0%{?el6} %{__install} -p -D -m 0755 %{SOURCE4} %{buildroot}%{_initrddir}/%{name} %{__install} -p -D -d -m 0755 %{buildroot}%{rmilter_home} +%else +%{__install} -p -D -m 0644 %{SOURCE5} %{buildroot}%{_presetdir}/80-rmilter.preset %endif %{__install} -p -D -d -m 0755 %{buildroot}%{_sysconfdir}/%{name}/rmilter.conf.d/ @@ -104,18 +109,17 @@ rm -rf %{buildroot} %if 0%{?suse_version} %service_add_pre %{name}.service -%service_add_pre %{name}.socket %endif %post %{__chown} -R %{rmilter_user}:%{rmilter_group} %{rmilter_home} %if 0%{?suse_version} %service_add_post %{name}.service -%service_add_post %{name}.socket %endif -%if 0%{?fedora} || 0%{?el7} -%systemd_post %{name}.service -%systemd_post %{name}.socket +%if 0%{?not_suse} && 0%{?has_systemd} +#Macro is not used as we want to do this on upgrade +#%systemd_post %{name}.service +systemctl --no-reload preset %{name}.service >/dev/null 2>&1 || : %endif %if 0%{?el6} /sbin/chkconfig --add %{name} @@ -124,11 +128,9 @@ rm -rf %{buildroot} %preun %if 0%{?suse_version} %service_del_preun %{name}.service -%service_del_preun %{name}.socket %endif -%if 0%{?fedora} || 0%{?el7} +%if 0%{?not_suse} && 0%{?has_systemd} %systemd_preun %{name}.service -%systemd_preun %{name}.socket %endif %if 0%{?el6} if [ $1 = 0 ]; then @@ -140,11 +142,9 @@ fi %postun %if 0%{?suse_version} %service_del_postun %{name}.service -%service_del_postun %{name}.socket %endif -%if 0%{?fedora} || 0%{?el7} +%if 0%{?not_suse} && 0%{?has_systemd} %systemd_postun_with_restart %{name}.service -%systemd_postun %{name}.socket %endif %if 0%{?el6} if [ $1 -ge 1 ]; then @@ -155,9 +155,9 @@ fi %files %defattr(-,root,root,-) -%if 0%{?suse_version} || 0%{?fedora} || 0%{?el7} +%if 0%{?has_systemd} %{_unitdir}/%{name}.service -%{_unitdir}/%{name}.socket +%{_presetdir}/80-rmilter.preset %endif %if 0%{?el6} %{_initrddir}/%{name} diff --git a/compat/queue.h b/compat/queue.h deleted file mode 100644 index d62afcc..0000000 --- a/compat/queue.h +++ /dev/null @@ -1,618 +0,0 @@ -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $FreeBSD: src/sys/sys/queue.h,v 1.68 2006/10/24 11:20:29 ru Exp $ - */ - -#ifndef _SYS_QUEUE_H_ -#define _SYS_QUEUE_H_ - -#include - -/* - * This file defines four types of data structures: singly-linked lists, - * singly-linked tail queues, lists and tail queues. - * - * A singly-linked list is headed by a single forward pointer. The elements - * are singly linked for minimum space and pointer manipulation overhead at - * the expense of O(n) removal for arbitrary elements. New elements can be - * added to the list after an existing element or at the head of the list. - * Elements being removed from the head of the list should use the explicit - * macro for this purpose for optimum efficiency. A singly-linked list may - * only be traversed in the forward direction. Singly-linked lists are ideal - * for applications with large datasets and few or no removals or for - * implementing a LIFO queue. - * - * A singly-linked tail queue is headed by a pair of pointers, one to the - * head of the list and the other to the tail of the list. The elements are - * singly linked for minimum space and pointer manipulation overhead at the - * expense of O(n) removal for arbitrary elements. New elements can be added - * to the list after an existing element, at the head of the list, or at the - * end of the list. Elements being removed from the head of the tail queue - * should use the explicit macro for this purpose for optimum efficiency. - * A singly-linked tail queue may only be traversed in the forward direction. - * Singly-linked tail queues are ideal for applications with large datasets - * and few or no removals or for implementing a FIFO queue. - * - * A list is headed by a single forward pointer (or an array of forward - * pointers for a hash table header). The elements are doubly linked - * so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before - * or after an existing element or at the head of the list. A list - * may only be traversed in the forward direction. - * - * A tail queue is headed by a pair of pointers, one to the head of the - * list and the other to the tail of the list. The elements are doubly - * linked so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before or - * after an existing element, at the head of the list, or at the end of - * the list. A tail queue may be traversed in either direction. - * - * For details on the use of these macros, see the queue(3) manual page. - * - * - * SLIST LIST STAILQ TAILQ - * _HEAD + + + + - * _HEAD_INITIALIZER + + + + - * _ENTRY + + + + - * _INIT + + + + - * _EMPTY + + + + - * _FIRST + + + + - * _NEXT + + + + - * _PREV - - - + - * _LAST - - + + - * _FOREACH + + + + - * _FOREACH_SAFE + + + + - * _FOREACH_REVERSE - - - + - * _FOREACH_REVERSE_SAFE - - - + - * _INSERT_HEAD + + + + - * _INSERT_BEFORE - + - + - * _INSERT_AFTER + + + + - * _INSERT_TAIL - - + + - * _CONCAT - - + + - * _REMOVE_HEAD + - + - - * _REMOVE + + + + - * - */ -#ifdef QUEUE_MACRO_DEBUG -/* Store the last 2 places the queue element or head was altered */ -struct qm_trace { - char * lastfile; - int lastline; - char * prevfile; - int prevline; -}; - -#define TRACEBUF struct qm_trace trace; -#define TRASHIT(x) do {(x) = (void *)-1;} while (0) - -#define QMD_TRACE_HEAD(head) do { \ - (head)->trace.prevline = (head)->trace.lastline; \ - (head)->trace.prevfile = (head)->trace.lastfile; \ - (head)->trace.lastline = __LINE__; \ - (head)->trace.lastfile = __FILE__; \ -} while (0) - -#define QMD_TRACE_ELEM(elem) do { \ - (elem)->trace.prevline = (elem)->trace.lastline; \ - (elem)->trace.prevfile = (elem)->trace.lastfile; \ - (elem)->trace.lastline = __LINE__; \ - (elem)->trace.lastfile = __FILE__; \ -} while (0) - -#else -#define QMD_TRACE_ELEM(elem) -#define QMD_TRACE_HEAD(head) -#define TRACEBUF -#define TRASHIT(x) -#endif /* QUEUE_MACRO_DEBUG */ - -/* - * Singly-linked List declarations. - */ -#define SLIST_HEAD(name, type) \ -struct name { \ - struct type *slh_first; /* first element */ \ -} - -#define SLIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define SLIST_ENTRY(type) \ -struct { \ - struct type *sle_next; /* next element */ \ -} - -/* - * Singly-linked List functions. - */ -#define SLIST_EMPTY(head) ((head)->slh_first == NULL) - -#define SLIST_FIRST(head) ((head)->slh_first) - -#define SLIST_FOREACH(var, head, field) \ - for ((var) = SLIST_FIRST((head)); \ - (var); \ - (var) = SLIST_NEXT((var), field)) - -#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = SLIST_FIRST((head)); \ - (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ - for ((varp) = &SLIST_FIRST((head)); \ - ((var) = *(varp)) != NULL; \ - (varp) = &SLIST_NEXT((var), field)) - -#define SLIST_INIT(head) do { \ - SLIST_FIRST((head)) = NULL; \ -} while (0) - -#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ - SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ - SLIST_NEXT((slistelm), field) = (elm); \ -} while (0) - -#define SLIST_INSERT_HEAD(head, elm, field) do { \ - SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ - SLIST_FIRST((head)) = (elm); \ -} while (0) - -#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) - -#define SLIST_REMOVE(head, elm, type, field) do { \ - if (SLIST_FIRST((head)) == (elm)) { \ - SLIST_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = SLIST_FIRST((head)); \ - while (SLIST_NEXT(curelm, field) != (elm)) \ - curelm = SLIST_NEXT(curelm, field); \ - SLIST_NEXT(curelm, field) = \ - SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ - } \ - TRASHIT((elm)->field.sle_next); \ -} while (0) - -#define SLIST_REMOVE_HEAD(head, field) do { \ - SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ -} while (0) - -/* - * Singly-linked Tail queue declarations. - */ -#define STAILQ_HEAD(name, type) \ -struct name { \ - struct type *stqh_first;/* first element */ \ - struct type **stqh_last;/* addr of last next element */ \ -} - -#define STAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).stqh_first } - -#define STAILQ_ENTRY(type) \ -struct { \ - struct type *stqe_next; /* next element */ \ -} - -/* - * Singly-linked Tail queue functions. - */ -#define STAILQ_CONCAT(head1, head2) do { \ - if (!STAILQ_EMPTY((head2))) { \ - *(head1)->stqh_last = (head2)->stqh_first; \ - (head1)->stqh_last = (head2)->stqh_last; \ - STAILQ_INIT((head2)); \ - } \ -} while (0) - -#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) - -#define STAILQ_FIRST(head) ((head)->stqh_first) - -#define STAILQ_FOREACH(var, head, field) \ - for((var) = STAILQ_FIRST((head)); \ - (var); \ - (var) = STAILQ_NEXT((var), field)) - - -#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = STAILQ_FIRST((head)); \ - (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define STAILQ_INIT(head) do { \ - STAILQ_FIRST((head)) = NULL; \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ - if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ - STAILQ_NEXT((tqelm), field) = (elm); \ -} while (0) - -#define STAILQ_INSERT_HEAD(head, elm, field) do { \ - if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ - STAILQ_FIRST((head)) = (elm); \ -} while (0) - -#define STAILQ_INSERT_TAIL(head, elm, field) do { \ - STAILQ_NEXT((elm), field) = NULL; \ - *(head)->stqh_last = (elm); \ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ -} while (0) - -#define STAILQ_LAST(head, type, field) \ - (STAILQ_EMPTY((head)) ? \ - NULL : \ - ((struct type *)(void *) \ - ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) - -#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) - -#define STAILQ_REMOVE(head, elm, type, field) do { \ - if (STAILQ_FIRST((head)) == (elm)) { \ - STAILQ_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = STAILQ_FIRST((head)); \ - while (STAILQ_NEXT(curelm, field) != (elm)) \ - curelm = STAILQ_NEXT(curelm, field); \ - if ((STAILQ_NEXT(curelm, field) = \ - STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ - (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ - } \ - TRASHIT((elm)->field.stqe_next); \ -} while (0) - -#define STAILQ_REMOVE_HEAD(head, field) do { \ - if ((STAILQ_FIRST((head)) = \ - STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -/* - * List declarations. - */ -#define LIST_HEAD(name, type) \ -struct name { \ - struct type *lh_first; /* first element */ \ -} - -#define LIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define LIST_ENTRY(type) \ -struct { \ - struct type *le_next; /* next element */ \ - struct type **le_prev; /* address of previous next element */ \ -} - -/* - * List functions. - */ - -#if (defined(_KERNEL) && defined(INVARIANTS)) -#define QMD_LIST_CHECK_HEAD(head, field) do { \ - if (LIST_FIRST((head)) != NULL && \ - LIST_FIRST((head))->field.le_prev != \ - &LIST_FIRST((head))) \ - panic("Bad list head %p first->prev != head", (head)); \ -} while (0) - -#define QMD_LIST_CHECK_NEXT(elm, field) do { \ - if (LIST_NEXT((elm), field) != NULL && \ - LIST_NEXT((elm), field)->field.le_prev != \ - &((elm)->field.le_next)) \ - panic("Bad link elm %p next->prev != elm", (elm)); \ -} while (0) - -#define QMD_LIST_CHECK_PREV(elm, field) do { \ - if (*(elm)->field.le_prev != (elm)) \ - panic("Bad link elm %p prev->next != elm", (elm)); \ -} while (0) -#else -#define QMD_LIST_CHECK_HEAD(head, field) -#define QMD_LIST_CHECK_NEXT(elm, field) -#define QMD_LIST_CHECK_PREV(elm, field) -#endif /* (_KERNEL && INVARIANTS) */ - -#define LIST_EMPTY(head) ((head)->lh_first == NULL) - -#define LIST_FIRST(head) ((head)->lh_first) - -#define LIST_FOREACH(var, head, field) \ - for ((var) = LIST_FIRST((head)); \ - (var); \ - (var) = LIST_NEXT((var), field)) - -#define LIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = LIST_FIRST((head)); \ - (var) && ((tvar) = LIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define LIST_INIT(head) do { \ - LIST_FIRST((head)) = NULL; \ -} while (0) - -#define LIST_INSERT_AFTER(listelm, elm, field) do { \ - QMD_LIST_CHECK_NEXT(listelm, field); \ - if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ - LIST_NEXT((listelm), field)->field.le_prev = \ - &LIST_NEXT((elm), field); \ - LIST_NEXT((listelm), field) = (elm); \ - (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ -} while (0) - -#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ - QMD_LIST_CHECK_PREV(listelm, field); \ - (elm)->field.le_prev = (listelm)->field.le_prev; \ - LIST_NEXT((elm), field) = (listelm); \ - *(listelm)->field.le_prev = (elm); \ - (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ -} while (0) - -#define LIST_INSERT_HEAD(head, elm, field) do { \ - QMD_LIST_CHECK_HEAD((head), field); \ - if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ - LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ - LIST_FIRST((head)) = (elm); \ - (elm)->field.le_prev = &LIST_FIRST((head)); \ -} while (0) - -#define LIST_NEXT(elm, field) ((elm)->field.le_next) - -#define LIST_REMOVE(elm, field) do { \ - QMD_LIST_CHECK_NEXT(elm, field); \ - QMD_LIST_CHECK_PREV(elm, field); \ - if (LIST_NEXT((elm), field) != NULL) \ - LIST_NEXT((elm), field)->field.le_prev = \ - (elm)->field.le_prev; \ - *(elm)->field.le_prev = LIST_NEXT((elm), field); \ - TRASHIT((elm)->field.le_next); \ - TRASHIT((elm)->field.le_prev); \ -} while (0) - -/* - * Tail queue declarations. - */ -#define TAILQ_HEAD(name, type) \ -struct name { \ - struct type *tqh_first; /* first element */ \ - struct type **tqh_last; /* addr of last next element */ \ - TRACEBUF \ -} - -#define TAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).tqh_first } - -#define TAILQ_ENTRY(type) \ -struct { \ - struct type *tqe_next; /* next element */ \ - struct type **tqe_prev; /* address of previous next element */ \ - TRACEBUF \ -} - -/* - * Tail queue functions. - */ -#if (defined(_KERNEL) && defined(INVARIANTS)) -#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ - if (!TAILQ_EMPTY(head) && \ - TAILQ_FIRST((head))->field.tqe_prev != \ - &TAILQ_FIRST((head))) \ - panic("Bad tailq head %p first->prev != head", (head)); \ -} while (0) - -#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ - if (*(head)->tqh_last != NULL) \ - panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ -} while (0) - -#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ - if (TAILQ_NEXT((elm), field) != NULL && \ - TAILQ_NEXT((elm), field)->field.tqe_prev != \ - &((elm)->field.tqe_next)) \ - panic("Bad link elm %p next->prev != elm", (elm)); \ -} while (0) - -#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ - if (*(elm)->field.tqe_prev != (elm)) \ - panic("Bad link elm %p prev->next != elm", (elm)); \ -} while (0) -#else -#define QMD_TAILQ_CHECK_HEAD(head, field) -#define QMD_TAILQ_CHECK_TAIL(head, headname) -#define QMD_TAILQ_CHECK_NEXT(elm, field) -#define QMD_TAILQ_CHECK_PREV(elm, field) -#endif /* (_KERNEL && INVARIANTS) */ - -#define TAILQ_CONCAT(head1, head2, field) do { \ - if (!TAILQ_EMPTY(head2)) { \ - *(head1)->tqh_last = (head2)->tqh_first; \ - (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ - (head1)->tqh_last = (head2)->tqh_last; \ - TAILQ_INIT((head2)); \ - QMD_TRACE_HEAD(head1); \ - QMD_TRACE_HEAD(head2); \ - } \ -} while (0) - -#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) - -#define TAILQ_FIRST(head) ((head)->tqh_first) - -#define TAILQ_FOREACH(var, head, field) \ - for ((var) = TAILQ_FIRST((head)); \ - (var); \ - (var) = TAILQ_NEXT((var), field)) - -#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = TAILQ_FIRST((head)); \ - (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ - for ((var) = TAILQ_LAST((head), headname); \ - (var); \ - (var) = TAILQ_PREV((var), headname, field)) - -#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ - for ((var) = TAILQ_LAST((head), headname); \ - (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ - (var) = (tvar)) - -#define TAILQ_INIT(head) do { \ - TAILQ_FIRST((head)) = NULL; \ - (head)->tqh_last = &TAILQ_FIRST((head)); \ - QMD_TRACE_HEAD(head); \ -} while (0) - -#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - QMD_TAILQ_CHECK_NEXT(listelm, field); \ - if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ - &TAILQ_NEXT((elm), field); \ - else { \ - (head)->tqh_last = &TAILQ_NEXT((elm), field); \ - QMD_TRACE_HEAD(head); \ - } \ - TAILQ_NEXT((listelm), field) = (elm); \ - (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ - QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ -} while (0) - -#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ - QMD_TAILQ_CHECK_PREV(listelm, field); \ - (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ - TAILQ_NEXT((elm), field) = (listelm); \ - *(listelm)->field.tqe_prev = (elm); \ - (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ - QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ -} while (0) - -#define TAILQ_INSERT_HEAD(head, elm, field) do { \ - QMD_TAILQ_CHECK_HEAD(head, field); \ - if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ - TAILQ_FIRST((head))->field.tqe_prev = \ - &TAILQ_NEXT((elm), field); \ - else \ - (head)->tqh_last = &TAILQ_NEXT((elm), field); \ - TAILQ_FIRST((head)) = (elm); \ - (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ - QMD_TRACE_HEAD(head); \ - QMD_TRACE_ELEM(&(elm)->field); \ -} while (0) - -#define TAILQ_INSERT_TAIL(head, elm, field) do { \ - QMD_TAILQ_CHECK_TAIL(head, field); \ - TAILQ_NEXT((elm), field) = NULL; \ - (elm)->field.tqe_prev = (head)->tqh_last; \ - *(head)->tqh_last = (elm); \ - (head)->tqh_last = &TAILQ_NEXT((elm), field); \ - QMD_TRACE_HEAD(head); \ - QMD_TRACE_ELEM(&(elm)->field); \ -} while (0) - -#define TAILQ_LAST(head, headname) \ - (*(((struct headname *)((head)->tqh_last))->tqh_last)) - -#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) - -#define TAILQ_PREV(elm, headname, field) \ - (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) - -#define TAILQ_REMOVE(head, elm, field) do { \ - QMD_TAILQ_CHECK_NEXT(elm, field); \ - QMD_TAILQ_CHECK_PREV(elm, field); \ - if ((TAILQ_NEXT((elm), field)) != NULL) \ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ - (elm)->field.tqe_prev; \ - else { \ - (head)->tqh_last = (elm)->field.tqe_prev; \ - QMD_TRACE_HEAD(head); \ - } \ - *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ - TRASHIT((elm)->field.tqe_next); \ - TRASHIT((elm)->field.tqe_prev); \ - QMD_TRACE_ELEM(&(elm)->field); \ -} while (0) - - -#ifdef _KERNEL - -/* - * XXX insque() and remque() are an old way of handling certain queues. - * They bogusly assumes that all queue heads look alike. - */ - -struct quehead { - struct quehead *qh_link; - struct quehead *qh_rlink; -}; - -#ifdef __CC_SUPPORTS___INLINE - -static __inline void -insque(void *a, void *b) -{ - struct quehead *element = (struct quehead *)a, - *head = (struct quehead *)b; - - element->qh_link = head->qh_link; - element->qh_rlink = head; - head->qh_link = element; - element->qh_link->qh_rlink = element; -} - -static __inline void -remque(void *a) -{ - struct quehead *element = (struct quehead *)a; - - element->qh_link->qh_rlink = element->qh_rlink; - element->qh_rlink->qh_link = element->qh_link; - element->qh_rlink = 0; -} - -#else /* !__CC_SUPPORTS___INLINE */ - -void insque(void *a, void *b); -void remque(void *a); - -#endif /* __CC_SUPPORTS___INLINE */ - -#endif /* _KERNEL */ - -#endif /* !_SYS_QUEUE_H_ */ diff --git a/config.h.in b/config.h.in index dfb0df4..2af79ac 100644 --- a/config.h.in +++ b/config.h.in @@ -345,10 +345,6 @@ typedef int bool; #include #include - #include -/* compat */ -#include "queue.h" - #endif diff --git a/contrib/lc-btrie/CMakeLists.txt b/contrib/lc-btrie/CMakeLists.txt new file mode 100644 index 0000000..f81b56e --- /dev/null +++ b/contrib/lc-btrie/CMakeLists.txt @@ -0,0 +1,9 @@ +SET(LCTRIESRC btrie.c + mempool.c) +ADD_LIBRARY(lcbtrie STATIC ${LCTRIESRC}) + +SET(LCTRIE_CFLAGS "-DBUILD_RSPAMD") +if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + SET(LCTRIE_CFLAGS "${LCTRIE_CFLAGS} -O3") +endif () +set_target_properties(lcbtrie PROPERTIES COMPILE_FLAGS "${LCTRIE_CFLAGS}") \ No newline at end of file diff --git a/contrib/lc-btrie/btrie.c b/contrib/lc-btrie/btrie.c new file mode 100644 index 0000000..884fdc8 --- /dev/null +++ b/contrib/lc-btrie/btrie.c @@ -0,0 +1,2684 @@ +/* Level-Compressed Tree Bitmap (LC-TBM) Trie implementation + * + * Contributed by Geoffrey T. Dairiki + * + * This file is released under a "Three-clause BSD License". + * + * Copyright (c) 2013, Geoffrey T. Dairiki + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * Neither the name of Geoffrey T. Dairiki nor the names of other + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GEOFFREY + * T. DAIRIKI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +/***************************************************************** + * + * This code implements a routing table conceptually based on a binary + * trie structure. Internally, the trie is represented by two types + * of compound nodes: "multibit nodes", which contain the top few + * levels of an entire binary subtree; and "level compression" (LC) + * nodes which represent a (potentially long) chain of out-degree one + * (single child) binary nodes (possibly ending at a terminal node). + * + * The multibit nodes are represented using a "Tree Bitmap" structure + * (more on this below), which is very efficient --- both in terms of + * memory usage and lookup speed --- at representing densely branching + * parts of the trie. The LC nodes can efficiently represent long + * non-branching chains of binary trie nodes. Using both node types + * together results in efficient representation of both the sparse and + * dense parts of a binary trie. + * + * Graphically, here's the rough idea: + * + * ........ + * .LC o . + * . / . LC nodes can + * . o . <= represent long chains + * . \ . of (non-branching) binary + * . o . trie nodes + * . / . + * . o . + * ......../..... + * .TBM o . + * . / \ . TBM nodes can represent + * . o * . <= several levels of densely + * . / \ . branching binary trie nodes + * . o o . + * ......./.....\....... + * .TBM o .. o LC. + * . / \ .. \ . + * . o o .. o . + * . / / \ .. \ . + * . * o *.. o . + * ...../....... / . + * . o LC. . o . + * . \ . .....\...... + * . * . . o TBM. + * ........ . / \ . + * . o o . + * . / \ \ . + * .* * *. + * ........... + * + * Terminology + * ----------- + * + * node + * Usually, in the comments below, "node" will be used to refer to + * a compound node: either a multibit (TBM) node or an LC node. + * + * "internal node" or "prefix" + * The terms "prefix" or "internal node" are used to refer to + * a node in the binary trie which is internal to a multibit (TBM) + * node. + * + * ---------------------------------------------------------------- + * + * Internal Representation of the Nodes + * ==================================== + * + * Multibit (TBM) Nodes + * ~~~~~~~~~~~~~~~~~~~~ + * + * The multibit nodes are represented using a "Tree Bitmap" (TBM) + * structure as described by Eatherton, Dittia and Varghese[1]. See + * the paper referenced below for basic details. + * + * A multibit node, represents several levels of a binary trie. + * For example, here is a multibit node of stride 2 (which represent + * two levels of a binary trie. + * + * +------- | ------+ + * | multi o | + * | bit / \ | + * | node / \ | + * | o * | + * +--- / \ - / \ --+ + * O + * + * Note that, for a multibit node of stride S, there are 2^S - 1 internal + * nodes, each of which may have data (or not) associated with them, and + * 2^S "external paths" leading to other (possibly compound nodes). + * (In the diagram above, one of three internal node (the one denoted by "*") + * has data, and one of four extending paths leads to an external node + * (denoted by the 'O').) + * + * The TBM structure can represent these bitmaps in a very memory-efficient + * manner. + * + * Each TBM node consists of two bitmaps --- the "internal bitmap" and the + * "extending paths bitmap" --- and a pointer which points to an array + * which contains both the extending path ("child") nodes and any + * internal prefix data for the TBM node. + * + * +--------+--------+ + * TBM | ext bm | int bm | + * Node +--------+--------+ + * | pointer |----+ + * +-----------------+ | + * | + * | + * +-----------------+ | + * | extending path | | + * | node[N-1] | | + * +-----------------+ | + * / ... / | + * / ... / | + * +-----------------+ | + * | extending path | | + * | node[0] | | + * +-----------------+<---+ + * | int. data[M-1] | + * +-----------------+ + * / ... / + * +-----------------+ + * | int. data[0] | + * +-----------------+ + * + * The extending paths bitmap (or "ext bitmap") has one bit for each + * possible "extending path" from the bottom of the multibit node. To + * check if a particular extending path is present, one checks to see if + * the corresponding bit is set in the ext bitmap. The index into the + * array of children for that path can be found by counting the number + * of set bits to the left of that bit. + * + * Similary, the internal bitmap has one bit for each binary node + * which is internal to the multibit node. To determine whether there + * is data stored for an internal prefix, one checks the corresponding + * bit in the internal bitmap. As for extending paths, the index into + * the array of internal data is found by counting the number of set + * bits to the left of that bit. + * + * To save space in the node structure, the node data array is stored + * contiguously with the node extending path array. The single + * ("children") pointer in the TBM structure points to the beginning + * of the array of extending path nodes and to (one past) the end of + * the the internal data array. + * + * The multibit stride is chosen so that the entire TBM node structure fits + * in the space of two pointers. On 32 bit machines this means the stride + * is four (each of the two bitmaps is 16 bits); on 32 bit machines the + * stride is five. + * + * Note that there are only 2^stride - 1 internal prefixes in a TBM + * node. That means there is one unused bit in the internal bitmap. + * We require that that bit must always be clear for a TBM node. (If + * set, it indicates that the structure represents, instead, an LC + * node. See below.) + * + * ---------------------------------------------------------------- + * + * Level Compression (LC) Nodes + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * LC nodes are used to represent a chain of out-degree-one (single + * child) prefixes in the binary trie. The are represented by a bit + * string (the "relative prefix") along with its length and a pointer + * to the extending path (the next node past the LC node.) + * + * + * Non-Terminal LC Node: + * + * +------------------+-------+ + * | relative prefix |1|0|len| + * +------------------+-------+ + * | ptr.child |--+ + * +--------------------------+ | + * | + * | + * +--------------------------+ | + * | Next node - | | + * | either LC or TBM | | + * | | | + * +--------------------------+<-+ + * + * The Relative Prefix + * ------------------- + * + * The maximum relative prefix per LC node is selected so that (again) + * the entire node structure fits in the space of two pointers. On 32 bit + * machines, the maximum relative prefix is 24 bits; on 62 bit machines + * the limit is 56 bits. + * + * In the LC node structure, the relative prefix is stored as an array + * of bytes. To avoid some bit-shifting during tree searches, these + * bytes are byte-aligned with the global prefix. In other words, in + * general there are (pos % 8) "pad" bits at the beginning of the + * relative prefix --- where pos "starting bit" (or depth in the + * binary tree) of the LC node --- which really belong to the parent + * node(s) of the LC node. For efficiency (so that we don't have to + * mask them out when matching) we require that these pad bits be + * correct --- they must match the path which leads to the LC node. + * + * The relative prefix length stored in the LC node structure does not + * count the pad bits. + * + * Terminal Node Compression + * ------------------------- + * + * For memory efficiency, we also support "terminal LC" nodes. When + * the extension path from an LC node consists a single terminal node, + * we store that terminal nodes data directly in the parent LC node. + * + * Instead of this: + * + * +------------------+-------+ + * | relative prefix |1|0|len| + * +------------------+-------+ + * | ptr.child |--+ + * +--------------------------+ | + * | + * +--------------------------+ | + * | Terminal Node (TBM node, | | + * | empty except for the | | + * +--| root internal node.) | | + * | +--------------------------+<-+ + * | + * +->+--------------------------+ + * | terminal node data | + * +--------------------------+ + * + * We can do this: + * + * +------------------+-------+ + * | relative prefix |1|1|len| + * +------------------+-------+ + * | terminal node data | + * +--------------------------+ + * + * Terminal LC nodes are differentiated from non-terminal LC nodes + * by the setting of the is_terminal flag. + * + * Node Structure Packing Details + * ------------------------------ + * + * The LC and TBM node structures are carefully packed so that the + * "is_lc" flag (which indicates that a node is an LC node) + * corresponds to the one unused bit in the internal bitmap of the TBM + * node structure (which we require to be zero for TBM nodes). + * + * ---------------------------------------------------------------- + * + * References + * ========== + * + * [1] Will Eatherton, George Varghese, and Zubin Dittia. 2004. Tree + * bitmap: hardware/software IP lookups with incremental + * updates. SIGCOMM Comput. Commun. Rev. 34, 2 (April 2004), + * 97-122. DOI=10.1145/997150.997160 + * http://doi.acm.org/10.1145/997150.997160 + * http://comnet.kaist.ac.kr/yhlee/CN_2008_Spring/readings/Eath-04-tree_bitmap.pdf + * + ****************************************************************/ + +#include +#include +#include +#include +#if defined(TEST) && defined(NDEBUG) +# warning undefining NDEBUG for TEST build +# undef NDEBUG +#endif +#include + +#include "btrie.h" +#include "mempool.h" + +#if defined(__SIZEOF_POINTER__) +#if __SIZEOF_POINTER__ == 4 +# define TBM_STRIDE 4 +#elif __SIZEOF_POINTER__ == 8 +# define TBM_STRIDE 5 +#else +# error "Unsupported word size" +#endif +#else +#ifdef __LP64__ +# define TBM_STRIDE 5 +#else +# define TBM_STRIDE 4 +#endif +#endif + +#ifndef NO_STDINT_H +# if TBM_STRIDE == 4 + typedef uint16_t tbm_bitmap_t; +# else + typedef uint32_t tbm_bitmap_t; +# endif +#else /* NO_STDINT_H */ +# if TBM_STRIDE == 4 +# if SIZEOF_SHORT == 2 + typedef short unsigned tbm_bitmap_t; +# else +# error "can not determine type for 16 bit unsigned int" +# endif +# else /* TBM_STRIDE == 5 */ +# if SIZEOF_INT == 4 + typedef unsigned tbm_bitmap_t; +# elif SIZEOF_LONG == 4 + typedef long unsigned tbm_bitmap_t; +# else +# error "can not determine type for 32 bit unsigned int" +# endif +# endif +#endif + +#define TBM_FANOUT (1U << TBM_STRIDE) +#define LC_BYTES_PER_NODE (__SIZEOF_POINTER__ - 1) + + +typedef union node_u node_t; + +/* The tbm_node and lc_node structs must be packed so that the the + * high bit (LC_FLAGS_IS_LC) of lc_flags in the the lc_node struct + * coincides with bit zero (the most significant bit) of tbm_node's + * int_bm. (This bit is how we differentiate between the two node + * types. It is always clear for a tbm_node and always set for an + * lc_node.) + */ + +struct tbm_node { +#ifdef WORDS_BIGENDIAN + tbm_bitmap_t int_bm; /* the internal bitmap */ + tbm_bitmap_t ext_bm; /* extending path ("external") bitmap */ +#else + tbm_bitmap_t ext_bm; /* extending path ("external") bitmap */ + tbm_bitmap_t int_bm; /* the internal bitmap */ +#endif + union { + node_t *children; /* pointer to array of children */ + const void **data_end; /* one past end of internal prefix data array */ + } ptr; +}; + +struct lc_node { + /* lc_flags contains the LC prefix length and a couple of bit flags + * (apparently char-sized bit fields are a gcc extension) + */ +# define LC_FLAGS_IS_LC 0x80 +# define LC_FLAGS_IS_TERMINAL 0x40 +# define LC_FLAGS_LEN_MASK 0x3f +#ifdef WORDS_BIGENDIAN + btrie_oct_t lc_flags; + btrie_oct_t prefix[LC_BYTES_PER_NODE]; +#else + btrie_oct_t prefix[LC_BYTES_PER_NODE]; + btrie_oct_t lc_flags; +#endif + union { + node_t *child; /* pointer to child (if !is_terminal) */ + const void *data; /* the prefix data (if is_terminal) */ + } ptr; +}; + +union node_u { + struct tbm_node tbm_node; + struct lc_node lc_node; +}; + +struct free_hunk { + struct free_hunk *next; +}; + +#define MAX_CHILD_ARRAY_LEN (TBM_FANOUT + TBM_FANOUT / 2) + +struct btrie { + node_t root; + + struct mempool *mp; + struct free_hunk *free_list[MAX_CHILD_ARRAY_LEN]; + jmp_buf exception; + /* mem mgmt stats */ + size_t alloc_total; /* total bytes allocated from mempool */ + size_t alloc_data; /* bytes allocated for TBM node int. prefix data */ + size_t alloc_waste; /* bytes wasted by rounding of data array size */ +#ifdef BTRIE_DEBUG_ALLOC + size_t alloc_hist[MAX_CHILD_ARRAY_LEN * 2]; /* histogram of alloc sizes */ +#endif + + /* trie stats */ + size_t n_entries; /* number of entries */ + size_t n_tbm_nodes; /* total number of TBM nodes in tree */ + size_t n_lc_nodes; /* total number of LC nodes in tree */ +}; + + +/**************************************************************** + * + * Memory management + * + * We will need to frequently resize child/data arrays. The current + * mempool implementation does not support resizing/freeing, so here + * we roll our own. + */ + +static inline void +_free_hunk(struct btrie *btrie, void *buf, unsigned n_nodes) +{ + struct free_hunk *hunk = buf; + + hunk->next = btrie->free_list[n_nodes - 1]; + btrie->free_list[n_nodes - 1] = hunk; +} + +static inline void * +_get_hunk(struct btrie *btrie, unsigned n_nodes) +{ + struct free_hunk *hunk = btrie->free_list[n_nodes - 1]; + + if (hunk != NULL) + btrie->free_list[n_nodes - 1] = hunk->next; + return hunk; +} + +/* Get pointer to uninitialized child/data array. + * + * Allocates memory for an array of NDATA (void *)s followed by an + * array of NCHILDREN (node_t)s. The returned pointer points to to + * beginning of the children array (i.e. it points to (one past) the + * end of the data array.) + */ +static node_t * +alloc_nodes(struct btrie *btrie, unsigned nchildren, unsigned ndata) +{ + size_t n_nodes = nchildren + (ndata + 1) / 2; + node_t *hunk; + + assert(n_nodes > 0 && n_nodes <= MAX_CHILD_ARRAY_LEN); + + hunk = _get_hunk(btrie, n_nodes); + if (hunk == NULL) { + /* Do not have free hunk of exactly the requested size, look for a + * larger hunk. (The funny order in which we scan the buckets is + * heuristically selected in an attempt to minimize unnecessary + * creation of small fragments) + */ + size_t n, skip = n_nodes > 4 ? 4 : n_nodes; + for (n = n_nodes + skip; n <= MAX_CHILD_ARRAY_LEN; n++) { + if ((hunk = _get_hunk(btrie, n)) != NULL) { + _free_hunk(btrie, hunk + n_nodes, n - n_nodes); + goto DONE; + } + } + for (n = n_nodes + 1; n < n_nodes + skip && n <= MAX_CHILD_ARRAY_LEN; n++) { + if ((hunk = _get_hunk(btrie, n)) != NULL) { + _free_hunk(btrie, hunk + n_nodes, n - n_nodes); + goto DONE; + } + } + + /* failed to find free hunk, allocate a fresh one */ + hunk = mp_alloc(btrie->mp, n_nodes * sizeof(node_t), 1); + if (hunk == NULL) + longjmp(btrie->exception, BTRIE_ALLOC_FAILED); + btrie->alloc_total += n_nodes * sizeof(node_t); + } + + DONE: + btrie->alloc_data += ndata * sizeof(void *); + btrie->alloc_waste += (ndata % 2) * sizeof(void *); +#ifdef BTRIE_DEBUG_ALLOC + btrie->alloc_hist[2 * nchildren + ndata]++; +#endif + + /* adjust pointer to allow room for data array before child array */ + return hunk + (ndata + 1) / 2; +} + +/* Free memory allocated by alloc_nodes */ +static void +free_nodes(struct btrie *btrie, node_t *buf, unsigned nchildren, unsigned ndata) +{ + size_t n_nodes = nchildren + (ndata + 1) / 2; + + assert(n_nodes > 0 && n_nodes <= MAX_CHILD_ARRAY_LEN); + + _free_hunk(btrie, buf - (ndata + 1) / 2, n_nodes); + + btrie->alloc_data -= ndata * sizeof(void *); + btrie->alloc_waste -= (ndata % 2) * sizeof(void *); +#ifdef BTRIE_DEBUG_ALLOC + btrie->alloc_hist[2 * nchildren + ndata]--; +#endif +} + +/* Debugging/development only: */ +#ifdef BTRIE_DEBUG_ALLOC +static void +dump_alloc_hist(const struct btrie *btrie) +{ + unsigned bin; + size_t total_alloc = 0; + size_t total_free = 0; + size_t total_bytes = 0; + size_t total_waste = 0; + size_t total_free_bytes = 0; + + puts("hunk alloc free alloc wasted free"); + puts("size hunks hunks bytes bytes bytes"); + puts("==== ====== ====== ======== ======== ========"); + + for (bin = 1; bin < 2 * MAX_CHILD_ARRAY_LEN; bin++) { + size_t n_alloc = btrie->alloc_hist[bin]; + size_t bytes = n_alloc * bin * sizeof(void *); + size_t waste_bytes = (bin % 2) * n_alloc * sizeof(void *); + size_t n_free = 0, free_bytes; + if (bin % 2 == 0) { + const struct free_hunk *hunk; + for (hunk = btrie->free_list[bin / 2 - 1]; hunk; hunk = hunk->next) + n_free++; + } + free_bytes = n_free * bin * sizeof(void *); + + printf("%3zu: %6zu %6zu %8zu %8zu %8zu\n", bin * sizeof(void *), + n_alloc, n_free, bytes, waste_bytes, free_bytes); + + total_alloc += n_alloc; + total_free += n_free; + total_bytes += bytes; + total_waste += waste_bytes; + total_free_bytes += free_bytes; + } + puts("---- ------ ------ -------- -------- --------"); + printf("SUM: %6zu %6zu %8zu %8zu %8zu\n", + total_alloc, total_free, total_bytes, total_waste, total_free_bytes); +} +#endif + + +/**************************************************************** + * + * Bit twiddling + * + */ + +static inline tbm_bitmap_t +bit(unsigned b) +{ + return 1U << ((1 << TBM_STRIDE) - 1 - b); +} + + +/* count the number of set bits in bitmap + * + * algorithm from + * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + */ +static inline unsigned +count_bits(tbm_bitmap_t v) +{ + /* Count set bits in parallel. */ + /* v = (v & 0x5555...) + ((v >> 1) & 0x5555...); */ + v -= (v >> 1) & (tbm_bitmap_t)~0UL/3; + /* v = (v & 0x3333...) + ((v >> 2) & 0x3333...); */ + v = (v & (tbm_bitmap_t)~0UL/5) + ((v >> 2) & (tbm_bitmap_t)~0UL/5); + /* v = (v & 0x0f0f...) + ((v >> 4) & 0x0f0f...); */ + v = (v + (v >> 4)) & (tbm_bitmap_t)~0UL/17; + /* v = v % 255; */ +#if TBM_STRIDE == 4 + /* tbm_bitmap_t is uint16_t, avoid the multiply */ + return (v + (v >> 8)) & 0x0ff; +#else + return (v * (tbm_bitmap_t)(~0UL/255)) >> ((sizeof(tbm_bitmap_t) - 1) * 8); +#endif +} + +static inline unsigned +count_bits_before(tbm_bitmap_t bm, int b) +{ + return b ? count_bits(bm >> ((1 << TBM_STRIDE) - b)) : 0; +} + +static inline unsigned +count_bits_from(tbm_bitmap_t bm, int b) +{ + return count_bits(bm << b); +} + +/* extracts a few bits from bitstring, returning them as an integer */ +static inline btrie_oct_t +extract_bits(const btrie_oct_t *prefix, unsigned pos, unsigned nbits) +{ + if (nbits == 0) + return 0; + else { + unsigned v = (prefix[pos / 8] << 8) + prefix[pos / 8 + 1]; + return (v >> (16 - nbits - pos % 8)) & ((1U << nbits) - 1); + } +} + +static inline unsigned +extract_bit(const btrie_oct_t *prefix, int pos) +{ + return (prefix[pos / 8] >> (7 - pos % 8)) & 0x01; +} + +/* get mask for high n bits of a byte */ +static inline btrie_oct_t +high_bits(unsigned n) +{ + return (btrie_oct_t) -(1U << (8 - n)); +} + +/* determine whether two prefixes are equal */ +static inline int +prefixes_equal(const btrie_oct_t *pfx1, const btrie_oct_t *pfx2, unsigned len) +{ + return (memcmp(pfx1, pfx2, len / 8) == 0 + && ((pfx1[len / 8] ^ pfx2[len / 8]) & high_bits(len % 8)) == 0); +} + +/* determine length of longest common subprefix */ +static inline unsigned +common_prefix(const btrie_oct_t *pfx1, const btrie_oct_t *pfx2, unsigned len) +{ + /* algorithm adapted from + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup + */ + static btrie_oct_t leading_zeros[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + unsigned nb; + + for (nb = 0; nb < len / 8; nb++) { + unsigned diff = *pfx1++ ^ *pfx2++; + if (diff != 0) + return 8 * nb + leading_zeros[diff]; + } + if (len % 8) { + unsigned n = leading_zeros[*pfx1 ^ *pfx2]; + if (n < len % 8) + return 8 * nb + n; + } + return len; +} + +/**************************************************************** + */ + +static inline int +is_empty_node(const node_t *node) +{ + return node->tbm_node.ext_bm == 0 && node->tbm_node.int_bm == 0; +} + +static inline int +is_lc_node(const node_t *node) +{ + return (node->lc_node.lc_flags & LC_FLAGS_IS_LC) != 0; +} + +static inline int +is_tbm_node(const node_t *node) +{ + return !is_lc_node(node); +} + +/* is node a TBM node with internal data? */ +static inline int +has_data(const node_t *node) +{ + return is_tbm_node(node) && node->tbm_node.int_bm != 0; +} + +static inline unsigned +base_index(unsigned pfx, unsigned plen) +{ + assert(plen < TBM_STRIDE); + assert(pfx < (1U << plen)); + return pfx | (1U << plen); +} + +/* initialize node to an empty TBM node */ +static inline void +init_empty_node(struct btrie *btrie, node_t *node) +{ + memset(node, 0, sizeof(*node)); + btrie->n_tbm_nodes++; +} + +/* get pointer to TBM internal prefix data */ +static inline const void ** +tbm_data_p(const struct tbm_node *node, unsigned pfx, unsigned plen) +{ + unsigned bi = base_index(pfx, plen); + + if ((node->int_bm & bit(bi)) == 0) + return NULL; /* no data */ + else { + return &node->ptr.data_end[ -(int)count_bits_from(node->int_bm, bi) ]; + } +} + +/* add an element to the internal data array */ +static void +tbm_insert_data(struct btrie *btrie, struct tbm_node *node, + unsigned pfx, unsigned plen, const void *data) +{ + /* XXX: don't realloc if already big enough? */ + unsigned bi = base_index(pfx, plen); + unsigned nchildren = count_bits(node->ext_bm); + int ndata = count_bits(node->int_bm); + unsigned di = count_bits_before(node->int_bm, bi); + node_t *old_children = node->ptr.children; + const void **old_data_beg = node->ptr.data_end - ndata; + const void **data_beg; + + assert((node->int_bm & bit(bi)) == 0); + + node->ptr.children = alloc_nodes(btrie, nchildren, ndata + 1); + data_beg = node->ptr.data_end - (ndata + 1); + data_beg[di] = data; + node->int_bm |= bit(bi); + + if (nchildren != 0 || ndata != 0) { + memcpy(data_beg, old_data_beg, di * sizeof(data_beg[0])); + memcpy(&data_beg[di + 1], &old_data_beg[di], + (ndata - di) * sizeof(data_beg[0]) + nchildren * sizeof(node_t)); + free_nodes(btrie, old_children, nchildren, ndata); + } +} + +/* determine whether TBM has internal prefix data for pfx/plen or ancestors */ +static inline int +has_internal_data(const struct tbm_node *node, unsigned pfx, unsigned plen) +{ +# define BIT(n) (1U << ((1 << TBM_STRIDE) - 1 - (n))) +# define B0() BIT(1) /* the bit for 0/0 */ +# define B1(n) (BIT((n) + 2) | B0()) /* the bits for n/1 and its ancestors */ +# define B2(n) (BIT((n) + 4) | B1(n >> 1)) /* the bits for n/2 and ancestors */ +# define B3(n) (BIT((n) + 8) | B2(n >> 1)) /* the bits for n/3 and ancestors */ +# define B4(n) (BIT((n) + 16) | B3(n >> 1)) /* the bits for n/4 and ancestors */ + + static tbm_bitmap_t ancestors[] = { + 0, B0(), + B1(0), B1(1), + B2(0), B2(1), B2(2), B2(3), + B3(0), B3(1), B3(2), B3(3), B3(4), B3(5), B3(6), B3(7), +# if TBM_STRIDE == 5 + B4(0), B4(1), B4(2), B4(3), B4(4), B4(5), B4(6), B4(7), + B4(8), B4(9), B4(10), B4(11), B4(12), B4(13), B4(14), B4(15), +# elif TBM_STRIDE != 4 +# error "unsupported TBM_STRIDE" +# endif + }; +# undef B4 +# undef B3 +# undef B2 +# undef B1 +# undef B0 +# undef BIT + + return (node->int_bm & ancestors[base_index(pfx, plen)]) != 0; +} + +/* get pointer to TBM extending path */ +static inline node_t * +tbm_ext_path(const struct tbm_node *node, unsigned pfx) +{ + if ((node->ext_bm & bit(pfx)) == 0) + return NULL; + else + return &node->ptr.children[count_bits_before(node->ext_bm, pfx)]; +} + +/* resize TBM node child array to make space for new child node */ +static node_t * +tbm_insert_ext_path(struct btrie *btrie, struct tbm_node *node, unsigned pfx) +{ + unsigned nchildren = count_bits(node->ext_bm); + unsigned ci = count_bits_before(node->ext_bm, pfx); + int ndata = count_bits(node->int_bm); + node_t *old_children = node->ptr.children; + const void **old_data_beg = node->ptr.data_end - ndata; + + assert ((node->ext_bm & bit(pfx)) == 0); + + node->ptr.children = alloc_nodes(btrie, nchildren + 1, ndata); + init_empty_node(btrie, &node->ptr.children[ci]); + node->ext_bm |= bit(pfx); + + if (nchildren != 0 || ndata != 0) { + const void **data_beg = node->ptr.data_end - ndata; + memcpy(data_beg, old_data_beg, + ndata * sizeof(data_beg[0]) + ci * sizeof(node_t)); + memcpy(&node->ptr.children[ci + 1], &old_children[ci], + (nchildren - ci) * sizeof(old_children[0])); + free_nodes(btrie, old_children, nchildren, ndata); + } + + return &node->ptr.children[ci]; +} + + + +static inline int +lc_is_terminal(const struct lc_node *node) +{ + return (node->lc_flags & LC_FLAGS_IS_TERMINAL) != 0; +} + +static inline unsigned +lc_len(const struct lc_node *node) +{ + return node->lc_flags & LC_FLAGS_LEN_MASK; +} + +static inline void +lc_init_flags(struct lc_node *node, int is_terminal, unsigned len) +{ + assert((len & ~LC_FLAGS_LEN_MASK) == 0); + node->lc_flags = LC_FLAGS_IS_LC | len; + if (is_terminal) + node->lc_flags |= LC_FLAGS_IS_TERMINAL; +} + +static inline void +lc_add_to_len(struct lc_node *node, int increment) +{ + unsigned new_len = lc_len(node) + increment; + assert((new_len & ~LC_FLAGS_LEN_MASK) == 0); + node->lc_flags = (node->lc_flags & ~LC_FLAGS_LEN_MASK) | new_len; +} + +static inline unsigned +lc_shift(unsigned pos) +{ + return pos / 8; +} + +static inline unsigned +lc_base(unsigned pos) +{ + return 8 * lc_shift(pos); +} + +static inline unsigned +lc_bits(const struct lc_node *node, unsigned pos) +{ + return pos % 8 + lc_len(node); +} + +static inline unsigned +lc_bytes(const struct lc_node *node, unsigned pos) +{ + return (lc_bits(node, pos) + 7) / 8; +} + +static inline unsigned +lc_leading_bits(const struct lc_node *node, unsigned pos, unsigned nbits) +{ + return extract_bits(node->prefix, pos % 8, nbits); +} + + + + +/* Initialize a new terminal LC node + * + * If prefix is too long to fit in a single LC node, then a chain + * of LC nodes will be created. + */ +static void +init_terminal_node(struct btrie *btrie, node_t *dst, unsigned pos, + const btrie_oct_t *prefix, unsigned len, const void *data) +{ + struct lc_node *node = &dst->lc_node; + unsigned nbytes = (len + 7) / 8; + + while (nbytes - lc_shift(pos) > LC_BYTES_PER_NODE) { + memcpy(node->prefix, prefix + lc_shift(pos), LC_BYTES_PER_NODE); + lc_init_flags(node, 0, 8 * LC_BYTES_PER_NODE - pos % 8); + node->ptr.child = alloc_nodes(btrie, 1, 0); + pos += lc_len(node); + node = &node->ptr.child->lc_node; + btrie->n_lc_nodes++; + } + + memcpy(node->prefix, prefix + lc_shift(pos), nbytes - lc_shift(pos)); + lc_init_flags(node, 1, len - pos); + node->ptr.data = data; + btrie->n_lc_nodes++; +} + +/* merge chains of multiple LC nodes into a single LC node, if possible. + * + * also ensure that the leading nodes in the LC chain have maximum length. + */ +static void +coalesce_lc_node(struct btrie *btrie, struct lc_node *node, unsigned pos) +{ + while (! lc_is_terminal(node) + && lc_bits(node, pos) < 8 * LC_BYTES_PER_NODE + && is_lc_node(node->ptr.child)) { + struct lc_node *child = &node->ptr.child->lc_node; + unsigned spare_bits = 8 * LC_BYTES_PER_NODE - lc_bits(node, pos); + unsigned end = pos + lc_len(node); + unsigned shift = lc_shift(end) - lc_shift(pos); + if (lc_len(child) <= spare_bits) { + /* node plus child will fit in single node - merge */ + memcpy(node->prefix + shift, child->prefix, + lc_bytes(child, end)); + lc_init_flags(node, lc_is_terminal(child), lc_len(node) + lc_len(child)); + node->ptr = child->ptr; + free_nodes(btrie, (node_t *)child, 1, 0); + btrie->n_lc_nodes--; + } + else { + /* can't merge, but can take some of childs bits */ + unsigned cshift = lc_shift(end + spare_bits) - lc_shift(end); + + memcpy(node->prefix + shift, child->prefix, LC_BYTES_PER_NODE - shift); + lc_add_to_len(node, spare_bits); + if (cshift) + memmove(child->prefix, child->prefix + cshift, + lc_bytes(child, end) - cshift); + assert(lc_len(child) > spare_bits); + lc_add_to_len(child, -spare_bits); + + pos += lc_len(node); + node = child; + } + } +} + + +static void init_tbm_node(struct btrie *btrie, node_t *node, unsigned pos, + const btrie_oct_t pbyte, + const void **root_data_p, + node_t *left, node_t *right); + +/* given an LC node at orig_pos, create a new (shorter) node at pos */ +static void +shorten_lc_node(struct btrie *btrie, node_t *dst, unsigned pos, + struct lc_node *src, unsigned orig_pos) +{ + assert(orig_pos < pos); + assert(lc_len(src) >= pos - orig_pos); + assert(dst != (node_t *)src); + + if (lc_len(src) == pos - orig_pos && !lc_is_terminal(src)) { + /* just steal the child */ + node_t *child = src->ptr.child; + *dst = *child; + free_nodes(btrie, child, 1, 0); + btrie->n_lc_nodes--; + } + else { + struct lc_node *node = &dst->lc_node; + unsigned shift = lc_shift(pos) - lc_shift(orig_pos); + if (shift) { + memmove(node->prefix, src->prefix + shift, + lc_bytes(src, orig_pos) - shift); + node->lc_flags = src->lc_flags; + node->ptr = src->ptr; + } + else { + *node = *src; + } + lc_add_to_len(node, -(pos - orig_pos)); + coalesce_lc_node(btrie, node, pos); + } +} + +/* convert LC node to non-terminal LC node of length len *in place* + * + * on entry, node must have length at least len + */ +static void +split_lc_node(struct btrie *btrie, struct lc_node *node, unsigned pos, unsigned len) +{ + node_t *child = alloc_nodes(btrie, 1, 0); + + assert(lc_len(node) >= len); + shorten_lc_node(btrie, child, pos + len, node, pos); + + lc_init_flags(node, 0, len); + node->ptr.child = child; + btrie->n_lc_nodes++; +} + +/* convert non-terminal LC node of length one to a TBM node *in place* */ +static void +convert_lc_node_1(struct btrie *btrie, struct lc_node *node, unsigned pos) +{ + btrie_oct_t pbyte = node->prefix[0]; + node_t *child = node->ptr.child; + node_t *left, *right; + + assert(lc_len(node) == 1); + assert(!lc_is_terminal(node)); + + if (extract_bit(node->prefix, pos % 8)) + left = NULL, right = child; + else + left = child, right = NULL; + init_tbm_node(btrie, (node_t *)node, pos, pbyte, NULL, left, right); + free_nodes(btrie, child, 1, 0); + btrie->n_lc_nodes--; +} + +/* convert an LC node to TBM node *in place* */ +static void +convert_lc_node(struct btrie *btrie, struct lc_node *node, unsigned pos) +{ + unsigned len = lc_len(node); + + if (len >= TBM_STRIDE) { + unsigned pfx = lc_leading_bits(node, pos, TBM_STRIDE); + struct tbm_node *result = (struct tbm_node *)node; + + /* split to LC of len TBM_STRIDE followed by child (extending path) */ + split_lc_node(btrie, node, pos, TBM_STRIDE); + /* then convert leading LC node to TBM node */ + result->int_bm = 0; + result->ext_bm = bit(pfx); + btrie->n_lc_nodes--; + btrie->n_tbm_nodes++; + } + else if (lc_is_terminal(node)) { + /* convert short terminal LC to TBM (with internal data) */ + unsigned pfx = lc_leading_bits(node, pos, len); + const void *data = node->ptr.data; + node_t *result = (node_t *)node; + + init_empty_node(btrie, result); + tbm_insert_data(btrie, &result->tbm_node, pfx, len, data); + + btrie->n_lc_nodes--; + } + else { + assert(len > 0); + for (; len > 1; len--) { + split_lc_node(btrie, node, pos, len - 1); + convert_lc_node_1(btrie, &node->ptr.child->lc_node, pos + len - 1); + } + convert_lc_node_1(btrie, node, pos); + } +} + + +static void +insert_lc_node(struct btrie *btrie, node_t *dst, unsigned pos, + btrie_oct_t pbyte, unsigned last_bit, node_t *tail) +{ + struct lc_node *node = &dst->lc_node; + btrie_oct_t mask = 1 << (7 - (pos % 8)); + btrie_oct_t bit = last_bit ? mask : 0; + + if (mask != 0x01 && is_lc_node(tail)) { + /* optimization: LC tail has room for the extra bit (without shifting) */ + assert((tail->lc_node.prefix[0] & mask) == bit); + *node = tail->lc_node; + lc_add_to_len(node, 1); + return; + } + + /* add new leading LC node of len 1 */ + node->prefix[0] = pbyte | bit; + lc_init_flags(node, 0, 1); + node->ptr.child = alloc_nodes(btrie, 1, 0); + node->ptr.child[0] = *tail; + btrie->n_lc_nodes++; + + if (is_lc_node(tail)) + coalesce_lc_node(btrie, node, pos); +} + +/* given: + * pbyte: the bits in the prefix between lc_base(pos) and pos + * pfx: the next TBM_STRIDE bits in the prefix starting at pos + * returns: + * the bits in the prefix between lc_base(pos + plen) and pos + plen + */ +static inline btrie_oct_t +next_pbyte(btrie_oct_t pbyte, unsigned pos, unsigned pfx) +{ + unsigned end = pos + TBM_STRIDE; + + if (end % 8 != 0) { + btrie_oct_t nbyte = (btrie_oct_t)pfx << (8 - end % 8); + if (end % 8 > TBM_STRIDE) + nbyte |= pbyte & high_bits(pos % 8); + return nbyte; + } + return 0; +} + +/* construct a new TBM node, given the data and children of the + * root prefix of the new node. + */ +static void +init_tbm_node(struct btrie *btrie, node_t *dst, unsigned pos, + const btrie_oct_t pbyte, + const void **root_data_p, node_t *left, node_t *right) +{ + struct tbm_node *node = &dst->tbm_node; + unsigned nchildren = 0; + unsigned ndata = 0; + node_t children[TBM_FANOUT]; + const void *data[TBM_FANOUT - 1]; + tbm_bitmap_t ext_bm = 0; + tbm_bitmap_t int_bm = 0; + unsigned i, d, pfx_base; + + if (left && is_lc_node(left) && lc_len(&left->lc_node) < TBM_STRIDE) + convert_lc_node(btrie, &left->lc_node, pos + 1); + if (right && is_lc_node(right) && lc_len(&right->lc_node) < TBM_STRIDE) + convert_lc_node(btrie, &right->lc_node, pos + 1); + + /* set internal data for root prefix */ + if (root_data_p) { + data[ndata++] = *root_data_p; + int_bm |= bit(base_index(0, 0)); + } + /* copy internal data from children */ + for (d = 0; d < TBM_STRIDE - 1; d++) { + if (left && has_data(left)) { + for (i = 0; i < 1U << d; i++) { + const void **data_p = tbm_data_p(&left->tbm_node, i, d); + if (data_p) { + data[ndata++] = *data_p; + int_bm |= bit(base_index(i, d + 1)); + } + } + } + if (right && has_data(right)) { + for (i = 0; i < 1U << d; i++) { + const void **data_p = tbm_data_p(&right->tbm_node, i, d); + if (data_p) { + data[ndata++] = *data_p; + int_bm |= bit(base_index(i + (1 << d), d + 1)); + } + } + } + } + + /* copy extending paths */ + for (pfx_base = 0; pfx_base < TBM_FANOUT; pfx_base += TBM_FANOUT / 2) { + node_t *child = pfx_base ? right : left; + if (child == NULL) { + continue; + } + else if (is_lc_node(child)) { + unsigned pfx = pfx_base + lc_leading_bits(&child->lc_node, pos + 1, + TBM_STRIDE - 1); + /* child is LC node, just shorten it by TBM_STRIDE - 1 */ + shorten_lc_node(btrie, &children[nchildren++], pos + TBM_STRIDE, + &child->lc_node, pos + 1); + ext_bm |= bit(pfx); + } + else if (!is_empty_node(child)) { + /* convert deepest internal prefixes of child to extending paths + * of the new node + */ + for (i = 0; i < TBM_FANOUT / 2; i++) { + const void **data_p = tbm_data_p(&child->tbm_node, i, TBM_STRIDE - 1); + node_t *left_ext = tbm_ext_path(&child->tbm_node, 2 * i); + node_t *right_ext = tbm_ext_path(&child->tbm_node, 2 * i + 1); + if (data_p || left_ext || right_ext) { + node_t *ext_path = &children[nchildren++]; + unsigned pfx = pfx_base + i; + btrie_oct_t npbyte = next_pbyte(pbyte, pos, pfx); + + ext_bm |= bit(pfx); + if (left_ext == NULL && right_ext == NULL) { + /* only have data - set ext_path to zero-length terminal LC node */ + lc_init_flags(&ext_path->lc_node, 1, 0); + ext_path->lc_node.prefix[0] = npbyte; + ext_path->lc_node.ptr.data = *data_p; + btrie->n_lc_nodes++; + } + else if (data_p || (left_ext && right_ext)) { + /* have at least two of data, left_ext, right_ext + * ext_path must be a full TBM node */ + init_tbm_node(btrie, ext_path, pos + TBM_STRIDE, + npbyte, data_p, left_ext, right_ext); + } + else if (left_ext) { + /* have only left_ext, insert length-one LC node */ + insert_lc_node(btrie, ext_path, pos + TBM_STRIDE, + npbyte, 0, left_ext); + } + else { + /* have only right_ext, insert length-one LC node */ + insert_lc_node(btrie, ext_path, pos + TBM_STRIDE, + npbyte, 1, right_ext); + } + } + } + btrie->n_tbm_nodes--; + free_nodes(btrie, child->tbm_node.ptr.children, + count_bits(child->tbm_node.ext_bm), + count_bits(child->tbm_node.int_bm)); + } + } + + assert(count_bits(int_bm) == ndata); + assert(count_bits(ext_bm) == nchildren); + + node->ptr.children = alloc_nodes(btrie, nchildren, ndata); + memcpy(node->ptr.data_end - (int)ndata, data, ndata * sizeof(data[0])); + memcpy(node->ptr.children, children, nchildren * sizeof(children[0])); + node->ext_bm = ext_bm; + node->int_bm = int_bm; + btrie->n_tbm_nodes++; +} + + +static enum btrie_result +add_to_trie(struct btrie *btrie, node_t *node, unsigned pos, + const btrie_oct_t *prefix, unsigned len, const void *data) +{ + for (;;) { + if (is_lc_node(node)) { + struct lc_node *lc_node = &node->lc_node; + unsigned end = pos + lc_len(lc_node); + unsigned cbits = common_prefix(prefix + lc_shift(pos), lc_node->prefix, + (len < end ? len : end) - lc_base(pos)); + unsigned clen = lc_base(pos) + cbits; /* position of first mismatch */ + + if (clen == end && !lc_is_terminal(lc_node)) { + /* matched entire prefix of LC node, proceed to child */ + assert(lc_len(lc_node) > 0); + node = lc_node->ptr.child; + pos = end; + } + else if (clen == end && len == end && lc_is_terminal(lc_node)) { + /* exact match for terminal node - already have data for prefix */ + return BTRIE_DUPLICATE_PREFIX; + } + else { + assert(clen < end || (lc_is_terminal(lc_node) && len > end)); + /* Need to insert new TBM node at clen */ + if (clen > pos) { + split_lc_node(btrie, lc_node, pos, clen - pos); + node = lc_node->ptr.child; + assert(is_lc_node(node)); + pos = clen; + } + convert_lc_node(btrie, &node->lc_node, pos); + } + } + else if (is_empty_node(node)) { + /* at empty TBM node - just replace with terminal LC node */ + init_terminal_node(btrie, node, pos, prefix, len, data); + btrie->n_entries++; + btrie->n_tbm_nodes--; + return BTRIE_OKAY; + } + else { + struct tbm_node *tbm_node = &node->tbm_node; + unsigned end = pos + TBM_STRIDE; + + if (len < end) { + unsigned plen = len - pos; + unsigned pfx = extract_bits(prefix, pos, plen); + + if (tbm_data_p(tbm_node, pfx, plen) != NULL) + return BTRIE_DUPLICATE_PREFIX; /* prefix already has data */ + else { + tbm_insert_data(btrie, tbm_node, pfx, plen, data); + btrie->n_entries++; + return BTRIE_OKAY; + } + } + else { + unsigned pfx = extract_bits(prefix, pos, TBM_STRIDE); + + /* follow extending path */ + node = tbm_ext_path(tbm_node, pfx); + if (node == NULL) + node = tbm_insert_ext_path(btrie, tbm_node, pfx); + pos = end; + } + } + } +} + +static const void * +search_trie(const node_t *node, unsigned pos, + const btrie_oct_t *prefix, unsigned len) +{ + /* remember last TBM node seen with internal data */ + const struct tbm_node *int_node = 0; + unsigned int_pfx = 0, int_plen = 0; + + while (node) { + if (is_lc_node(node)) { + const struct lc_node *lc_node = &node->lc_node; + unsigned end = pos + lc_len(lc_node); + if (len < end) + break; + if (!prefixes_equal(prefix + lc_shift(pos), lc_node->prefix, + end - lc_base(pos))) + break; + + if (lc_is_terminal(lc_node)) + return lc_node->ptr.data; /* found terminal node */ + + pos = end; + node = lc_node->ptr.child; + } + else { + const struct tbm_node *tbm_node = &node->tbm_node; + unsigned end = pos + TBM_STRIDE; + if (len < end) { + unsigned plen = len - pos; + unsigned pfx = extract_bits(prefix, pos, plen); + if (has_internal_data(tbm_node, pfx, plen)) { + int_node = tbm_node; + int_pfx = pfx; + int_plen = plen; + } + break; + } + else { + unsigned pfx = extract_bits(prefix, pos, TBM_STRIDE); + if (has_internal_data(tbm_node, pfx >> 1, TBM_STRIDE - 1)) { + int_node = tbm_node; + int_pfx = pfx >> 1; + int_plen = TBM_STRIDE - 1; + } + pos = end; + node = tbm_ext_path(tbm_node, pfx); + } + } + } + + if (int_node) { + const void **data_p = tbm_data_p(int_node, int_pfx, int_plen); + while (data_p == NULL) { + assert(int_plen > 0); + int_pfx >>= 1; + int_plen--; + data_p = tbm_data_p(int_node, int_pfx, int_plen); + } + return *data_p; + } + + return NULL; +} + +struct btrie * +btrie_init(struct mempool *mp) +{ + struct btrie *btrie; + + if (!(btrie = mp_alloc(mp, sizeof(*btrie), 1))) + return NULL; + memset(btrie, 0, sizeof(*btrie)); + btrie->mp = mp; + btrie->alloc_total = sizeof(*btrie); + + /* count the empty root node */ + btrie->n_tbm_nodes = 1; + + return btrie; +} + +enum btrie_result +btrie_add_prefix(struct btrie *btrie, + const btrie_oct_t *prefix, unsigned len, const void *data) +{ + enum btrie_result rv; + if ((rv = setjmp(btrie->exception)) != 0) + return rv; /* out of memory */ + + return add_to_trie(btrie, &btrie->root, 0, prefix, len, data); +} + +const void * +btrie_lookup(const struct btrie *btrie, const btrie_oct_t *prefix, unsigned len) +{ + return search_trie(&btrie->root, 0, prefix, len); +} + +/**************************************************************** + * + * btrie_stats() - statistics reporting + */ + +#ifdef BTRIE_EXTENDED_STATS + +/* Define BTRIE_EXTENDED_STATS to get extra statistics (including + * trie depth). This statistics require a traversal of the entire trie + * to compute, and so are disabled by default. + */ + +struct stats { + size_t max_depth; + size_t total_depth; +#ifndef NDEBUG + size_t n_lc_nodes; + size_t n_tbm_nodes; + size_t n_entries; + size_t alloc_data; + size_t alloc_waste; +#endif +}; + +static void +node_stats(const node_t *node, size_t depth, struct stats *stats) +{ + if (depth > stats->max_depth) + stats->max_depth = depth; + stats->total_depth += depth; + + if (is_lc_node(node)) { +#ifndef NDEBUG + stats->n_lc_nodes++; +#endif + if (!lc_is_terminal(&node->lc_node)) + node_stats(node->lc_node.ptr.child, depth + 1, stats); +#ifndef NDEBUG + else + stats->n_entries++; +#endif + } + else { + unsigned i; + unsigned nchildren = count_bits(node->tbm_node.ext_bm); +#ifndef NDEBUG + unsigned ndata = count_bits(node->tbm_node.int_bm); + + stats->n_tbm_nodes++; + stats->n_entries += ndata; + stats->alloc_data += ndata * sizeof(void *); + stats->alloc_waste += (ndata % 2) * sizeof(void *); +#endif + for (i = 0; i < nchildren; i++) + node_stats(&node->tbm_node.ptr.children[i], depth + 1, stats); + } +} +#endif /* BTRIE_EXTENDED_STATS */ + +#ifndef NDEBUG +static size_t +count_free(const struct btrie *btrie) +{ + size_t total = 0; + unsigned sz; + for (sz = 1; sz <= MAX_CHILD_ARRAY_LEN; sz++) { + const struct free_hunk *free = btrie->free_list[sz - 1]; + size_t n; + for (n = 0; free; n++) + free = free->next; + total += sz * n; + } + return total * sizeof(node_t); +} +#endif /* not NDEBUG */ + + +const char * +btrie_stats(const struct btrie *btrie) +{ + static char buf[128]; + size_t n_nodes = btrie->n_lc_nodes + btrie->n_tbm_nodes; + size_t alloc_free = (btrie->alloc_total + + sizeof(node_t) /* do not double-count the root node */ + - n_nodes * sizeof(node_t) + - btrie->alloc_data + - btrie->alloc_waste + - sizeof(*btrie)); +#ifdef BTRIE_EXTENDED_STATS + struct stats stats; + double average_depth; + + memset(&stats, 0, sizeof(stats)); + node_stats(&btrie->root, 0, &stats); + average_depth = (double)stats.total_depth / n_nodes; + +#ifndef NDEBUG + /* check the node counts */ + assert(stats.n_lc_nodes == btrie->n_lc_nodes); + assert(stats.n_tbm_nodes == btrie->n_tbm_nodes); + assert(stats.n_entries == btrie->n_entries); + assert(stats.alloc_data == btrie->alloc_data); + assert(stats.alloc_waste == btrie->alloc_waste); +#endif /* not NDEBUG */ +#endif /* BTRIE_EXTENDED_STATS */ + +#ifndef NDEBUG + /* check that we haven't lost any memory */ + assert(alloc_free == count_free(btrie)); +#endif + +#ifdef BTRIE_DEBUG_ALLOC + dump_alloc_hist(btrie); +#endif + + snprintf(buf, sizeof(buf), + "ents=%lu tbm=%lu lc=%lu mem=%.0fk free=%lu waste=%lu" +#ifdef BTRIE_EXTENDED_STATS + " depth=%.1f/%lu" +#endif + , + (long unsigned)btrie->n_entries, + (long unsigned)btrie->n_tbm_nodes, + (long unsigned)btrie->n_lc_nodes, + (double)btrie->alloc_total / 1024, + (long unsigned)alloc_free, + (long unsigned)btrie->alloc_waste +#ifdef BTRIE_EXTENDED_STATS + , average_depth, (long unsigned)stats.max_depth +#endif + ); + + buf[sizeof(buf) - 1] = '\0'; + return buf; +} + + +/****************************************************************/ + + +#ifndef NO_MASTER_DUMP + +struct walk_context { + btrie_walk_cb_t *callback; + void *user_data; + + btrie_oct_t prefix[(BTRIE_MAX_PREFIX + 7) / 8]; +}; + +static void +walk_node(const node_t *node, unsigned pos, struct walk_context *ctx); + +static void +walk_tbm_node(const struct tbm_node *node, unsigned pos, + unsigned pfx, unsigned plen, + struct walk_context *ctx) +{ + btrie_oct_t *prefix = ctx->prefix; + int pbyte = pos / 8; + btrie_oct_t pbit = 0x80 >> (pos % 8); + const void **data_p = tbm_data_p(node, pfx, plen); + + if (pos >= BTRIE_MAX_PREFIX) { + /* This can/should not happen, but don't overwrite buffers if it does. */ + return; + } + + if (data_p) + ctx->callback(prefix, pos, *data_p, 0, ctx->user_data); + + /* walk children */ + if (plen < TBM_STRIDE - 1) { + /* children are internal prefixes in same node */ + walk_tbm_node(node, pos + 1, pfx << 1, plen + 1, ctx); + prefix[pbyte] |= pbit; + walk_tbm_node(node, pos + 1, (pfx << 1) + 1, plen + 1, ctx); + prefix[pbyte] &= ~pbit; + } + else { + /* children are extending paths */ + const node_t *ext_path; + if ((ext_path = tbm_ext_path(node, pfx << 1)) != NULL) + walk_node(ext_path, pos + 1, ctx); + if ((ext_path = tbm_ext_path(node, (pfx << 1) + 1)) != NULL) { + prefix[pbyte] |= pbit; + walk_node(ext_path, pos + 1, ctx); + prefix[pbyte] &= ~pbit; + } + } + + if (data_p) + ctx->callback(prefix, pos, *data_p, 1, ctx->user_data); +} + +static void +walk_lc_node(const struct lc_node *node, unsigned pos, + struct walk_context *ctx) +{ + btrie_oct_t *prefix = ctx->prefix; + unsigned end = pos + lc_len(node); + btrie_oct_t save_prefix = prefix[lc_shift(pos)]; + + if (end > BTRIE_MAX_PREFIX) { + /* This can/should not happen, but don't overwrite buffers if it does. */ + return; + } + + /* construct full prefix to node */ + memcpy(&prefix[lc_shift(pos)], node->prefix, lc_bytes(node, pos)); + if (end % 8) + prefix[end / 8] &= high_bits(end % 8); + + if (lc_is_terminal(node)) { + ctx->callback(prefix, end, node->ptr.data, 0, ctx->user_data); + ctx->callback(prefix, end, node->ptr.data, 1, ctx->user_data); + } + else + walk_node(node->ptr.child, end, ctx); + + prefix[lc_shift(pos)] = save_prefix; /* restore parents prefix */ + if (lc_bytes(node, pos) > 1) + memset(&prefix[lc_shift(pos) + 1], 0, lc_bytes(node, pos) - 1); +} + +static void +walk_node(const node_t *node, unsigned pos, struct walk_context *ctx) +{ + if (is_lc_node(node)) + walk_lc_node(&node->lc_node, pos, ctx); + else + walk_tbm_node(&node->tbm_node, pos, 0, 0, ctx); +} + +/* walk trie in lexicographical order + * + * calls callback twice (once preorder, once postorder) at each prefix + */ +void +btrie_walk(const struct btrie *btrie, + btrie_walk_cb_t *callback, void *user_data) +{ + struct walk_context ctx; + + memset(&ctx, 0, sizeof(ctx)); + ctx.callback = callback; + ctx.user_data = user_data; + + walk_node(&btrie->root, 0, &ctx); +} + + +#endif /* not NO_MASTER_DUMP */ + + +#ifdef TEST +/***************************************************************** + * + * Unit tests + * + */ +#include + +#ifndef UNUSED +# define UNUSED __attribute__((unused)) +#endif + +/* bogus replacements mp_alloc for running self-tests */ +void * +mp_alloc(UNUSED struct mempool *mp, unsigned sz, UNUSED int align) +{ + return malloc(sz); +} + + +#if 0 +# define PASS(name) puts("OK " name) +#else +# define PASS(name) fputs(".", stdout); fflush(stdout) +#endif + +const char * pgm_name = "???"; + +static void +test_struct_node_packing() +{ + node_t node; + + assert(sizeof(struct tbm_node) == 2 * sizeof(void *)); + assert(sizeof(struct lc_node) == 2 * sizeof(void *)); + assert(sizeof(node_t) == 2 * sizeof(void *)); + + /* The lc_node bit must be an alias for bit zero of int_bm, since + * that is the only unused bit in the TBM node structure. + */ + memset(&node, 0, sizeof(node)); + assert(node.tbm_node.int_bm == 0); + lc_init_flags(&node.lc_node, 0, 0); + assert(node.tbm_node.int_bm == bit(0)); + + PASS("test_struct_node_packing"); +} + +static void +test_bit() +{ + tbm_bitmap_t ones = ~(tbm_bitmap_t)0; + tbm_bitmap_t high_bit = ones ^ (ones >> 1); + + assert(bit(0) == high_bit); + assert(bit(1) == high_bit >> 1); + assert(bit(8 * sizeof(tbm_bitmap_t) - 1) == 1); + PASS("test_bit"); +} + +static void +test_count_bits() +{ + unsigned max_bits = sizeof(tbm_bitmap_t) * 8; + tbm_bitmap_t ones = ~(tbm_bitmap_t)0; + + assert(count_bits(0) == 0); + assert(count_bits(1) == 1); + assert(count_bits(2) == 1); + assert(count_bits(3) == 2); + assert(count_bits(ones) == max_bits); + assert(count_bits(~1) == max_bits - 1); + + /* count_bits(0x5555....) */ + assert(count_bits(ones / 3) == max_bits / 2); + /* count_bits(0x3333...) */ + assert(count_bits(ones / 5) == max_bits / 2); + /* count_bits(0x0f0f...) */ + assert(count_bits(ones / 17) == max_bits / 2); + /* count_bits(0x1010...) */ + assert(count_bits(ones / 255) == max_bits / 8); + + PASS("test_count_bits"); +} + +static void +test_count_bits_before() +{ + unsigned max_bits = sizeof(tbm_bitmap_t) * 8; + tbm_bitmap_t ones = ~(tbm_bitmap_t)0; + unsigned i; + + for (i = 0; i < max_bits; i++) { + assert(count_bits_before(0, i) == 0); + assert(count_bits_before(ones, i) == i); + } + + PASS("test_count_bits_before"); +} + +static void +test_count_bits_from() +{ + unsigned max_bits = sizeof(tbm_bitmap_t) * 8; + tbm_bitmap_t ones = ~(tbm_bitmap_t)0; + unsigned i; + + for (i = 0; i < max_bits; i++) { + assert(count_bits_from(0, i) == 0); + assert(count_bits_from(ones, i) == max_bits - i); + } + + PASS("test_count_bits_from"); +} + +static void +test_extract_bits() +{ + static btrie_oct_t prefix[] = { 0xff, 0x55, 0xaa, 0x00 }; + unsigned i; + + for (i = 0; i < 32; i++) + assert(extract_bits(prefix, i, 0) == 0); + + for (i = 0; i < 8; i++) + assert(extract_bits(prefix, i, 1) == 1); + for (i = 8; i < 16; i++) + assert(extract_bits(prefix, i, 1) == i % 2); + for (i = 16; i < 24; i++) + assert(extract_bits(prefix, i, 1) == (i + 1) % 2); + for (i = 24; i < 32; i++) + assert(extract_bits(prefix, i, 1) == 0); + + + assert(extract_bits(prefix, 2, 6) == 0x3f); + assert(extract_bits(prefix, 3, 6) == 0x3e); + assert(extract_bits(prefix, 4, 6) == 0x3d); + assert(extract_bits(prefix, 5, 6) == 0x3a); + assert(extract_bits(prefix, 6, 6) == 0x35); + assert(extract_bits(prefix, 7, 6) == 0x2a); + assert(extract_bits(prefix, 8, 6) == 0x15); + + PASS("test_extract_bits"); +} + +static void +test_high_bits() +{ + assert(high_bits(0) == 0x00); + assert(high_bits(1) == 0x80); + assert(high_bits(2) == 0xc0); + assert(high_bits(3) == 0xe0); + assert(high_bits(4) == 0xf0); + assert(high_bits(5) == 0xf8); + assert(high_bits(6) == 0xfc); + assert(high_bits(7) == 0xfe); + assert(high_bits(8) == 0xff); + PASS("test_high_bits"); +} + +static void +test_prefixes_equal() +{ + btrie_oct_t prefix1[LC_BYTES_PER_NODE]; + btrie_oct_t prefix2[LC_BYTES_PER_NODE]; + unsigned i; + memset(prefix1, 0xaa, LC_BYTES_PER_NODE); + memset(prefix2, 0xaa, LC_BYTES_PER_NODE); + + for (i = 0; i < 8 * LC_BYTES_PER_NODE; i++) { + assert(prefixes_equal(prefix1, prefix2, i)); + prefix1[i / 8] ^= 1 << (7 - i % 8); + assert(!prefixes_equal(prefix1, prefix2, 8 * LC_BYTES_PER_NODE)); + assert(prefixes_equal(prefix1, prefix2, i)); + if (i + 1 < 8 * LC_BYTES_PER_NODE) + assert(!prefixes_equal(prefix1, prefix2, i + 1)); + prefix1[i / 8] ^= 1 << (7 - i % 8); + } + PASS("test_prefixes_equal"); +} + +static void +test_common_prefix() +{ + btrie_oct_t prefix1[LC_BYTES_PER_NODE]; + btrie_oct_t prefix2[LC_BYTES_PER_NODE]; + unsigned i; + memset(prefix1, 0x55, LC_BYTES_PER_NODE); + memset(prefix2, 0x55, LC_BYTES_PER_NODE); + + for (i = 0; i < 8 * LC_BYTES_PER_NODE; i++) { + assert(common_prefix(prefix1, prefix2, i) == i); + prefix1[i / 8] ^= 1 << (7 - i % 8); + assert(common_prefix(prefix1, prefix2, 8 * LC_BYTES_PER_NODE) == i); + if (i + 1 < 8 * LC_BYTES_PER_NODE) + assert(common_prefix(prefix1, prefix2, i+1) == i); + prefix1[i / 8] ^= 1 << (7 - i % 8); + } + PASS("test_common_prefix"); +} + + +static void +test_base_index() +{ + assert(base_index(0,0) == 1); + assert(base_index(0,1) == 2); + assert(base_index(1,1) == 3); + assert(base_index(0,2) == 4); + assert(base_index(1,2) == 5); + assert(base_index(2,2) == 6); + assert(base_index(3,2) == 7); + PASS("test_base_index"); +} + +static void +test_has_internal_data() +{ + struct tbm_node node; + unsigned plen, pfx, bi; + for (plen = 0; plen < TBM_STRIDE; plen++) { + for (pfx = 0; pfx < 1U << plen; pfx++) { + tbm_bitmap_t ancestor_mask = 0; + for (bi = base_index(pfx, plen); bi; bi >>= 1) { + node.int_bm = bit(bi); + ancestor_mask |= bit(bi); + assert(has_internal_data(&node, pfx, plen)); + } + node.int_bm = ~ancestor_mask; + assert(!has_internal_data(&node, pfx, plen)); + } + } + PASS("test_has_internal_data"); +} + +/****************************************************************/ +static const btrie_oct_t numbered_bytes[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, +}; + +static void +check_non_terminal_lc_node(struct lc_node *node, unsigned len) +{ + assert(is_lc_node((node_t *)node)); + assert(!lc_is_terminal(node)); + assert(lc_len(node) == len); +} + +static void +check_terminal_lc_node(struct lc_node *node, unsigned len, const void *data) +{ + assert(is_lc_node((node_t *)node)); + assert(lc_is_terminal(node)); + assert(lc_len(node) == len); + assert(node->ptr.data == data); +} + +static void +test_init_terminal_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + node_t node; + struct lc_node *head = &node.lc_node; + + init_terminal_node(btrie, &node, 0, + numbered_bytes, 8 * LC_BYTES_PER_NODE, data); + check_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE, data); + assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0); + + init_terminal_node(btrie, &node, 7, + numbered_bytes, 8 * LC_BYTES_PER_NODE, data); + check_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE - 7, data); + assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0); + + init_terminal_node(btrie, &node, 0, + numbered_bytes, 2 * 8 * LC_BYTES_PER_NODE, data); + check_non_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE); + assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE) == 0); + { + struct lc_node *child = &head->ptr.child->lc_node; + check_terminal_lc_node(child, 8 * LC_BYTES_PER_NODE, data); + assert(memcmp(child->prefix, &numbered_bytes[LC_BYTES_PER_NODE], + LC_BYTES_PER_NODE) == 0); + } + + init_terminal_node(btrie, &node, 15, + numbered_bytes, 8 * LC_BYTES_PER_NODE + 15, data); + check_non_terminal_lc_node(head, 8 * LC_BYTES_PER_NODE - 7); + assert(memcmp(head->prefix, &numbered_bytes[1], LC_BYTES_PER_NODE) == 0); + { + struct lc_node *child = &head->ptr.child->lc_node; + check_terminal_lc_node(child, 7, data); + assert(child->prefix[0] == numbered_bytes[LC_BYTES_PER_NODE + 1]); + } + + PASS("test_init_terminal_node"); +} + +static void +test_coalesce_lc_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + node_t node; + struct lc_node *head = &node.lc_node; + + /* test merging */ + init_terminal_node(btrie, &node, 0, + numbered_bytes, 8 * (LC_BYTES_PER_NODE + 1), data); + check_non_terminal_lc_node(head, LC_BYTES_PER_NODE * 8); + lc_add_to_len(head, -8); + coalesce_lc_node(btrie, head, 8); + check_terminal_lc_node(head, LC_BYTES_PER_NODE * 8, data); + assert(head->prefix[LC_BYTES_PER_NODE - 1] + == numbered_bytes[LC_BYTES_PER_NODE]); + + /* test bit stealing */ + init_terminal_node(btrie, &node, 0, + numbered_bytes, 8 * (2 * LC_BYTES_PER_NODE), data); + check_non_terminal_lc_node(head, LC_BYTES_PER_NODE * 8); + lc_add_to_len(head, -15); + coalesce_lc_node(btrie, head, 15); + check_non_terminal_lc_node(head, LC_BYTES_PER_NODE * 8 - 7); + assert(memcmp(head->prefix, numbered_bytes, LC_BYTES_PER_NODE - 1) == 0); + assert(head->prefix[LC_BYTES_PER_NODE - 1] + == numbered_bytes[LC_BYTES_PER_NODE]); + { + struct lc_node *child = &head->ptr.child->lc_node; + check_terminal_lc_node(child, 8 * (LC_BYTES_PER_NODE - 1), data); + assert(memcmp(child->prefix, &numbered_bytes[LC_BYTES_PER_NODE + 1], + LC_BYTES_PER_NODE - 1) == 0); + } + + PASS("test_coalesce_lc_node"); +} + +static void +test_shorten_lc_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + node_t node, shorter; + + /* test shorten without shift */ + init_terminal_node(btrie, &node, 0, + numbered_bytes, 8 * LC_BYTES_PER_NODE, data); + memset(shorter.lc_node.prefix, 0xff, LC_BYTES_PER_NODE); + shorten_lc_node(btrie, &shorter, 7, &node.lc_node, 0); + check_terminal_lc_node(&shorter.lc_node, LC_BYTES_PER_NODE * 8 - 7, data); + assert(memcmp(shorter.lc_node.prefix, numbered_bytes, LC_BYTES_PER_NODE) + == 0); + + /* test shorten with shift */ + init_terminal_node(btrie, &node, 7, + numbered_bytes, 8 * LC_BYTES_PER_NODE, data); + memset(shorter.lc_node.prefix, 0xff, LC_BYTES_PER_NODE); + shorten_lc_node(btrie, &shorter, 9, &node.lc_node, 7); + check_terminal_lc_node(&shorter.lc_node, LC_BYTES_PER_NODE * 8 - 9, data); + assert(memcmp(shorter.lc_node.prefix, &numbered_bytes[1], + LC_BYTES_PER_NODE - 1) == 0); + + { + /* test child stealing */ + struct lc_node head; + node_t tail, shorter; + + lc_init_flags(&head, 0, 7); + head.ptr.child = &tail; + init_empty_node(btrie, &tail); + + shorten_lc_node(btrie, &shorter, 7, &head, 0); + assert(is_empty_node(&shorter)); + } + + PASS("test_shorten_lc_node"); +} + +static void +test_split_lc_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + struct lc_node node; + + init_terminal_node(btrie, (node_t *)&node, 1, numbered_bytes, 25, data); + split_lc_node(btrie, &node, 1, 8); + check_non_terminal_lc_node(&node, 8); + check_terminal_lc_node(&node.ptr.child->lc_node, 16, data); + + /* test conversion of terminal to non-terminal */ + init_terminal_node(btrie, (node_t *)&node, 7, numbered_bytes, 10, data); + split_lc_node(btrie, &node, 7, 3); + check_non_terminal_lc_node(&node, 3); + check_terminal_lc_node(&node.ptr.child->lc_node, 0, data); + + PASS("test_split_lc_node"); +} + +static void +test_convert_lc_node_1() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + struct lc_node head; + + /* test tail is left */ + lc_init_flags(&head, 0, 1); + head.prefix[0] = 0; + head.ptr.child = alloc_nodes(btrie, 1, 0); + init_terminal_node(btrie, head.ptr.child, 1, numbered_bytes, 1, data); + convert_lc_node_1(btrie, &head, 0); + { + node_t *result = (node_t *)&head; + assert(is_tbm_node(result)); + assert(result->tbm_node.ext_bm == 0); + assert(result->tbm_node.int_bm == bit(base_index(0, 1))); + assert(*tbm_data_p(&result->tbm_node, 0, 1) == data); + } + + /* test tail is right */ + lc_init_flags(&head, 0, 1); + head.prefix[0] = 1; + head.ptr.child = alloc_nodes(btrie, 1, 0); + init_terminal_node(btrie, head.ptr.child, 8, numbered_bytes, 10, data); + convert_lc_node_1(btrie, &head, 7); + { + node_t *result = (node_t *)&head; + assert(is_tbm_node(result)); + assert(result->tbm_node.ext_bm == 0); + assert(result->tbm_node.int_bm == bit(base_index(4, 3))); + assert(*tbm_data_p(&result->tbm_node, 4, 3) == data); + } + + PASS("test_convert_lc_node_1"); +} + +static void +test_convert_lc_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + node_t node; + + /* if (len >= TBM_STRIDE) */ + init_terminal_node(btrie, &node, 7, numbered_bytes, TBM_STRIDE + 7, data); + convert_lc_node(btrie, &node.lc_node, 7); + assert(is_tbm_node(&node)); + assert(node.tbm_node.ext_bm == bit(0)); + assert(node.tbm_node.int_bm == 0); + check_terminal_lc_node(&tbm_ext_path(&node.tbm_node, 0)->lc_node, 0, data); + + /* if (lc_is_terminal(node)) */ + init_terminal_node(btrie, &node, 0, numbered_bytes, 0, data); + convert_lc_node(btrie, &node.lc_node, 0); + assert(is_tbm_node(&node)); + assert(node.tbm_node.ext_bm == 0); + assert(node.tbm_node.int_bm == bit(base_index(0, 0))); + assert(*tbm_data_p(&node.tbm_node, 0, 0) == data); + + /* else */ + lc_init_flags(&node.lc_node, 0, TBM_STRIDE - 1); + node.lc_node.prefix[0] = 0; + node.lc_node.ptr.child = alloc_nodes(btrie, 1, 0); + init_empty_node(btrie, node.lc_node.ptr.child); + tbm_insert_data(btrie, &node.lc_node.ptr.child->tbm_node, 0, 0, data); + + convert_lc_node(btrie, &node.lc_node, 0); + assert(is_tbm_node(&node)); + assert(node.tbm_node.ext_bm == 0); + assert(node.tbm_node.int_bm == bit(base_index(0, TBM_STRIDE - 1))); + assert(*tbm_data_p(&node.tbm_node, 0, TBM_STRIDE - 1) == data); + + PASS("test_convert_lc_node"); +} + +static void +test_insert_lc_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + node_t node, tail; + + /* test optimized case, last_bit == 0 */ + init_terminal_node(btrie, &tail, 9, numbered_bytes, 17, data); + insert_lc_node(btrie, &node, 8, 0, 0, &tail); + check_terminal_lc_node(&node.lc_node, 9, data); + assert(memcmp(node.lc_node.prefix, &numbered_bytes[1], 2) == 0); + + /* test optimized case, last_bit == 1 */ + init_terminal_node(btrie, &tail, 7, &numbered_bytes[0x12], 15, data); + insert_lc_node(btrie, &node, 6, 0x10, 1, &tail); + check_terminal_lc_node(&node.lc_node, 9, data); + assert(node.lc_node.prefix[0] == 0x12); + assert(node.lc_node.prefix[1] == 0x13); + + /* test with shift */ + init_terminal_node(btrie, &tail, 0, numbered_bytes, 8, data); + insert_lc_node(btrie, &node, 7, 0x40, 1, &tail); + check_terminal_lc_node(&node.lc_node, 9, data); + assert(node.lc_node.prefix[0] == 0x41); + assert(node.lc_node.prefix[1] == numbered_bytes[0]); + + /* test with TBM node */ + init_empty_node(btrie, &tail); + insert_lc_node(btrie, &node, 6, 0x40, 0, &tail); + check_non_terminal_lc_node(&node.lc_node, 1); + assert(is_tbm_node(node.lc_node.ptr.child)); + + PASS("test_insert_lc_node"); +} + +static void +test_next_pbyte() +{ + assert(next_pbyte(0xff, 0, 1) == 0x80 >> (TBM_STRIDE - 1)); + assert(next_pbyte(0xff, 1, 1) == (0x80 | (0x80 >> TBM_STRIDE))); + assert(next_pbyte(0xff, 2, 1) == (0xc0 | (0x80 >> (TBM_STRIDE + 1)))); + assert(next_pbyte(0xff, 8 - TBM_STRIDE, 1) == 0); + assert(next_pbyte(0xff, 9 - TBM_STRIDE, 1) == 0x80); + + PASS("test_next_pbyte"); +} + +static void +test_init_tbm_node() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + unsigned lr; + node_t node; + + /* test root data */ + init_tbm_node(btrie, &node, 0, 0, &data, NULL, NULL); + assert(is_tbm_node(&node)); + assert(node.tbm_node.ext_bm == 0); + assert(node.tbm_node.int_bm == bit(base_index(0, 0))); + assert(*tbm_data_p(&node.tbm_node, 0, 0) == data); + + for (lr = 0; lr < 2; lr++) { + node_t child; + node_t *left = lr ? NULL : &child; + node_t *right = lr ? &child : NULL; + unsigned base = lr ? (1U << (TBM_STRIDE - 1)) : 0; + unsigned pfx; + + /* test with long LC node child */ + init_terminal_node(btrie, &child, 1, numbered_bytes, TBM_STRIDE + 1, data); + init_tbm_node(btrie, &node, 0, 0, NULL, left, right); + assert(is_tbm_node(&node)); + assert(node.tbm_node.ext_bm == bit(base)); + assert(node.tbm_node.int_bm == 0); + check_terminal_lc_node(&tbm_ext_path(&node.tbm_node, base)->lc_node, + 1, data); + + /* test with short LC node children */ + init_terminal_node(btrie, &child, 1, numbered_bytes, TBM_STRIDE - 1, data); + init_tbm_node(btrie, &node, 0, 0, NULL, left, right); + assert(is_tbm_node(&node)); + assert(node.tbm_node.ext_bm == 0); + assert(node.tbm_node.int_bm == bit(base_index(base >> 1, TBM_STRIDE-1))); + assert(*tbm_data_p(&node.tbm_node, base >> 1, TBM_STRIDE-1) == data); + + /* construct TBM node with all eight combinations of having data, + * left_ext and/or right_ext in its extending paths */ + init_empty_node(btrie, &child); + for (pfx = 0; pfx < 8; pfx++) { + if (pfx & 1) + tbm_insert_data(btrie, &child.tbm_node, pfx, TBM_STRIDE - 1, data); + if (pfx & 2) { + btrie_oct_t prefix0 = 0; + init_terminal_node(btrie, + tbm_insert_ext_path(btrie, &child.tbm_node, 2*pfx), + TBM_STRIDE + 1, + &prefix0, TBM_STRIDE + 2, data); + } + if (pfx & 4) { + btrie_oct_t prefix0 = 0x80 >> TBM_STRIDE; + init_terminal_node(btrie, + tbm_insert_ext_path(btrie, &child.tbm_node, 2*pfx+1), + TBM_STRIDE + 1, + &prefix0, TBM_STRIDE + 3, data); + } + } + init_tbm_node(btrie, &node, 0, 0, NULL, left, right); + for (pfx = 0; pfx < 8; pfx++) { + unsigned base = lr ? (1U << (TBM_STRIDE - 1)) : 0; + node_t *ext_path = tbm_ext_path(&node.tbm_node, base + pfx); + if (pfx == 0) + assert(ext_path == NULL); + else if (pfx == 1) + check_terminal_lc_node(&ext_path->lc_node, 0, data); + else if (pfx == 2) { + check_terminal_lc_node(&ext_path->lc_node, 2, data); + assert(ext_path->lc_node.prefix[0] == 0); + } + else if (pfx == 4) { + check_terminal_lc_node(&ext_path->lc_node, 3, data); + assert(ext_path->lc_node.prefix[0] == (0x80 >> TBM_STRIDE)); + } + else { + tbm_bitmap_t int_bm = 0; + assert(is_tbm_node(ext_path)); + if (pfx & 1) { + int_bm |= bit(base_index(0, 0)); + assert(*tbm_data_p(&ext_path->tbm_node, 0, 0) == data); + } + if (pfx & 2) { + int_bm |= bit(base_index(0, 2)); + assert(*tbm_data_p(&ext_path->tbm_node, 0, 2) == data); + } + if (pfx & 4) { + int_bm |= bit(base_index(4, 3)); + assert(*tbm_data_p(&ext_path->tbm_node, 4, 3) == data); + } + assert(ext_path->tbm_node.int_bm == int_bm); + } + } + } + + PASS("test_init_tbm_node"); +} + +static void +test_add_to_trie() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data = (void *)0xdeadbeef; + enum btrie_result result; + unsigned pfx, plen; + node_t root; + + /* test initial insertion */ + init_empty_node(btrie, &root); + result = add_to_trie(btrie, &root, 0, + numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE, data); + assert(result == BTRIE_OKAY); + check_non_terminal_lc_node(&root.lc_node, 8 * LC_BYTES_PER_NODE); + check_terminal_lc_node(&root.lc_node.ptr.child->lc_node, + 8 * LC_BYTES_PER_NODE, data); + + /* test can follow LC node to tail, and then detect duplicate prefix */ + result = add_to_trie(btrie, &root, 0, + numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE, data); + assert(result == BTRIE_DUPLICATE_PREFIX); + + /* test can insert new TBM node within existing LC node */ + result = add_to_trie(btrie, &root, 0, + &numbered_bytes[1], 16, data); + assert(result == BTRIE_OKAY); + check_non_terminal_lc_node(&root.lc_node, 7); + assert(is_tbm_node(root.lc_node.ptr.child)); + + /* test can convert terminal LC node to TBM node */ + init_terminal_node(btrie, &root, 0, numbered_bytes, 12, data); + result = add_to_trie(btrie, &root, 0, numbered_bytes, 24, data); + assert(result == BTRIE_OKAY); + check_non_terminal_lc_node(&root.lc_node, 12); + assert(is_tbm_node(root.lc_node.ptr.child)); + + /* test can insert internal prefix data in TBM node */ + for (plen = 0; plen < TBM_STRIDE; plen++) { + for (pfx = 0; pfx < (1U << plen); pfx++) { + btrie_oct_t prefix0 = plen ? pfx << (8 - plen) : 0; + init_empty_node(btrie, &root); + init_terminal_node(btrie, tbm_insert_ext_path(btrie, &root.tbm_node, 0), + TBM_STRIDE, + numbered_bytes, 8, data); + result = add_to_trie(btrie, &root, 0, &prefix0, plen, data); + assert(result == BTRIE_OKAY); + assert(is_tbm_node(&root)); + assert(root.tbm_node.ext_bm == bit(0)); + assert(root.tbm_node.int_bm == bit(base_index(pfx, plen))); + assert(*tbm_data_p(&root.tbm_node, pfx, plen) == data); + + result = add_to_trie(btrie, &root, 0, &prefix0, plen, data); + assert(result == BTRIE_DUPLICATE_PREFIX); + } + } + + /* test can add extending paths to TBM node */ + for (pfx = 0; pfx < (1U << TBM_STRIDE); pfx++) { + btrie_oct_t prefix0 = pfx << (8 - TBM_STRIDE); + init_empty_node(btrie, &root); + tbm_insert_data(btrie, &root.tbm_node, 0, 0, data); + result = add_to_trie(btrie, &root, 0, &prefix0, 8, data); + assert(result == BTRIE_OKAY); + assert(is_tbm_node(&root)); + assert(root.tbm_node.ext_bm == bit(pfx)); + assert(root.tbm_node.int_bm == bit(base_index(0, 0))); + check_terminal_lc_node(&tbm_ext_path(&root.tbm_node, pfx)->lc_node, + 8 - TBM_STRIDE, data); + + result = add_to_trie(btrie, &root, 0, &prefix0, 8, data); + assert(result == BTRIE_DUPLICATE_PREFIX); + } + + /* test can follow extending path */ + init_empty_node(btrie, &root); + init_terminal_node(btrie, + tbm_insert_ext_path(btrie, &root.tbm_node, 0), TBM_STRIDE, + numbered_bytes, 8, data); + result = add_to_trie(btrie, &root, 0, numbered_bytes, 7, data); + assert(result == BTRIE_OKAY); + assert(root.tbm_node.ext_bm == bit(0)); + assert(root.tbm_node.int_bm == 0); + check_non_terminal_lc_node(&root.tbm_node.ptr.children[0].lc_node, + 7 - TBM_STRIDE); + + PASS("test_add_to_trie"); +} + +static void +test_search_trie() +{ + struct btrie *btrie = btrie_init(NULL); + const void *data01 = (void *)0xdead0001; + const void *data11 = (void *)0xdead0101; + const void *data = (void *)0xdeadbeef; + unsigned plen, pfx; + node_t root; + + /* test can follow chain of LC nodes to an exact match */ + init_empty_node(btrie, &root); + add_to_trie(btrie, &root, 0, + numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE, data); + + assert(search_trie(&root, 0, numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE) + == data); + assert(search_trie(&root, 0, numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE + 1) + == data); + assert(search_trie(&root, 0, numbered_bytes, 8 * 2 * LC_BYTES_PER_NODE - 1) + == NULL); + assert(search_trie(&root, 0, &numbered_bytes[1], 8 * 2 * LC_BYTES_PER_NODE) + == NULL); + + /* test can follow extending path to an exact match */ + for (pfx = 0; pfx < (1U << TBM_STRIDE); pfx++) { + btrie_oct_t prefix0 = pfx << (8 - TBM_STRIDE); + init_empty_node(btrie, &root); + tbm_insert_data(btrie, &root.tbm_node, 0, 1, data01); + tbm_insert_data(btrie, &root.tbm_node, 1, 1, data11); + add_to_trie(btrie, &root, 0, &prefix0, 8, data); + assert(search_trie(&root, 0, &prefix0, 8) == data); + /* test that last matching TBM internal prefix gets picked up */ + if (prefix0 & 0x80) + assert(search_trie(&root, 0, &prefix0, 7) == data11); + else + assert(search_trie(&root, 0, &prefix0, 7) == data01); + prefix0 ^= 1 << (8 - TBM_STRIDE); + if (prefix0 & 0x80) + assert(search_trie(&root, 0, &prefix0, 8) == data11); + else + assert(search_trie(&root, 0, &prefix0, 8) == data01); + } + + /* test finding of TBM internal prefixes */ + init_empty_node(btrie, &root); + tbm_insert_data(btrie, &root.tbm_node, 0, 1, data01); + tbm_insert_data(btrie, &root.tbm_node, 1, 1, data11); + + assert(search_trie(&root, 0, numbered_bytes, 0) == NULL); + for (plen = 1; plen < TBM_STRIDE; plen++) { + for (pfx = 0; pfx < (1U << TBM_STRIDE); pfx++) { + btrie_oct_t prefix0 = pfx << (8 - plen); + if (prefix0 & 0x80) + assert(search_trie(&root, 0, &prefix0, plen) == data11); + else + assert(search_trie(&root, 0, &prefix0, plen) == data01); + } + } + + PASS("test_search_trie"); +} + +static int +unit_tests() +{ + test_struct_node_packing(); + test_bit(); + test_count_bits(); + test_count_bits_before(); + test_count_bits_from(); + test_extract_bits(); + test_high_bits(); + test_prefixes_equal(); + test_common_prefix(); + test_base_index(); + test_has_internal_data(); + + test_init_terminal_node(); + test_coalesce_lc_node(); + test_shorten_lc_node(); + test_split_lc_node(); + test_convert_lc_node_1(); + test_convert_lc_node(); + test_insert_lc_node(); + test_next_pbyte(); + test_init_tbm_node(); + test_add_to_trie(); + test_search_trie(); + + puts("\nOK"); + return 0; +} + +/***************************************************************** + * + * btrie_dump: print out the trie structure (for testing) + * + */ +#define INDENT_FILL "....:....|....:....|....:....|....:....|" + +static void dump_node(const node_t *node, unsigned pos, btrie_oct_t *prefix, + int indent); + +static void +dump_prefix(btrie_oct_t *prefix, unsigned len, int indent, const char *tail) +{ + unsigned i; + + printf("%*.*s0x", indent, indent, INDENT_FILL); + for (i = 0; i < len / 8; i++) + printf("%02x", prefix[i]); + if (len % 8) + printf("%02x", prefix[len / 8] & high_bits(len % 8)); + printf("/%u%s", len, tail); +} + +/* the opposite of extract_bits, sets a short string of bits from integer */ +static void +insert_bits(btrie_oct_t *prefix, unsigned pos, btrie_oct_t pfx, unsigned nbits) +{ + if (nbits != 0) { + unsigned v = (prefix[pos / 8] << 8) + prefix[pos / 8 + 1]; + unsigned mask = (1U << nbits) - 1; + unsigned shift = 16 - (pos % 8) - nbits; + v = (v & ~(mask << shift)) | (pfx << shift); + prefix[pos / 8] = v >> 8; + prefix[pos / 8 + 1] = (btrie_oct_t)v; + } +} + +static void +dump_tbm_node(const struct tbm_node *node, unsigned pos, + btrie_oct_t *prefix, int indent) +{ + unsigned pfx = 0, plen = 0; + + dump_prefix(prefix, pos, indent, " [tbm]\n"); + + for (;;) { + if (plen < TBM_STRIDE) { + const void **data_p = tbm_data_p(node, pfx, plen); + if (data_p) { + insert_bits(prefix, pos, pfx, plen); + dump_prefix(prefix, pos + plen, indent, ""); + printf(" [%u/%u] (%s)\n", pfx, plen, (const char *)*data_p); + } + plen++; + pfx <<= 1; + } + else { + const node_t *ext_path = tbm_ext_path(node, pfx); + if (ext_path) { + insert_bits(prefix, pos, pfx, TBM_STRIDE); + dump_node(ext_path, pos + TBM_STRIDE, prefix, indent + 1); + } + while (pfx & 1) { + if (--plen == 0) + return; + pfx >>= 1; + } + pfx++; + } + } +} + +static void +dump_lc_node(const struct lc_node *node, unsigned pos, + btrie_oct_t *prefix, int indent) +{ + unsigned end = pos + lc_len(node); + btrie_oct_t save_prefix = prefix[lc_shift(pos)]; + + memcpy(&prefix[lc_shift(pos)], node->prefix, lc_bytes(node, pos)); + + if (lc_is_terminal(node)) { + dump_prefix(prefix, end, indent, ""); + printf(" (%s)\n", (const char *)node->ptr.data); + } + else { + dump_prefix(prefix, end, indent, "\n"); + dump_node(node->ptr.child, end, prefix, indent + 1); + } + + prefix[lc_shift(pos)] = save_prefix; + if (lc_bytes(node, pos) > 1) + memset(&prefix[lc_shift(pos) + 1], 0, lc_bytes(node, pos) - 1); +} + +static void +dump_node(const node_t *node, unsigned pos, btrie_oct_t *prefix, int indent) +{ + if (is_lc_node(node)) + dump_lc_node(&node->lc_node, pos, prefix, indent); + else + dump_tbm_node(&node->tbm_node, pos, prefix, indent); +} + +static void +btrie_dump(struct btrie *btrie) +{ + btrie_oct_t prefix[(BTRIE_MAX_PREFIX + 7) / 8]; + + memset(prefix, 0, sizeof(prefix)); + dump_node(&btrie->root, 0, prefix, 0); + puts(btrie_stats(btrie)); +} + +/**************************************************************** + * + * test program - just enough to construct a trie and preform a lookup + * + */ + +#include + +static int +parse_prefix(const char *arg, btrie_oct_t prefix[16], unsigned *len) +{ + char addrbuf[128]; + return sscanf(arg, "%127[0-9a-fA-F:]/%u", addrbuf, len) == 2 + && inet_pton(AF_INET6, addrbuf, prefix) == 1; +} + + +static int +test_btrie(int argc, char *argv[]) +{ + struct btrie *btrie = btrie_init(NULL); + int i; + btrie_oct_t prefix[16]; + unsigned len; + + for (i = 1; i < argc-1; i++) { + if (!parse_prefix(argv[i], prefix, &len)) { + fprintf(stderr, "Can not parse arg '%s'\n", argv[i]); + return 1; + } + btrie_add_prefix(btrie, prefix, len, argv[i]); + } + + btrie_dump(btrie); + + + if (argc > 1) { + const void *data; + + if (!parse_prefix(argv[argc-1], prefix, &len)) { + fprintf(stderr, "Can not parse arg '%s'\n", argv[argc-1]); + return 1; + } + data = btrie_lookup(btrie, prefix, 128); + printf("lookup(%s) => %s\n", argv[argc-1], (const char *)data); + } + + return 0; +} + +int +main(int argc, char *argv[]) +{ + if ((pgm_name = strrchr(argv[0], '/')) != NULL) + pgm_name++; + else + pgm_name = argv[0]; + + if (argc > 1) + return test_btrie(argc, argv); + else + return unit_tests(); +} + +#endif /* TEST */ diff --git a/contrib/lc-btrie/btrie.h b/contrib/lc-btrie/btrie.h new file mode 100644 index 0000000..5df3433 --- /dev/null +++ b/contrib/lc-btrie/btrie.h @@ -0,0 +1,87 @@ +/* Level-Compressed Tree Bitmap (LC-TBM) Trie implementation + * + * Contributed by Geoffrey T. Dairiki + * + * This file is released under a "Three-clause BSD License". + * + * Copyright (c) 2013, Geoffrey T. Dairiki + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * * Neither the name of Geoffrey T. Dairiki nor the names of other + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GEOFFREY + * T. DAIRIKI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ +#ifndef _BTRIE_H_INCLUDED +#define _BTRIE_H_INCLUDED + +#include "config.h" + +#ifndef NO_STDINT_H +# include +typedef uint8_t btrie_oct_t; +#else +typedef unsigned char btrie_oct_t; +#endif + +/* maximum length of bit string btrie_walk() can handle + * + * note: this limit is necessitated by the use of fixed length buffers + * in btrie_walk() --- btrie_add_prefix() and btrie_lookup() impose no + * limit on the length of bitstrings + */ +#define BTRIE_MAX_PREFIX 128 + +struct btrie; +struct mempool; + +struct btrie * btrie_init(struct mempool *mp); + +enum btrie_result { + BTRIE_OKAY = 0, + BTRIE_ALLOC_FAILED = -1, + BTRIE_DUPLICATE_PREFIX = 1 +}; + +enum btrie_result btrie_add_prefix(struct btrie *btrie, + const btrie_oct_t *prefix, unsigned len, + const void *data); + +const void *btrie_lookup(const struct btrie *btrie, + const btrie_oct_t *pfx, unsigned len); + +const char *btrie_stats(const struct btrie *btrie); + +#ifndef NO_MASTER_DUMP +typedef void btrie_walk_cb_t(const btrie_oct_t *prefix, unsigned len, + const void *data, int post, void *user_data); + +void btrie_walk(const struct btrie *btrie, + btrie_walk_cb_t *callback, void *user_data); +#endif /* not NO_MASTER_DUMP */ + +#endif /* _BTRIE_H_INCLUDED */ diff --git a/contrib/lc-btrie/mempool.c b/contrib/lc-btrie/mempool.c new file mode 100644 index 0000000..3685fcd --- /dev/null +++ b/contrib/lc-btrie/mempool.c @@ -0,0 +1,108 @@ +/* memory pool implementation + */ + +#include +#include +#include "mempool.h" + +#define MEM_ALIGNMENT 16 /* Better for SSE */ +#define alignmask (MEM_ALIGNMENT-1) + +#define MEMPOOL_CHUNKSIZE (65536-sizeof(unsigned)*4) + +struct mempool_chunk { + char buf[MEMPOOL_CHUNKSIZE+MEM_ALIGNMENT]; + struct mempool_chunk *next; + unsigned size; +}; + +struct mempool_cfull { /* pseudo-chunk: one entry into full list */ + struct mempool_chunk *next; + char buf[1]; +}; + +void mp_init(struct mempool *mp) { + memset (mp, 0, sizeof (*mp)); +} + +void *mp_alloc(struct mempool *mp, unsigned size, int align) { + if (size >= MEMPOOL_CHUNKSIZE / 2) { + /* for large blocks, allocate separate "full" chunk */ + struct mempool_cfull *c = malloc (sizeof(*c)+size-1); + if (!c) { + return NULL; + } + c->next = mp->mp_fullc; + mp->mp_fullc = (struct mempool_chunk*)c; + return c->buf; + } + else { + struct mempool_chunk *c; + struct mempool_chunk *best; /* "best fit" chunk */ + unsigned avg; /* average data size: total size / numallocs */ + + ++mp->mp_nallocs; mp->mp_datasz += size; + avg = mp->mp_datasz / mp->mp_nallocs; + + /* round size up to a multiple of alignto */ + if (align) { + size = (size + alignmask) & ~alignmask; + } + + for(c = mp->mp_chunk, best = NULL; c; c = c->next) + if (c->size >= size && (!best || best->size > c->size)) { + best = c; + if (c->size - size < avg) { + break; + } + } + + if (best != NULL) { /* found a free chunk */ + char *b; + if (align) { + best->size &= ~alignmask; + } + + b = best->buf + MEMPOOL_CHUNKSIZE - best->size; + best->size -= size; + + if (best->size < avg) { + struct mempool_chunk **cp = &mp->mp_chunk; + while(*cp != best) { + cp = &(*cp)->next; + } + *cp = best->next; + best->next = mp->mp_fullc; + mp->mp_fullc = best; + } + return b; + } + + else { /* no sutable chunks -> allocate new one */ + c = (struct mempool_chunk *)malloc(sizeof(*c)); + if (!c) { + return NULL; + } + + c->next = mp->mp_chunk; + mp->mp_chunk = c; + c->size = MEMPOOL_CHUNKSIZE - size; + + return c->buf; + } + } +} + +void mp_free(struct mempool *mp) { + struct mempool_chunk *c; + + while((c = mp->mp_chunk) != NULL) { + mp->mp_chunk = c->next; + free(c); + } + + while((c = mp->mp_fullc) != NULL) { + mp->mp_fullc = c->next; + free(c); + } +} diff --git a/contrib/lc-btrie/mempool.h b/contrib/lc-btrie/mempool.h new file mode 100644 index 0000000..2d91ca6 --- /dev/null +++ b/contrib/lc-btrie/mempool.h @@ -0,0 +1,22 @@ +/* memory pool #include file + */ + +#ifndef _MEMPOOL_H_INCLUDED +#define _MEMPOOL_H_INCLUDED + +struct mempool_chunk; + +struct mempool { /* free-once memory pool. All members are private */ + struct mempool_chunk *mp_chunk; /* list of chunks with free space */ + struct mempool_chunk *mp_fullc; /* list of full chunks */ + unsigned mp_nallocs; /* number of allocs so far */ + unsigned mp_datasz; /* size of allocated data */ + const char *mp_lastbuf; /* last allocated string */ + unsigned mp_lastlen; /* length of lastbuf */ +}; + +void mp_init(struct mempool *mp); +void *mp_alloc(struct mempool *mp, unsigned size, int align); +void mp_free(struct mempool *mp); + +#endif diff --git a/contrib/zstd/CMakeLists.txt b/contrib/zstd/CMakeLists.txt new file mode 100644 index 0000000..9ba24f7 --- /dev/null +++ b/contrib/zstd/CMakeLists.txt @@ -0,0 +1,17 @@ +SET(ZSTDSRC divsufsort.c + entropy_common.c + fse_compress.c + fse_decompress.c + huf_compress.c + huf_decompress.c + zbuff_compress.c + zbuff_decompress.c + zdict.c + zstd_common.c + zstd_compress.c + zstd_decompress.c) + +ADD_LIBRARY(rmilter-zstd STATIC ${ZSTDSRC}) +if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + SET_TARGET_PROPERTIES(rmilter-zstd PROPERTIES COMPILE_FLAGS "-O3") +endif () diff --git a/contrib/zstd/LICENSE b/contrib/zstd/LICENSE new file mode 100644 index 0000000..a793a80 --- /dev/null +++ b/contrib/zstd/LICENSE @@ -0,0 +1,30 @@ +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/zstd/PATENTS b/contrib/zstd/PATENTS new file mode 100644 index 0000000..15b4a2e --- /dev/null +++ b/contrib/zstd/PATENTS @@ -0,0 +1,33 @@ +Additional Grant of Patent Rights Version 2 + +"Software" means the Zstandard software distributed by Facebook, Inc. + +Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software +("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable +(subject to the termination provision below) license under any Necessary +Claims, to make, have made, use, sell, offer to sell, import, and otherwise +transfer the Software. For avoidance of doubt, no license is granted under +Facebook’s rights in any patent claims that are infringed by (i) modifications +to the Software made by you or any third party or (ii) the Software in +combination with any software or other technology. + +The license granted hereunder will terminate, automatically and without notice, +if you (or any of your subsidiaries, corporate affiliates or agents) initiate +directly or indirectly, or take a direct financial interest in, any Patent +Assertion: (i) against Facebook or any of its subsidiaries or corporate +affiliates, (ii) against any party if such Patent Assertion arises in whole or +in part from any software, technology, product or service of Facebook or any of +its subsidiaries or corporate affiliates, or (iii) against any party relating +to the Software. Notwithstanding the foregoing, if Facebook or any of its +subsidiaries or corporate affiliates files a lawsuit alleging patent +infringement against you in the first instance, and you respond by filing a +patent infringement counterclaim in that lawsuit against that party that is +unrelated to the Software, the license granted hereunder will not terminate +under section (i) of this paragraph due to such counterclaim. + +A "Necessary Claim" is a claim of a patent owned by Facebook that is +necessarily infringed by the Software standing alone. + +A "Patent Assertion" is any lawsuit or other action alleging direct, indirect, +or contributory infringement or inducement to infringe any patent, including a +cross-claim or counterclaim. diff --git a/contrib/zstd/README.md b/contrib/zstd/README.md new file mode 100644 index 0000000..e4af990 --- /dev/null +++ b/contrib/zstd/README.md @@ -0,0 +1,94 @@ + **Zstd**, short for Zstandard, is a fast lossless compression algorithm, + targeting real-time compression scenarios at zlib-level and better compression ratios. + +It is provided as an open-source BSD-licensed **C** library. +For other programming languages, +you can consult a list of known ports on [Zstandard homepage](http://www.zstd.net/#other-languages). + +|Branch |Status | +|------------|---------| +|master | [![Build Status](https://travis-ci.org/facebook/zstd.svg?branch=master)](https://travis-ci.org/facebook/zstd) | +|dev | [![Build Status](https://travis-ci.org/facebook/zstd.svg?branch=dev)](https://travis-ci.org/facebook/zstd) | + +As a reference, several fast compression algorithms were tested and compared on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.4.0, with the [Silesia compression corpus]. + +[lzbench]: https://github.com/inikep/lzbench +[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia + + +|Name | Ratio | C.speed | D.speed | +|-----------------|-------|--------:|--------:| +| | | MB/s | MB/s | +|**zstd 0.8.2 -1**|**2.877**|**330**| **940** | +| [zlib] 1.2.8 -1 | 2.730 | 95 | 360 | +| brotli 0.4 -0 | 2.708 | 320 | 375 | +| QuickLZ 1.5 | 2.237 | 510 | 605 | +| LZO 2.09 | 2.106 | 610 | 870 | +| [LZ4] r131 | 2.101 | 620 | 3100 | +| Snappy 1.1.3 | 2.091 | 480 | 1600 | +| LZF 3.6 | 2.077 | 375 | 790 | + +[zlib]:http://www.zlib.net/ +[LZ4]: http://www.lz4.org/ + +Zstd can also offer stronger compression ratios at the cost of compression speed. +Speed vs Compression trade-off is configurable by small increment. Decompression speed is preserved and remain roughly the same at all settings, a property shared by most LZ compression algorithms, such as [zlib] or lzma. + +The following tests were run on a Core i7-3930K CPU @ 4.5GHz, using [lzbench], an open-source in-memory benchmark by @inikep compiled with gcc 5.2.1, on the [Silesia compression corpus]. + +Compression Speed vs Ratio | Decompression Speed +---------------------------|-------------------- +![Compression Speed vs Ratio](images/Cspeed4.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed4.png "Decompression Speed") + +Several algorithms can produce higher compression ratio but at slower speed, falling outside of the graph. +For a larger picture including very slow modes, [click on this link](images/DCspeed5.png) . + + +### The case for Small Data compression + +Previous charts provide results applicable to typical files and streams scenarios (several MB). Small data come with different perspectives. The smaller the amount of data to compress, the more difficult it is to achieve any significant compression. + +This problem is common to any compression algorithm. The reason is, compression algorithms learn from past data how to compress future data. But at the beginning of a new file, there is no "past" to build upon. + +To solve this situation, Zstd offers a __training mode__, which can be used to tune the algorithm for a selected type of data, by providing it with a few samples. The result of the training is stored in a file called "dictionary", which can be loaded before compression and decompression. Using this dictionary, the compression ratio achievable on small data improves dramatically : + +![Compressing Small Data](images/smallData.png "Compressing Small Data") + +These compression gains are achieved while simultaneously providing faster compression and decompression speeds. + +Dictionary work if there is some correlation in a family of small data (there is no _universal dictionary_). +Hence, deploying one dictionary per type of data will provide the greater benefits. Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm will rely more and more on previously decoded content to compress the rest of the file. + +#### Dictionary compression How To : + +1) Create the dictionary + +`zstd --train FullPathToTrainingSet/* -o dictionaryName` + +2) Compress with dictionary + +`zstd FILE -D dictionaryName` + +3) Decompress with dictionary + +`zstd --decompress FILE.zst -D dictionaryName` + +### Status + +Zstandard is currently deployed within Facebook. It is used daily to compress and decompress very large amount of data in multiple formats and use cases. +Zstandard is considered safe for production environments. + +### License + +Zstandard is [BSD-licensed](LICENSE). We also provide an [additional patent grant](PATENTS). + +### Contributing + +The "dev" branch is the one where all contributions will be merged before reaching "master". +If you plan to propose a patch, please commit into the "dev" branch or its own feature branch. +Direct commit to "master" are not permitted. +For more information, please read [CONTRIBUTING](CONTRIBUTING.md). + +### Miscellaneous + +Zstd entropy stage is provided by [Huff0 and FSE, from Finite State Entropy library](https://github.com/Cyan4973/FiniteStateEntropy). diff --git a/contrib/zstd/bitstream.h b/contrib/zstd/bitstream.h new file mode 100644 index 0000000..e96798f --- /dev/null +++ b/contrib/zstd/bitstream.h @@ -0,0 +1,414 @@ +/* ****************************************************************** + bitstream + Part of FSE library + header file (to include) + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "error_private.h" /* error codes and messages */ + + +/*========================================= +* Target specific +=========================================*/ +#if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +#endif + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. +* A critical property of these streams is that they encode and decode in **reverse** direction. +* So the first bit sequence you add will be the last to be read, like a LIFO stack. +*/ +typedef struct +{ + size_t bitContainer; + int bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (register U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse ( &r, val ); + return (unsigned) r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return 31 - __builtin_clz (val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */ + + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(void*) + * @return : 0 if success, + otherwise an error code (can be tested using ERR_isError() ) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); + if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + can add up to 26 bits into `bitC`. + Does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) +{ + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) +{ + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ +} + +/*! BIT_flushBits() : + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + + if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */ + + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : +* Initialize a BIT_DStream_t. +* `bitD` : a pointer to an already allocated BIT_DStream_t structure. +* `srcSize` must be the *exact* size of the bitStream, in bytes. +* @return : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + default:; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) /* experimental */ +# if defined(__x86_64__) + if (sizeof(bitContainer)==8) + return _bextr_u64(bitContainer, start, nbBits); + else +# endif + return _bextr_u32(bitContainer, start, nbBits); +#else + return (bitContainer >> start) & BIT_mask[nbBits]; +#endif +} + +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + return bitContainer & BIT_mask[nbBits]; +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted + */ + MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ +#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +#endif +} + +/*! BIT_lookBitsFast() : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. + */ +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : +* unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream() : +* Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ). +* This function is safe, it guarantees it will not read beyond src buffer. +* @return : status of `BIT_DStream_t` internal register. + if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! BIT_endOfDStream() : +* @return Tells if DStream has exactly reached its end (all bits consumed). +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/contrib/zstd/divsufsort.c b/contrib/zstd/divsufsort.c new file mode 100644 index 0000000..60cceb0 --- /dev/null +++ b/contrib/zstd/divsufsort.c @@ -0,0 +1,1913 @@ +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include +#include +#include + +#include "divsufsort.h" + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? -1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. */ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. */ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. */ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. */ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of tyoe B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} diff --git a/contrib/zstd/divsufsort.h b/contrib/zstd/divsufsort.h new file mode 100644 index 0000000..5440994 --- /dev/null +++ b/contrib/zstd/divsufsort.h @@ -0,0 +1,67 @@ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/contrib/zstd/entropy_common.c b/contrib/zstd/entropy_common.c new file mode 100644 index 0000000..acd9669 --- /dev/null +++ b/contrib/zstd/entropy_common.c @@ -0,0 +1,222 @@ +/* + Common functions of New Generation Entropy library + Copyright (C) 2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*************************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" /* ERR_*, ERROR */ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +#include "huf.h" + + +/*-**************************************** +* FSE Error Management +******************************************/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } + +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/* ************************************************************** +* HUF Error Management +****************************************************************/ +unsigned HUF_isError(size_t code) { return ERR_isError(code); } + +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static short FSE_abs(short a) { return (short)(a<0 ? -a : a); } + +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + if (hbSize < 4) return ERROR(srcSize_wrong); + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<1) & (charnum<=*maxSVPtr)) { + if (previous0) { + unsigned n0 = charnum; + while ((bitStream & 0xFFFF) == 0xFFFF) { + n0 += 24; + if (ip < iend-5) { + ip += 2; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 16; + bitCount += 16; + } } + while ((bitStream & 3) == 3) { + n0 += 3; + bitStream >>= 2; + bitCount += 2; + } + n0 += bitStream & 3; + bitCount += 2; + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); + while (charnum < n0) normalizedCounter[charnum++] = 0; + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + bitStream = MEM_readLE32(ip) >> bitCount; + } else { + bitStream >>= 2; + } } + { short const max = (short)((2*threshold-1)-remaining); + short count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = (short)(bitStream & (threshold-1)); + bitCount += nbBits-1; + } else { + count = (short)(bitStream & (2*threshold-1)); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + remaining -= FSE_abs(count); + normalizedCounter[charnum++] = count; + previous0 = !count; + while (remaining < threshold) { + nbBits--; + threshold >>= 1; + } + + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> (bitCount & 31); + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + if (remaining != 1) return ERROR(corruption_detected); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize = ip[0]; + size_t oSize; + + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} diff --git a/contrib/zstd/error_private.h b/contrib/zstd/error_private.h new file mode 100644 index 0000000..d27e15a --- /dev/null +++ b/contrib/zstd/error_private.h @@ -0,0 +1,104 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +#include /* size_t */ +#include "error_public.h" /* enum list */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#ifdef ERROR +# undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#endif +#define ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + + +/*-**************************************** +* Error Strings +******************************************/ + +ERR_STATIC const char* ERR_getErrorString(ERR_enum code) +{ + static const char* notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(parameter_unknown): return "Unknown parameter type"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode"; + case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size incorrect"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(maxCode): + default: return notErrorCode; + } +} + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/contrib/zstd/error_public.h b/contrib/zstd/error_public.h new file mode 100644 index 0000000..d46abd2 --- /dev/null +++ b/contrib/zstd/error_public.h @@ -0,0 +1,59 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef ERROR_PUBLIC_H_MODULE +#define ERROR_PUBLIC_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/*-**************************************** +* error codes list +******************************************/ +typedef enum { + ZSTD_error_no_error, + ZSTD_error_GENERIC, + ZSTD_error_prefix_unknown, + ZSTD_error_version_unsupported, + ZSTD_error_parameter_unknown, + ZSTD_error_frameParameter_unsupported, + ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_compressionParameter_unsupported, + ZSTD_error_init_missing, + ZSTD_error_memory_allocation, + ZSTD_error_stage_wrong, + ZSTD_error_dstSize_tooSmall, + ZSTD_error_srcSize_wrong, + ZSTD_error_corruption_detected, + ZSTD_error_checksum_wrong, + ZSTD_error_tableLog_tooLarge, + ZSTD_error_maxSymbolValue_tooLarge, + ZSTD_error_maxSymbolValue_tooSmall, + ZSTD_error_dictionary_corrupted, + ZSTD_error_dictionary_wrong, + ZSTD_error_maxCode +} ZSTD_ErrorCode; + +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare directly with enum list published into "error_public.h" */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +const char* ZSTD_getErrorString(ZSTD_ErrorCode code); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_PUBLIC_H_MODULE */ diff --git a/contrib/zstd/fse.h b/contrib/zstd/fse.h new file mode 100644 index 0000000..720d54b --- /dev/null +++ b/contrib/zstd/fse.h @@ -0,0 +1,628 @@ +/* ****************************************************************** + FSE : Finite State Entropy codec + Public Prototypes declaration + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef FSE_H +#define FSE_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-***************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_count(): + Provides the precise count of each byte within a table 'count'. + 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + *maxSymbolValuePtr will be updated if detected smaller than initial value. + @return : the count of the most frequent symbol (which is not identified). + if return == srcSize, there is only one symbol. + Can also return an error code, which can be tested with FSE_isError(). */ +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue); +void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_DTable* FSE_createDTable(unsigned tableLog); +void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + + +#ifdef FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) (size + (size>>7)) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +{ + const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#define FSE_MAX_MEMORY_USAGE 14 +#define FSE_DEFAULT_MEMORY_USAGE 13 + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. +* Required for proper stack allocation */ +#define FSE_MAX_SYMBOL_VALUE 255 + + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* FSE_H */ diff --git a/contrib/zstd/fse_compress.c b/contrib/zstd/fse_compress.c new file mode 100644 index 0000000..386b2c0 --- /dev/null +++ b/contrib/zstd/fse_compress.c @@ -0,0 +1,807 @@ +/* ****************************************************************** + FSE : Finite State Entropy encoder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************************************** +* Complex types +****************************************************************/ +typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + U32 const tableSize = 1 << tableLog; + U32 const tableMask = tableSize - 1; + void* const ptr = ct; + U16* const tableU16 = ( (U16*) ptr) + 2; + void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; + + FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ + U32 highThreshold = tableSize-1; + + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u<=maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurences; + for (nbOccurences=0; nbOccurences highThreshold) position = (position + step) & tableMask; /* Low proba area */ + } } + + if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ +} + +static short FSE_abs(short a) { return (short)(a<0 ? -a : a); } + +static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + int previous0 = 0; + + bitStream = 0; + bitCount = 0; + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while (remaining>1) { /* stops at 1 */ + if (previous0) { + unsigned start = charnum; + while (!normalizedCounter[charnum]) charnum++; + while (charnum >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (charnum >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (charnum-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { short count = normalizedCounter[charnum++]; + const short max = (short)((2*threshold-1)-remaining); + remaining -= FSE_abs(count); + if (remaining<1) return ERROR(GENERIC); + count++; /* +1 for extra accuracy */ + if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + if (charnum > maxSymbolValue + 1) return ERROR(GENERIC); + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); +} + + + +/*-************************************************************** +* Counting histogram +****************************************************************/ +/*! FSE_count_simple + This function just counts byte values within `src`, + and store the histogram into table `count`. + This function is unsafe : it doesn't check that all values within `src` can fit into `count`. + For this reason, prefer using a table `count` with 256 elements. + @return : count of most numerous element +*/ +static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + + + memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip max) max = count[s]; } + + return (size_t)max; +} + + +static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned max=0; + + + U32 Counting1[256] = { 0 }; + U32 Counting2[256] = { 0 }; + U32 Counting3[256] = { 0 }; + U32 Counting4[256] = { 0 }; + + /* safety checks */ + if (!sourceSize) { + memset(count, 0, maxSymbolValue + 1); + *maxSymbolValuePtr = 0; + return 0; + } + if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ipmaxSymbolValue; s--) { + Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; + if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); + } } + + { U32 s; for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + }} + + while (!count[maxSymbolValue]) maxSymbolValue--; + *maxSymbolValuePtr = maxSymbolValue; + return (size_t)max; +} + +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); + return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0); +} + +size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + if (*maxSymbolValuePtr <255) + return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1); + *maxSymbolValuePtr = 255; + return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize); +} + + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ +/*! FSE_sizeof_CTable() : + FSE_CTable is a variable size structure which contains : + `U16 tableLog;` + `U16 maxSymbolValue;` + `U16 nextStateNumber[1 << tableLog];` // This size is variable + `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable +Allocation is manual (C standard does not support variable-size structures). +*/ + +size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return size; +} + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + + +/* Secondary normalization method. + To be used when primary method fails. */ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) +{ + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = -1; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + norm[s]=-2; + } + ToDistribute = (1 << tableLog) - distributed; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == -2) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) maxV=s, maxC=count[s]; + norm[maxV] += (short)ToDistribute; + return 0; + } + + { + U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ((((U64)1<> vStepLog); + U32 sEnd = (U32)(end >> vStepLog); + U32 weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + + U64 const scale = 62 - tableLog; + U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = -1; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) largestP=proba, largest=s; + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + printf("%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + for ( ; ip>istart ; ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + const BYTE* const istart = (const BYTE*) src; + const BYTE* ip = istart; + + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + U32 count[FSE_MAX_SYMBOL_VALUE+1]; + S16 norm[FSE_MAX_SYMBOL_VALUE+1]; + CTable_max_t ct; + size_t errorCode; + + /* init conditions */ + if (srcSize <= 1) return 0; /* Uncompressible */ + if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; + + /* Scan input and build symbol stats */ + errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode == srcSize) return 1; + if (errorCode == 1) return 0; /* each symbol only present once */ + if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); + if (FSE_isError(errorCode)) return errorCode; + + /* Write table description header */ + errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + op += errorCode; + + /* Compress */ + errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct); + if (errorCode == 0) return 0; /* not enough space for compressed data */ + op += errorCode; + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) + return 0; + + return op-ostart; +} + +size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/contrib/zstd/fse_decompress.c b/contrib/zstd/fse_decompress.c new file mode 100644 index 0000000..032e657 --- /dev/null +++ b/contrib/zstd/fse_decompress.c @@ -0,0 +1,326 @@ +/* ****************************************************************** + FSE : Finite State Entropy decoder + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +#else +# ifdef __GNUC__ +# define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* malloc, free, qsort */ +#include /* memcpy, memset */ +#include /* printf (debug) */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + +/* check and forward error code */ +#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } + + +/* ************************************************************** +* Complex types +****************************************************************/ +typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + free(dt); +} + +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + { U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + + if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ + + /* normal FSE decoding mode */ + { size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(NCountLength)) return NCountLength; + if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ + ip += NCountLength; + cSrcSize -= NCountLength; + } + + CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) ); + + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); /* always return, even if it is an error code */ +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/contrib/zstd/huf.h b/contrib/zstd/huf.h new file mode 100644 index 0000000..29bab4b --- /dev/null +++ b/contrib/zstd/huf.h @@ -0,0 +1,228 @@ +/* ****************************************************************** + Huffman coder, part of New Generation Entropy library + header file + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* *** Dependencies *** */ +#include /* size_t */ + + +/* *** simple functions *** */ +/** +HUF_compress() : + Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + 'dst' buffer must be already allocated. + Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + @return : size of compressed data (<= `dstCapacity`). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single repeated byte symbol (RLE compression). + if HUF_isError(return), compression failed (more details using HUF_getErrorName()) +*/ +size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** +HUF_decompress() : + Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + into already allocated buffer 'dst', of minimum size 'dstSize'. + `dstSize` : **must** be the ***exact*** size of original (uncompressed) data. + Note : in contrast with FSE, HUF_decompress can regenerate + RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + because it knows size to regenerate. + @return : size of regenerated data (== dstSize), + or an error code, which can be tested using HUF_isError() +*/ +size_t HUF_decompress(void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize); + + +/* **************************************** +* Tool functions +******************************************/ +#define HUF_BLOCKSIZE_MAX (128 * 1024) +size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : +* Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */ +size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +#ifdef HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +#include "mem.h" /* U32 */ + + +/* *** Constants *** */ +#define HUF_TABLELOG_ABSOLUTEMAX 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ +#define HUF_SYMBOLVALUE_MAX 255 +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" +#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + U32 name##hb[maxSymbolValue+1]; \ + void* name##hv = &(name##hb); \ + HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) } +#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ + + +/* **************************************** +* HUF detailed API +******************************************/ +/*! +HUF_compress() does the following: +1. count symbol occurrence from source[] into table count[] using FSE_count() +2. (optional) refine tableLog using HUF_optimalTableLog() +3. build Huffman table from count using HUF_buildCTable() +4. save Huffman table to memory buffer using HUF_writeCTable() +5. encode the data stream using HUF_compress4X_usingCTable() + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and regenerate 'CTable' using external methods. +*/ +/* FSE_count() : find it within "fse.h" */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/** HUF_readCTable() : +* Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize); + + +/* +HUF_decompress() does the following: +1. select the decompression algorithm (X2, X4) based on pre-computed heuristics +2. build Huffman table from save, using HUF_readDTableXn() +3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable +*/ + +/** HUF_selectDecoder() : +* Tells which decoder is likely to decode faster, +* based on a set of pre-determined metrics. +* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . +* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + + +/* single stream variants */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); + + +#endif /* HUF_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* HUF_H_298734234 */ diff --git a/contrib/zstd/huf_compress.c b/contrib/zstd/huf_compress.c new file mode 100644 index 0000000..c2dd13c --- /dev/null +++ b/contrib/zstd/huf_compress.c @@ -0,0 +1,548 @@ +/* ****************************************************************** + Huffman encoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include /* memcpy, memset */ +#include /* printf (debug) */ +#include "bitstream.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt within "huf.h" */ + +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + +/*! HUF_writeCTable() : + `CTable` : huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) +{ + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; + BYTE* op = (BYTE*)dst; + U32 n; + + /* check conditions */ + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); + + /* convert to weight */ + bitsToWeight[0] = 0; + for (n=1; n1) & (size < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)size; + return size+1; + } + } + + /* raw values */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */ + for (n=0; n HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 current = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = current; + } } + + /* fill nbBits */ + { U32 n; for (n=0; n0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } + } + + return readSize; +} + + +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + U32 n = lastNonNull; + + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n --; + } /* n stops at huffNode[n].nbBits <= maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ + + /* renorm totalCost */ + totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + int pos; + + /* Get pos of last (smallest) symbol per rank */ + memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = pos; + } } + + while (totalCost > 0) { + U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 highPos = rankLast[nBitsToDecrease]; + U32 lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + nBitsToDecrease ++; + totalCost -= 1 << (nBitsToDecrease-1); + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ + huffNode[rankLast[nBitsToDecrease]].nbBits ++; + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } } /* while (totalCost > 0) */ + + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + rankLast[1] = n+1; + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } } } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + + +typedef struct { + U32 base; + U32 current; +} rankPos; + +static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) +{ + rankPos rank[32]; + U32 n; + + memset(rank, 0, sizeof(rank)); + for (n=0; n<=maxSymbolValue; n++) { + U32 r = BIT_highbit32(count[n] + 1); + rank[r].base ++; + } + for (n=30; n>0; n--) rank[n-1].base += rank[n].base; + for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=0; n<=maxSymbolValue; n++) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rank[r].current++; + while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--; + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) +size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) +{ + nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1]; + nodeElt* huffNode = huffNode0 + 1; + U32 n, nonNullRank; + int lowS, lowN; + U16 nodeNb = STARTNODE; + U32 nodeRoot; + + /* safety checks */ + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); + memset(huffNode0, 0, sizeof(huffNode0)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue); + + /* init for parents */ + nonNullRank = maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); + + /* create parents */ + while (nodeNb <= nodeRoot) { + U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + + /* fill result into tree (val, nbBits) */ + { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine stating value per rank */ + { U16 min = 0; + for (n=maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; n<=maxSymbolValue; n++) + tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ + for (n=0; n<=maxSymbolValue; n++) + tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ + } + + return maxNbBits; +} + +static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +#define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize)); + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op); + if (HUF_isError(errorCode)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + case 0 : + default: ; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; + op += cSize; + } + + return op-ostart; +} + + +static size_t HUF_compress_internal ( + void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + unsigned singleStream) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + U32 count[HUF_SYMBOLVALUE_MAX+1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1]; + + /* checks & inits */ + if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ + if (!dstSize) return 0; /* cannot fit within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Scan input and build symbol stats */ + { size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); + if (HUF_isError(largest)) return largest; + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); + if (HUF_isError(maxBits)) return maxBits; + huffLog = (U32)maxBits; + } + + /* Write table description header */ + { size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); + if (HUF_isError(hSize)) return hSize; + if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ + op += hSize; + } + + /* Compress */ + { size_t const cSize = (singleStream) ? + HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : /* single segment */ + HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); + if (HUF_isError(cSize)) return cSize; + if (cSize==0) return 0; /* uncompressible */ + op += cSize; + } + + /* check compressibility */ + if ((size_t)(op-ostart) >= srcSize-1) + return 0; + + return op-ostart; +} + + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0); +} + + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); +} diff --git a/contrib/zstd/huf_decompress.c b/contrib/zstd/huf_decompress.c new file mode 100644 index 0000000..a5521bd --- /dev/null +++ b/contrib/zstd/huf_decompress.c @@ -0,0 +1,891 @@ +/* ****************************************************************** + Huffman decoder, part of New Generation Entropy library + Copyright (C) 2013-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +/* inline is defined */ +#elif defined(_MSC_VER) || defined(__GNUC__) +# define inline __inline +#else +# define inline /* disable inline */ +#endif + + +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include /* memcpy, memset */ +#include "bitstream.h" /* BIT_* */ +#include "fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ + +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ + +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */ + +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + + HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Prepare ranks */ + { U32 n, nextRankStart = 0; + for (n=1; n> 1; + U32 i; + HUF_DEltX2 D; + D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); + for (i = rankVal[w]; i < rankVal[w] + length; i++) + dt[i] = D; + rankVal[w] += length; + } } + + return iSize; +} + + +static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) + +static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, hence no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + return pEnd-pStart; +} + +static size_t HUF_decompress1X2_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; } + + HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} + + +static size_t HUF_decompress4X2_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable + 1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; } + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) { + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + + +size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */ + +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; + +static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq) +{ + HUF_DEltX4 DElt; + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + + /* get pre-calculated rankVal */ + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s= 1 */ + + rankVal[weight] += length; + } } +} + +typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1]; + +static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + + /* fill DTable */ + for (s=0; s= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol); + } else { + HUF_DEltX4 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize) +{ + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 }; + U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 }; + U32* const rankStart = rankStart0+1; + rankVal_t rankVal; + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ + HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; + + HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */ + if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); + /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; w> consumed; + } } } } + + HUF_fillDTableX4(dt, maxTableLog, + sortedSymbol, sizeOfSort, + rankStart0, rankVal, maxW, + tableLog+1); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + } } + return 1; +} + + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + + +static size_t HUF_decompress1X4_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); + if (HUF_isError(errorCode)) return errorCode; + } + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +size_t HUF_decompress1X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx); +} + +size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + +static size_t HUF_decompress4X4_usingDTable_internal( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; + const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; } + { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; } + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); +} + + +size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); +} + +size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} + + +/* ********************************/ +/* Generic decompression selector */ +/* ********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) : + HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); + return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) : + HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); +} + + +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; + +/** HUF_selectDecoder() : +* Tells which decoder is likely to decode faster, +* based on a set of pre-determined metrics. +* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . +* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + /* decoder timing evaluation */ + U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ + + return DTime1 < DTime0; +} + + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 }; + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} + +size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); + return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; + } +} diff --git a/contrib/zstd/mem.h b/contrib/zstd/mem.h new file mode 100644 index 0000000..681dd35 --- /dev/null +++ b/contrib/zstd/mem.h @@ -0,0 +1,370 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/* code only tested on 32 and 64 bits systems */ +#define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } +MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign; + __pragma( pack(pop) ) +#else + typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } +MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. + can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif defined (__GNUC__) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif defined (__GNUC__) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + + +/* function safe only for comparisons */ +MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ diff --git a/contrib/zstd/zbuff.h b/contrib/zstd/zbuff.h new file mode 100644 index 0000000..f99e061 --- /dev/null +++ b/contrib/zstd/zbuff.h @@ -0,0 +1,191 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +/* *************************************************************** +* NOTES/WARNINGS +*****************************************************************/ +/* The streaming API defined here will soon be deprecated by the +* new one in 'zstd.h'; consider migrating towards newer streaming +* API. See 'lib/README.md'. +*****************************************************************/ + +#ifndef ZSTD_BUFFERED_H_23987 +#define ZSTD_BUFFERED_H_23987 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Dependencies +***************************************/ +#include /* size_t */ + + +/* *************************************************************** +* Compiler specifics +*****************************************************************/ +/* ZSTD_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL */ +#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) +#else +# define ZSTDLIB_API +#endif + + +/* ************************************* +* Streaming functions +***************************************/ +/* This is the easier "buffered" streaming API, +* using an internal buffer to lift all restrictions on user-provided buffers +* which can be any size, any place, for both input and output. +* ZBUFF and ZSTD are 100% interoperable, +* frames created by one can be decoded by the other one */ + +typedef struct ZBUFF_CCtx_s ZBUFF_CCtx; +ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void); +ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); + +ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); + +ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr); +ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); +ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); + +/*-************************************************* +* Streaming compression - howto +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Start by initializing ZBUF_CCtx. +* Use ZBUFF_compressInit() to start a new compression operation. +* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. +* +* Use ZBUFF_compressContinue() repetitively to consume input stream. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush(). +* The nb of bytes written into `dst` will be reported into *dstCapacityPtr. +* Note that the function cannot output more than *dstCapacityPtr, +* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* In which case, call again ZBUFF_compressFlush() to complete the flush. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize() +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency) +* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. +* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering. +* **************************************************/ + + +typedef struct ZBUFF_DCtx_s ZBUFF_DCtx; +ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void); +ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); + +ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); +ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFF_DCtx object is required to track streaming operations. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation, +* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFF_DCtx objects can be re-init multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : 0 when a frame is completely decoded and fully flushed, +* 1 when there is still some data left within internal buffer to flush, +* >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency), +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize() +* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFF_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode); +ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void); +ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void); +ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void); +ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void); + + +#ifdef ZBUFF_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used in association with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +/*--- Dependency ---*/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */ +#include "zstd.h" + + +/*--- Custom memory allocator ---*/ +/*! ZBUFF_createCCtx_advanced() : + * Create a ZBUFF compression context using external alloc and free functions */ +ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem); + +/*! ZBUFF_createDCtx_advanced() : + * Create a ZBUFF decompression context using external alloc and free functions */ +ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem); + + +/*--- Advanced Streaming Initialization ---*/ +ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); + +#endif /* ZBUFF_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_BUFFERED_H_23987 */ diff --git a/contrib/zstd/zbuff_compress.c b/contrib/zstd/zbuff_compress.c new file mode 100644 index 0000000..5095b43 --- /dev/null +++ b/contrib/zstd/zbuff_compress.c @@ -0,0 +1,319 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + + +/* ************************************* +* Dependencies +***************************************/ +#include +#include "error_private.h" +#include "zstd_internal.h" /* MIN, ZSTD_BLOCKHEADERSIZE, defaultCustomMem */ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" + + +/* ************************************* +* Constants +***************************************/ +static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE; + + +/*-*********************************************************** +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Use ZBUFF_compressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. +* The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *dstCapacityPtr. +* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value. +* output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed. +* ***********************************************************/ + +typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush, ZBUFFcs_final } ZBUFF_cStage; + +/* *** Resources *** */ +struct ZBUFF_CCtx_s { + ZSTD_CCtx* zc; + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + size_t blockSize; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZBUFF_cStage stage; + U32 checksum; + U32 frameEnded; + ZSTD_customMem customMem; +}; /* typedef'd tp ZBUFF_CCtx within "zbuff.h" */ + +ZBUFF_CCtx* ZBUFF_createCCtx(void) +{ + return ZBUFF_createCCtx_advanced(defaultCustomMem); +} + +ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZBUFF_CCtx* zbc; + + if (!customMem.customAlloc && !customMem.customFree) + customMem = defaultCustomMem; + + if (!customMem.customAlloc || !customMem.customFree) + return NULL; + + zbc = (ZBUFF_CCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_CCtx)); + if (zbc==NULL) return NULL; + memset(zbc, 0, sizeof(ZBUFF_CCtx)); + memcpy(&zbc->customMem, &customMem, sizeof(ZSTD_customMem)); + zbc->zc = ZSTD_createCCtx_advanced(customMem); + if (zbc->zc == NULL) { ZBUFF_freeCCtx(zbc); return NULL; } + return zbc; +} + +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) +{ + if (zbc==NULL) return 0; /* support free on NULL */ + ZSTD_freeCCtx(zbc->zc); + if (zbc->inBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff); + if (zbc->outBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff); + zbc->customMem.customFree(zbc->customMem.opaque, zbc); + return 0; +} + + +/* ====== Initialization ====== */ + +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + /* allocate buffers */ + { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; + if (zbc->inBuffSize < neededInBuffSize) { + zbc->inBuffSize = neededInBuffSize; + zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff); /* should not be necessary */ + zbc->inBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, neededInBuffSize); + if (zbc->inBuff == NULL) return ERROR(memory_allocation); + } + zbc->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize); + } + if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) { + zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1; + zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff); /* should not be necessary */ + zbc->outBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, zbc->outBuffSize); + if (zbc->outBuff == NULL) return ERROR(memory_allocation); + } + + { size_t const errorCode = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize); + if (ZSTD_isError(errorCode)) return errorCode; } + + zbc->inToCompress = 0; + zbc->inBuffPos = 0; + zbc->inBuffTarget = zbc->blockSize; + zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_load; + zbc->checksum = params.fParams.checksumFlag > 0; + zbc->frameEnded = 0; + return 0; /* ready to go */ +} + + +size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0); +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel); +} + + +/* internal util function */ +MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + + +/* ====== Compression ====== */ + +typedef enum { zbf_gather, zbf_flush, zbf_end } ZBUFF_flush_e; + +static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr, + ZBUFF_flush_e const flush) +{ + U32 someMoreWork = 1; + const char* const istart = (const char*)src; + const char* const iend = istart + *srcSizePtr; + const char* ip = istart; + char* const ostart = (char*)dst; + char* const oend = ostart + *dstCapacityPtr; + char* op = ostart; + + while (someMoreWork) { + switch(zbc->stage) + { + case ZBUFFcs_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */ + + case ZBUFFcs_load: + /* complete inBuffer */ + { size_t const toLoad = zbc->inBuffTarget - zbc->inBuffPos; + size_t const loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip); + zbc->inBuffPos += loaded; + ip += loaded; + if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) { + someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */ + } } + /* compress current block (note : this stage cannot be stopped in the middle) */ + { void* cDst; + size_t cSize; + size_t const iSize = zbc->inBuffPos - zbc->inToCompress; + size_t oSize = oend-op; + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress directly into output buffer (avoid flush stage) */ + else + cDst = zbc->outBuff, oSize = zbc->outBuffSize; + cSize = (flush == zbf_end) ? + ZSTD_compressEnd(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize) : + ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + if (flush == zbf_end) zbc->frameEnded = 1; + /* prepare next block */ + zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize; + if (zbc->inBuffTarget > zbc->inBuffSize) + zbc->inBuffPos = 0, zbc->inBuffTarget = zbc->blockSize; /* note : inBuffSize >= blockSize */ + zbc->inToCompress = zbc->inBuffPos; + if (cDst == op) { op += cSize; break; } /* no need to flush */ + zbc->outBuffContentSize = cSize; + zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_flush; /* continue to flush stage */ + } + + case ZBUFFcs_flush: + { size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize; + size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush); + op += flushed; + zbc->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */ + zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0; + zbc->stage = ZBUFFcs_load; + break; + } + + case ZBUFFcs_final: + someMoreWork = 0; /* do nothing */ + break; + + default: + return ERROR(GENERIC); /* impossible */ + } + } + + *srcSizePtr = ip - istart; + *dstCapacityPtr = op - ostart; + if (zbc->frameEnded) return 0; + { size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos; + if (hintInSize==0) hintInSize = zbc->blockSize; + return hintInSize; + } +} + +size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, zbf_gather); +} + + + +/* ====== Finalize ====== */ + +size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t srcSize = 0; + ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, zbf_flush); /* use a valid src address instead of NULL */ + return zbc->outBuffContentSize - zbc->outBuffFlushedSize; +} + + +size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + *dstCapacityPtr; + BYTE* op = ostart; + + if (zbc->stage != ZBUFFcs_final) { + /* flush whatever remains */ + size_t outSize = *dstCapacityPtr; + size_t srcSize = 0; + size_t const notEnded = ZBUFF_compressContinue_generic(zbc, dst, &outSize, &srcSize, &srcSize, zbf_end); /* use a valid address instead of NULL */ + size_t const remainingToFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize; + op += outSize; + if (remainingToFlush) { + *dstCapacityPtr = op-ostart; + return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4); + } + /* create epilogue */ + zbc->stage = ZBUFFcs_final; + zbc->outBuffContentSize = !notEnded ? 0 : + ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize, NULL, 0); /* write epilogue into outBuff */ + } + + /* flush epilogue */ + { size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize; + size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush); + op += flushed; + zbc->outBuffFlushedSize += flushed; + *dstCapacityPtr = op-ostart; + if (toFlush==flushed) zbc->stage = ZBUFFcs_init; /* end reached */ + return toFlush - flushed; + } +} + + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedCInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } +size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; } diff --git a/contrib/zstd/zbuff_decompress.c b/contrib/zstd/zbuff_decompress.c new file mode 100644 index 0000000..b20ee97 --- /dev/null +++ b/contrib/zstd/zbuff_decompress.c @@ -0,0 +1,252 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + + +/* ************************************* +* Dependencies +***************************************/ +#include +#include "error_private.h" +#include "zstd_internal.h" /* MIN, ZSTD_blockHeaderSize, ZSTD_BLOCKSIZE_MAX */ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" + + +typedef enum { ZBUFFds_init, ZBUFFds_loadHeader, + ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage; + +/* *** Resource management *** */ +struct ZBUFF_DCtx_s { + ZSTD_DCtx* zd; + ZSTD_frameParams fParams; + ZBUFF_dStage stage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t blockSize; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + size_t lhSize; + ZSTD_customMem customMem; +}; /* typedef'd to ZBUFF_DCtx within "zbuff.h" */ + + +ZBUFF_DCtx* ZBUFF_createDCtx(void) +{ + return ZBUFF_createDCtx_advanced(defaultCustomMem); +} + +ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem) +{ + ZBUFF_DCtx* zbd; + + if (!customMem.customAlloc && !customMem.customFree) + customMem = defaultCustomMem; + + if (!customMem.customAlloc || !customMem.customFree) + return NULL; + + zbd = (ZBUFF_DCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_DCtx)); + if (zbd==NULL) return NULL; + memset(zbd, 0, sizeof(ZBUFF_DCtx)); + memcpy(&zbd->customMem, &customMem, sizeof(ZSTD_customMem)); + zbd->zd = ZSTD_createDCtx_advanced(customMem); + if (zbd->zd == NULL) { ZBUFF_freeDCtx(zbd); return NULL; } + zbd->stage = ZBUFFds_init; + return zbd; +} + +size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd) +{ + if (zbd==NULL) return 0; /* support free on null */ + ZSTD_freeDCtx(zbd->zd); + if (zbd->inBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff); + if (zbd->outBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff); + zbd->customMem.customFree(zbd->customMem.opaque, zbd); + return 0; +} + + +/* *** Initialization *** */ + +size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize) +{ + zbd->stage = ZBUFFds_loadHeader; + zbd->lhSize = zbd->inPos = zbd->outStart = zbd->outEnd = 0; + return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize); +} + +size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd) +{ + return ZBUFF_decompressInitDictionary(zbd, NULL, 0); +} + + +/* internal util function */ +MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + + +/* *** Decompression *** */ + +size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + const char* const istart = (const char*)src; + const char* const iend = istart + *srcSizePtr; + const char* ip = istart; + char* const ostart = (char*)dst; + char* const oend = ostart + *dstCapacityPtr; + char* op = ostart; + U32 someMoreWork = 1; + + while (someMoreWork) { + switch(zbd->stage) + { + case ZBUFFds_init : + return ERROR(init_missing); + + case ZBUFFds_loadHeader : + { size_t const hSize = ZSTD_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize); + if (ZSTD_isError(hSize)) return hSize; + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zbd->lhSize; /* if hSize!=0, hSize > zbd->lhSize */ + if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ + memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip); + zbd->lhSize += iend-ip; + *dstCapacityPtr = 0; + return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + memcpy(zbd->headerBuffer + zbd->lhSize, ip, toLoad); zbd->lhSize = hSize; ip += toLoad; + break; + } } + + /* Consume header */ + { size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zbd->zd); /* == ZSTD_frameHeaderSize_min */ + size_t const h1Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size); + if (ZSTD_isError(h1Result)) return h1Result; /* should not happen : already checked */ + if (h1Size < zbd->lhSize) { /* long header */ + size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zbd->zd); + size_t const h2Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size); + if (ZSTD_isError(h2Result)) return h2Result; + } } + + zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + + /* Frame header instruct buffer sizes */ + { size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); + size_t const neededOutSize = zbd->fParams.windowSize + blockSize; + zbd->blockSize = blockSize; + if (zbd->inBuffSize < blockSize) { + zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff); + zbd->inBuffSize = blockSize; + zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize); + if (zbd->inBuff == NULL) return ERROR(memory_allocation); + } + if (zbd->outBuffSize < neededOutSize) { + zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff); + zbd->outBuffSize = neededOutSize; + zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize); + if (zbd->outBuff == NULL) return ERROR(memory_allocation); + } } + zbd->stage = ZBUFFds_read; + /* pass-through */ + + case ZBUFFds_read: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd); + if (neededInSize==0) { /* end of frame */ + zbd->stage = ZBUFFds_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd); + size_t const decodedSize = ZSTD_decompressContinue(zbd->zd, + zbd->outBuff + zbd->outStart, (isSkipFrame ? 0 : zbd->outBuffSize - zbd->outStart), + ip, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + ip += neededInSize; + if (!decodedSize && !isSkipFrame) break; /* this was just a header */ + zbd->outEnd = zbd->outStart + decodedSize; + zbd->stage = ZBUFFds_flush; + break; + } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zbd->stage = ZBUFFds_load; + /* pass-through */ + } + + case ZBUFFds_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd); + size_t const toLoad = neededInSize - zbd->inPos; /* should always be <= remaining space within inBuff */ + size_t loadedSize; + if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip); + ip += loadedSize; + zbd->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + { const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd); + size_t const decodedSize = ZSTD_decompressContinue(zbd->zd, + zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart, + zbd->inBuff, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + zbd->inPos = 0; /* input is consumed */ + if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */ + zbd->outEnd = zbd->outStart + decodedSize; + zbd->stage = ZBUFFds_flush; + /* pass-through */ + } } + + case ZBUFFds_flush: + { size_t const toFlushSize = zbd->outEnd - zbd->outStart; + size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize); + op += flushedSize; + zbd->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zbd->stage = ZBUFFds_read; + if (zbd->outStart + zbd->blockSize > zbd->outBuffSize) + zbd->outStart = zbd->outEnd = 0; + break; + } + /* cannot flush everything */ + someMoreWork = 0; + break; + } + default: return ERROR(GENERIC); /* impossible */ + } } + + /* result */ + *srcSizePtr = ip-istart; + *dstCapacityPtr = op-ostart; + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd); + if (!nextSrcSizeHint) return (zbd->outEnd != zbd->outStart); /* return 0 only if fully flushed too */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zbd->zd) == ZSTDnit_block); + if (zbd->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */ + nextSrcSizeHint -= zbd->inPos; /* already loaded*/ + return nextSrcSizeHint; + } +} + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedDInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize /* block header size*/ ; } +size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } diff --git a/contrib/zstd/zdict.c b/contrib/zstd/zdict.c new file mode 100644 index 0000000..adfe55c --- /dev/null +++ b/contrib/zstd/zdict.c @@ -0,0 +1,1005 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/*-************************************** +* Tuning parameters +****************************************/ +#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20) +#define ZDICT_MIN_SAMPLES_SIZE 512 + + +/*-************************************** +* Compiler Options +****************************************/ +/* Unix Large Files support (>4GB) */ +#define _FILE_OFFSET_BITS 64 +#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */ +# define _LARGEFILE_SOURCE +#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */ +# define _LARGEFILE64_SOURCE +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc, free */ +#include /* memset */ +#include /* fprintf, fopen, ftello64 */ +#include /* clock */ + +#include "mem.h" /* read */ +#include "error_private.h" +#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */ +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" /* includes zstd.h */ +#include "xxhash.h" +#include "divsufsort.h" +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif +#include "zdict.h" + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define DICTLISTSIZE_DEFAULT 10000 + +#define NOISELENGTH 32 + +#define MINRATIO 4 +static const int g_compressionLevel_default = 5; +static const U32 g_selectivity_default = 9; +static const size_t g_provision_entropySize = 200; +static const size_t g_min_fast_dictContent = 192; + + +/*-************************************* +* Console display +***************************************/ +#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */ + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if (ZDICT_clockSpan(g_time) > refreshRate) \ + { g_time = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stdout); } } +static const clock_t refreshRate = CLOCKS_PER_SEC * 3 / 10; +static clock_t g_time = 0; + +static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } + +static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length) +{ + const BYTE* const b = (const BYTE*)ptr; + size_t u; + for (u=0; u126) c = '.'; /* non-printable char */ + DISPLAYLEVEL(dlevel, "%c", c); + } +} + + +/*-******************************************************** +* Helper functions +**********************************************************/ +unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + +unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0; + return MEM_readLE32((const char*)dictBuffer + 4); +} + + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static unsigned ZDICT_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. +*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t length; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 u16 = MEM_read16(b+pos+4); + U32 u, e = 6; + while (MEM_read16(b+pos+e) == u16) e+=2 ; + if (b[pos+e] == b[pos+e-1]) e++; + for (u=1; u=MINMATCHLENGTH); + + /* look backward */ + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %i at pos %7u ", (U32)(end-start), MINMATCHLENGTH, (U32)pos); + DISPLAYLEVEL(4, "\n"); + + for (searchLength = MINMATCHLENGTH ; ; searchLength++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[ suffix[id] + searchLength] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + searchLength]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new ref */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + + /* look backward */ + length = MINMATCHLENGTH; + while ((length >= MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { U32 l = (U32)maxLength; + BYTE const c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n", + (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { U32 id; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 max = elt.pos + (elt.length-1); + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos < max)) { /* overlap */ + /* append */ + U32 addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u elt.pos) && (table[u].pos < elt.pos)) { /* overlap */ + /* append */ + int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx */ + if (addedLength > 0) { /* otherwise, already included */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : first element is nb of elts */ + U32 const max = table->pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt) +{ + /* merge if possible */ + U32 mergeId = ZDICT_checkMerge(table, elt, 0); + if (mergeId) { + U32 newMerge = 1; + while (newMerge) { + newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20)); + while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20)); + { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + /* build reverse suffix sort */ + { size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* build file pos */ + filePos[0] = 0; + for (pos=1; pos> 21); + } +} + + +typedef struct +{ + ZSTD_CCtx* ref; + ZSTD_CCtx* zc; + void* workPlace; /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */ +} EStats_ress_t; + +#define MAXREPOFFSET 1024 + +static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params, + U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize) +{ + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog); + size_t cSize; + + if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ + { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; } + } + cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; } + + if (cSize) { /* if == 0; block is not compressible */ + const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + ZSTD_seqToCodes(seqStorePtr); + + { const BYTE* codePtr = seqStorePtr->ofCode; + U32 u; + for (u=0; umlCode; + U32 u; + for (u=0; ullCode; + U32 u; + for (u=0; u= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } +} + +/* +static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + unsigned u; + size_t max=0; + for (u=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + + +#define OFFCODE_MAX 30 /* only applicable to first block */ +static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, + unsigned compressionLevel, + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize) +{ + U32 countLit[256]; + HUF_CREATE_STATIC_CTABLE(hufTable, 255); + U32 offcodeCount[OFFCODE_MAX+1]; + short offcodeNCount[OFFCODE_MAX+1]; + U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); + U32 matchLengthCount[MaxML+1]; + short matchLengthNCount[MaxML+1]; + U32 litLengthCount[MaxLL+1]; + short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET]; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; + EStats_ress_t esr; + ZSTD_parameters params; + U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; + size_t pos = 0, errorCode; + size_t eSize = 0; + size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + BYTE* dstPtr = (BYTE*)dstBuffer; + + /* init */ + esr.ref = ZSTD_createCCtx(); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX); + if (!esr.ref || !esr.zc || !esr.workPlace) { + eSize = ERROR(memory_allocation); + DISPLAYLEVEL(1, "Not enough memory \n"); + goto _cleanup; + } + if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */ + for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */ + for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1; + for (u=0; u<=MaxML; u++) matchLengthCount[u]=1; + for (u=0; u<=MaxLL; u++) litLengthCount[u]=1; + memset(repOffset, 0, sizeof(repOffset)); + repOffset[1] = repOffset[4] = repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); + if (compressionLevel==0) compressionLevel=g_compressionLevel_default; + params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); + { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0); + if (ZSTD_isError(beginResult)) { + eSize = ERROR(GENERIC); + DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n"); + goto _cleanup; + } } + + /* collect stats on all files */ + for (u=0; u 30) ? MINRATIO : nbSamples >> selectivity; + size_t const targetDictSize = maxDictSize; + size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + size_t dictSize = 0; + + /* checks */ + if (!dictList) return ERROR(memory_allocation); + if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); } + if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */ + + /* init */ + ZDICT_initDictItem(dictList); + g_displayLevel = params.notificationLevel; + + /* build dictionary */ + ZDICT_trainBuffer(dictList, dictListSize, + samplesBuffer, samplesBuffSize, + samplesSizes, nbSamples, + minRep); + + /* display best matches */ + if (g_displayLevel>= 3) { + U32 const nb = MIN(25, dictList[0].pos); + U32 const dictContentSize = ZDICT_dictSize(dictList); + U32 u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb); + for (u=1; u<=nb; u++) { + U32 pos = dictList[u].pos; + U32 length = dictList[u].length; + U32 printedLength = MIN(40, length); + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, length, pos, dictList[u].savings); + ZDICT_printHex(3, (const char*)samplesBuffer+pos, printedLength); + DISPLAYLEVEL(3, "| \n"); + } } + + + /* create dictionary */ + { U32 dictContentSize = ZDICT_dictSize(dictList); + if (dictContentSize < targetDictSize/2) { + DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize); + if (minRep > MINRATIO) { + DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); + DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); + } + if (samplesBuffSize < 10 * targetDictSize) + DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20)); + } + + if ((dictContentSize > targetDictSize*2) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + U32 proposedSelectivity = selectivity-1; + while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } + DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize); + DISPLAYLEVEL(2, "! you may consider decreasing selectivity to produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! but test its efficiency on samples \n"); + } + + /* limit dictionary size */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n targetDictSize) { currentSize -= dictList[n].length; break; } + } + dictList->pos = n; + dictContentSize = currentSize; + } + + /* build dict content */ + { U32 u; + BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; + if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } } + + dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, + samplesBuffer, samplesSizes, nbSamples, + params); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* issue : samplesBuffer need to be followed by a noisy guard band. +* work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + size_t result; + void* newBuff; + size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + result = ZDICT_trainFromBuffer_unsafe( + dictBuffer, dictBufferCapacity, + newBuff, samplesSizes, nbSamples, + params); + free(newBuff); + return result; +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} + +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} diff --git a/contrib/zstd/zdict.h b/contrib/zstd/zdict.h new file mode 100644 index 0000000..c84aedd --- /dev/null +++ b/contrib/zstd/zdict.h @@ -0,0 +1,111 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef DICTBUILDER_H_001 +#define DICTBUILDER_H_001 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*====== Dependencies ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTD_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZDICTLIB_API __declspec(dllexport) +#else +# define ZDICTLIB_API +#endif + + +/*! ZDICT_trainFromBuffer() : + Train a dictionary from an array of samples. + Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + The resulting dictionary will be saved into `dictBuffer`. + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + or an error code, which can be tested with ZDICT_isError(). + Tips : In general, a reasonable dictionary has a size of ~ 100 KB. + It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`. + In general, it's recommended to provide a few thousands samples, but this can vary a lot. + It's recommended that total size of all samples be about ~x100 times the target size of dictionary. +*/ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + int compressionLevel; /* 0 means default; target a specific zstd compression level */ + unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */ + unsigned reserved[2]; /* space for future parameters */ +} ZDICT_params_t; + + +/*! ZDICT_trainFromBuffer_advanced() : + Same as ZDICT_trainFromBuffer() with control over more parameters. + `parameters` is optional and can be provided with values set to 0 to mean "default". + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`), + or an error code, which can be tested by ZDICT_isError(). + note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0. +*/ +size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + +/*! ZDICT_addEntropyTablesFromBuffer() : + + Given a content-only dictionary (built using any 3rd party algorithm), + add entropy tables computed from an array of samples. + Samples must be stored concatenated in a flat buffer `samplesBuffer`, + supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. + + The input dictionary content must be stored *at the end* of `dictBuffer`. + Its size is `dictContentSize`. + The resulting dictionary with added entropy tables will be *written back to `dictBuffer`*, + starting from its beginning. + @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`). +*/ +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ diff --git a/contrib/zstd/zstd.h b/contrib/zstd/zstd.h new file mode 100644 index 0000000..bd5d68b --- /dev/null +++ b/contrib/zstd/zstd.h @@ -0,0 +1,588 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTD_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) +#else +# define ZSTDLIB_API +#endif + + +/*======= Version =======*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 0 +#define ZSTD_VERSION_RELEASE 0 + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) +ZSTDLIB_API unsigned ZSTD_versionNumber (void); + + +/* ************************************* +* Simple API +***************************************/ +/*! ZSTD_compress() : + Compresses `src` buffer into already allocated `dst`. + Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + @return : the number of bytes written into `dst` (<= `dstCapacity), + or an error code if it fails (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_getDecompressedSize() : +* @return : decompressed size as a 64-bits value _if known_, 0 otherwise. +* note 1 : decompressed size can be very large (64-bits value), +* potentially larger than what local system can handle as a single memory segment. +* In which case, it's necessary to use streaming mode to decompress data. +* note 2 : decompressed size is an optional field, that may not be present. +* When `return==0`, data to decompress can have any size. +* In which case, it's necessary to use streaming mode to decompress data. +* Optionally, application may rely on its own implied limits. +* (For example, application data could be necessarily cut into blocks <= 16 KB). +* note 3 : decompressed size could be wrong or intentionally modified ! +* Always ensure result fits within application's authorized limits ! +* Each application can set its own limits. +* note 4 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompress() : + `compressedSize` : must be the _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize (see ZSTD_getDecompressedSize() ). + If originalSize is unknown, and if there is no implied application-specific limitations, + it's preferable to use streaming mode to decompress data. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + +/*====== Helper functions ======*/ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Compression context */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); + +/** ZSTD_compressCCtx() : + Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); + +/** Decompression context */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); + +/** ZSTD_decompressDCtx() : +* Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : +* Compression using a predefined Dictionary (see dictBuilder/zdict.h). +* Note : This function load the dictionary, resulting in significant startup delay. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : +* Decompression using a predefined Dictionary (see dictBuilder/zdict.h). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in significant startup delay */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Fast Dictionary API +****************************/ +/*! ZSTD_createCDict() : +* Create a digested dictionary, ready to start compression operation without startup delay. +* `dict` can be released after ZSTD_CDict creation */ +typedef struct ZSTD_CDict_s ZSTD_CDict; +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : +* Compression using a digested Dictionary. +* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. +* Note that compression level is decided during dictionary creation */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + +/*! ZSTD_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTD_DDict_s ZSTD_DDict; +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a digested Dictionary +* Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/*-************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + +/*====== streaming compression ======*/ + +/*-*********************************************************************** +* Streaming compression - howto +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* +* Start by initializing ZSTD_CStream. +* Use ZSTD_initCStream() to start a new compression operation. +* Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary. +* +* Use ZSTD_compressStream() repetitively to consume input stream. +* The function will automatically update both `pos`. +* Note that it may not consume the entire input, in which case `pos < size`, +* and it's up to the caller to present again remaining data. +* @return : a size hint, preferred nb of bytes to use as input for next function call +* (it's just a hint, to help latency a little, any other value will work fine) +* (note : the size hint is guaranteed to be <= ZSTD_CStreamInSize() ) +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data remains within buffer, using ZSTD_flushStream(). +* `output->pos` will be updated. +* Note some content might still be left within internal buffer if `output->size` is too small. +* @return : nb of bytes still present within internal buffer (0 if it's empty) +* or an error code, which can be tested using ZSTD_isError(). +* +* ZSTD_endStream() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small. +* In which case, call again ZSTD_endStream() to complete the flush. +* @return : nb of bytes still present within internal buffer (0 if it's empty) +* or an error code, which can be tested using ZSTD_isError(). +* +* *******************************************************************/ + +typedef struct ZSTD_CStream_s ZSTD_CStream; +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); + +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer */ + +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*====== decompression ======*/ + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation, +* or ZSTD_initDStream_usingDict() if decompression requires a dictionary. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos`. +* Note that it may not consume the entire input (pos < size), +* in which case it's up to the caller to present remaining input again. +* @return : 0 when a frame is completely decoded and fully flushed, +* 1 when there is still some data left within internal buffer to flush, +* >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency, any size is accepted), +* or an error code, which can be tested using ZSTD_isError(). +* +* *******************************************************************************/ + +typedef struct ZSTD_DStream_s ZSTD_DStream; +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer */ + +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + + + +#ifdef ZSTD_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +/*--- Constants ---*/ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U + +#define ZSTD_WINDOWLOG_MAX_32 25 +#define ZSTD_WINDOWLOG_MAX_64 27 +#define ZSTD_WINDOWLOG_MAX ((U32)(MEM_32bits() ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 18 +#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) +#define ZSTD_CHAINLOG_MIN 4 +#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX +#define ZSTD_HASHLOG_MIN 12 +#define ZSTD_HASHLOG3_MAX 17 +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_SEARCHLENGTH_MAX 7 +#define ZSTD_SEARCHLENGTH_MIN 3 +#define ZSTD_TARGETLENGTH_MIN 4 +#define ZSTD_TARGETLENGTH_MAX 999 + +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ +static const size_t ZSTD_frameHeaderSize_min = 5; +static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; +static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ + + +/*--- Types ---*/ +typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /* from faster to stronger */ + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned searchLength; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; +} ZSTD_compressionParameters; + +typedef struct { + unsigned contentSizeFlag; /**< 1: content size will be in frame header (if known). */ + unsigned checksumFlag; /**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */ + unsigned noDictIDFlag; /**< 1: no dict ID will be saved into frame header (if dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +/* custom memory allocation functions */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; + + +/*-************************************* +* Advanced compression functions +***************************************/ +/*! ZSTD_estimateCCtxSize() : + * Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters. + * `frameContentSize` is an optional parameter, provide `0` if unknown */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams); + +/*! ZSTD_createCCtx_advanced() : + * Create a ZSTD compression context using external alloc and free functions */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); + +/*! ZSTD_createCDict_advanced() : + * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_parameters params, ZSTD_customMem customMem); + +/*! ZSTD_sizeofCCtx() : + * Gives the amount of memory used by a given ZSTD_CCtx */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); + +/*! ZSTD_getParams() : +* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`. +* All fields of `ZSTD_frameParameters` are set to default (0) */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level and srcSize. +* `srcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : +* Ensure param values remain within authorized range */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : +* optimize params for a given `srcSize` and `dictSize`. +* both values are optional, select `0` if unknown. */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : +* Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter */ +ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + + +/*--- Advanced Decompression functions ---*/ + +/*! ZSTD_estimateDCtxSize() : + * Gives the potential amount of memory allocated to create a ZSTD_DCtx */ +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_createDCtx_advanced() : + * Create a ZSTD decompression context using external alloc and free functions */ +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); + +/*! ZSTD_sizeofDCtx() : + * Gives the amount of memory used by a given ZSTD_DCtx */ +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); + + +/* ****************************************************************** +* Advanced Streaming functions +********************************************************************/ + +/*====== compression ======*/ + +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); + + +/*====== decompression ======*/ + +typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e; + +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); + + +/* ****************************************************************** +* Buffer-less and synchronous inner streaming functions +********************************************************************/ +/* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with many restrictions (documented below). +* Prefer using normal streaming API for an easier experience */ + +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx); + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. + - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame, + Without last block mark, frames will be considered unfinished (broken) by decoders. + + You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame. +*/ + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameParams; + +ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + +/* + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams(). + It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame, + such as the minimum rolling buffer size to allocate to decompress data (`windowSize`), + and the dictionary ID used. + (Note : content size is optional, it may not be present. 0 means : content size unknown). + Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information. + As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation. + Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB. + Frame parameters are extracted from the beginning of the compressed frame. + Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes. + @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). + Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`. + They should preferably be located contiguously, prior to current block. + Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference. + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_decompressContinue() always returns 0. + For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable. + It also returns Frame Size as fparamsPtr->frameContentSize. +*/ + + +/* ************************************** +* Block functions +****************************************/ +/*! Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). + User will have to take in charge required information to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : ZSTD_compressBegin() + + decompression : ZSTD_decompressBegin() + + variants _usingDict() are also allowed + + copyCCtx() and copyDCtx() work too + - Block size is limited, it must be <= ZSTD_getBlockSizeMax() + + If you need to compress more, cut data into multiple blocks + + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. + In which case, nothing is produced into `dst`. + + User must test for such outcome and deal directly with uncompressed data + + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! + + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history. + Use ZSTD_insertBlock() in such a case. +*/ + +#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */ +ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful for uncompressed blocks */ + + +#endif /* ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_H_235446 */ diff --git a/contrib/zstd/zstd_common.c b/contrib/zstd/zstd_common.c new file mode 100644 index 0000000..54bc91c --- /dev/null +++ b/contrib/zstd/zstd_common.c @@ -0,0 +1,83 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + + +/*-************************************* +* Dependencies +***************************************/ +#include /* malloc */ +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ +#include "zbuff.h" /* declaration of ZBUFF_isError, ZBUFF_getErrorName */ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +/*! ZSTD_isError() : +* tells if a return value is an error code */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : +* provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : +* convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! ZSTD_getErrorString() : +* provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); } + + +/* ************************************************************** +* ZBUFF Error Management +****************************************************************/ +unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } + +const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +/* default uses stdlib */ +void* ZSTD_defaultAllocFunction(void* opaque, size_t size) +{ + void* address = malloc(size); + (void)opaque; + return address; +} + +void ZSTD_defaultFreeFunction(void* opaque, void* address) +{ + (void)opaque; + free(address); +} + +void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) +{ + return customMem.customAlloc(customMem.opaque, size); +} + +void ZSTD_free(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) + customMem.customFree(customMem.opaque, ptr); +} diff --git a/contrib/zstd/zstd_compress.c b/contrib/zstd/zstd_compress.c new file mode 100644 index 0000000..0116136 --- /dev/null +++ b/contrib/zstd/zstd_compress.c @@ -0,0 +1,3165 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/*-************************************* +* Dependencies +***************************************/ +#include /* memset */ +#include "mem.h" +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH_reset, update, digest */ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" /* includes zstd.h */ + + +/*-************************************* +* Constants +***************************************/ +static const U32 g_searchStrength = 8; /* control skip over incompressible data */ +#define HASH_READ_SIZE 8 +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; + + +/*-************************************* +* Helper functions +***************************************/ +size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } + + +/*-************************************* +* Sequence storage +***************************************/ +static void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CCtx_s +{ + const BYTE* nextSrc; /* next block here to continue on current prefix */ + const BYTE* base; /* All regular indexes relative to this position */ + const BYTE* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more data */ + U32 nextToUpdate; /* index from which to continue dictionary update */ + U32 nextToUpdate3; /* index from which to continue dictionary update */ + U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32 loadedDictEnd; + ZSTD_compressionStage_e stage; + U32 rep[ZSTD_REP_NUM]; + U32 savedRep[ZSTD_REP_NUM]; + U32 dictID; + ZSTD_parameters params; + void* workSpace; + size_t workSpaceSize; + size_t blockSize; + U64 frameContentSize; + XXH64_state_t xxhState; + ZSTD_customMem customMem; + + seqStore_t seqStore; /* sequences storage ptrs */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + HUF_CElt* hufTable; + U32 flagStaticTables; + FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; +}; + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(defaultCustomMem); +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_CCtx* cctx; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + memset(cctx, 0, sizeof(ZSTD_CCtx)); + memcpy(&(cctx->customMem), &customMem, sizeof(ZSTD_customMem)); + return cctx; +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + ZSTD_free(cctx->workSpace, cctx->customMem); + ZSTD_free(cctx, cctx->customMem); + return 0; /* reserved as a potential error code in the future */ +} + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + return sizeof(*cctx) + cctx->workSpaceSize; +} + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */ +{ + return &(ctx->seqStore); +} + + +#define CLAMP(val,min,max) { if (valmax) val=max; } +#define CLAMPCHECK(val,min,max) { if ((valmax)) return ERROR(compressionParameter_unsupported); } + +/** ZSTD_checkParams() : + ensure param values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); + CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); + CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); + CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); + { U32 const searchLengthMin = (cParams.strategy == ZSTD_fast || cParams.strategy == ZSTD_greedy) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN; + U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; + CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); } + CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); + if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported); + return 0; +} + + +/** ZSTD_checkCParams_advanced() : + temporary work-around, while the compressor compatibility remains limited regarding windowLog < 18 */ +size_t ZSTD_checkCParams_advanced(ZSTD_compressionParameters cParams, U64 srcSize) +{ + if (srcSize > (1ULL << ZSTD_WINDOWLOG_MIN)) return ZSTD_checkCParams(cParams); + if (cParams.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) return ERROR(compressionParameter_unsupported); + if (srcSize <= (1ULL << cParams.windowLog)) cParams.windowLog = ZSTD_WINDOWLOG_MIN; /* fake value - temporary work around */ + if (srcSize <= (1ULL << cParams.chainLog)) cParams.chainLog = ZSTD_CHAINLOG_MIN; /* fake value - temporary work around */ + if ((srcSize <= (1ULL << cParams.hashLog)) && ((U32)cParams.strategy < (U32)ZSTD_btlazy2)) cParams.hashLog = ZSTD_HASHLOG_MIN; /* fake value - temporary work around */ + return ZSTD_checkCParams(cParams); +} + + +/** ZSTD_adjustCParams() : + optimize cPar for a given input (`srcSize` and `dictSize`). + mostly downsizing to reduce memory consumption and initialization. + Both `srcSize` and `dictSize` are optional (use 0 if unknown), + but if both are 0, no optimization can be done. + Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */ +ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) +{ + if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */ + + /* resize params, to use less memory when necessary */ + { U32 const minSrcSize = (srcSize==0) ? 500 : 0; + U64 const rSize = srcSize + dictSize + minSrcSize; + if (rSize < ((U64)1< srcLog) cPar.windowLog = srcLog; + } } + if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; + { U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) || (cPar.strategy == ZSTD_btopt); + U32 const maxChainLog = cPar.windowLog+btPlus; + if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */ + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ + if ((cPar.hashLog < ZSTD_HASHLOG_MIN) && ( (U32)cPar.strategy >= (U32)ZSTD_btlazy2)) cPar.hashLog = ZSTD_HASHLOG_MIN; /* required to ensure collision resistance in bt */ + + return cPar; +} + + +size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams) +{ + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog); + U32 const divider = (cParams.searchLength==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = blockSize + 11*maxNbSeq; + + size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog); + size_t const hSize = ((size_t)1) << cParams.hashLog; + U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog); + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + + size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog); + size_t const h3Size = ((size_t)1) << hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + void* ptr; + + /* Check if workSpace is large enough, alloc a new one if needed */ + { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { + ZSTD_free(zc->workSpace, zc->customMem); + zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); + if (zc->workSpace == NULL) return ERROR(memory_allocation); + zc->workSpaceSize = neededSpace; + } } + + if (reset) memset(zc->workSpace, 0, tableSpace ); /* reset only tables */ + XXH64_reset(&zc->xxhState, 0); + zc->hashLog3 = hashLog3; + zc->hashTable = (U32*)(zc->workSpace); + zc->chainTable = zc->hashTable + hSize; + zc->hashTable3 = zc->chainTable + chainSize; + ptr = zc->hashTable3 + h3Size; + zc->hufTable = (HUF_CElt*)ptr; + zc->flagStaticTables = 0; + ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */ + + zc->nextToUpdate = 1; + zc->nextSrc = NULL; + zc->base = NULL; + zc->dictBase = NULL; + zc->dictLimit = 0; + zc->lowLimit = 0; + zc->params = params; + zc->blockSize = blockSize; + zc->frameContentSize = frameContentSize; + { int i; for (i=0; irep[i] = repStartValue[i]; } + + if (params.cParams.strategy == ZSTD_btopt) { + zc->seqStore.litFreq = (U32*)ptr; + zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); + zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1); + ptr = zc->seqStore.offCodeFreq + (MaxOff+1); + zc->seqStore.matchTable = (ZSTD_match_t*)ptr; + ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1; + zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr; + ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1; + zc->seqStore.litLengthSum = 0; + } + zc->seqStore.sequencesStart = (seqDef*)ptr; + ptr = zc->seqStore.sequencesStart + maxNbSeq; + zc->seqStore.llCode = (BYTE*) ptr; + zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; + zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; + zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; + + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->loadedDictEnd = 0; + + return 0; +} + + +/*! ZSTD_copyCCtx() : +* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. +* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx) +{ + if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong); + + memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, srcCCtx->frameContentSize, 0); + dstCCtx->params.fParams.contentSizeFlag = 0; /* content size different from the one set during srcCCtx init */ + + /* copy tables */ + { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); + size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog; + size_t const h3Size = (size_t)1 << srcCCtx->hashLog3; + size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); + memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); + } + + /* copy dictionary offsets */ + dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; + dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3; + dstCCtx->nextSrc = srcCCtx->nextSrc; + dstCCtx->base = srcCCtx->base; + dstCCtx->dictBase = srcCCtx->dictBase; + dstCCtx->dictLimit = srcCCtx->dictLimit; + dstCCtx->lowLimit = srcCCtx->lowLimit; + dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd; + dstCCtx->dictID = srcCCtx->dictID; + + /* copy entropy tables */ + dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; + if (srcCCtx->flagStaticTables) { + memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4); + memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); + memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); + memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); + } + + return 0; +} + + +/*! ZSTD_reduceTable() : +* reduce table indexes by `reducerValue` */ +static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue) +{ + U32 u; + for (u=0 ; u < size ; u++) { + if (table[u] < reducerValue) table[u] = 0; + else table[u] -= reducerValue; + } +} + +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +{ + { U32 const hSize = 1 << zc->params.cParams.hashLog; + ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); } + + { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); + ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); } + + { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; + ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See zstd_compression_format.md for detailed format description */ + +size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); + return ZSTD_blockHeaderSize+srcSize; +} + + +static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + default: /*note : should not be necessary : flSize is within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + + +static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } + +static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t const minGain = ZSTD_minGain(srcSize); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + + /* small ? don't even attempt compression (speed opt) */ +# define LITERAL_NOENTROPY 63 + { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ + if (zc->flagStaticTables && (lhSize==3)) { + hType = set_repeat; + singleStream = 1; + cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); + } else { + cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11) + : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11); + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + if (cLitSize==1) + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + default: /* should not be necessary, lhSize is only {3,4,5} */ + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + } + return lhSize+cLitSize; +} + +static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24 }; + +static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + BYTE const LL_deltaCode = 19; + BYTE const ML_deltaCode = 36; + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + for (u=0; u 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv]; + ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); + mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv]; + } + if (seqStorePtr->longLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + + +size_t ZSTD_compressSequences(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + size_t srcSize) +{ + const seqStore_t* seqStorePtr = &(zc->seqStore); + U32 count[MaxSeq+1]; + S16 norm[MaxSeq+1]; + FSE_CTable* CTable_LitLength = zc->litlengthCTable; + FSE_CTable* CTable_OffsetBits = zc->offcodeCTable; + FSE_CTable* CTable_MatchLength = zc->matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* seqHead; + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = seqStorePtr->lit - literals; + size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); + if (ZSTD_isError(cSize)) return cSize; + op += cSize; + } + + /* Sequences Header */ + if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); + if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) goto _check_compressibility; + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + +#define MIN_SEQ_FOR_DYNAMIC_FSE 64 +#define MAX_SEQ_FOR_STATIC_FSE 1000 + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + + /* CTable for Literal Lengths */ + { U32 max = MaxLL; + size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = llCodeTable[0]; + FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); + LLtype = set_rle; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + LLtype = set_repeat; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { + FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); + LLtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); + if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + LLtype = set_compressed; + } } + + /* CTable for Offsets */ + { U32 max = MaxOff; + size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = ofCodeTable[0]; + FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); + Offtype = set_rle; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + Offtype = set_repeat; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { + FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog); + Offtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); + if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + Offtype = set_compressed; + } } + + /* CTable for MatchLengths */ + { U32 max = MaxML; + size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); + if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + *op++ = *mlCodeTable; + FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); + MLtype = set_rle; + } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + MLtype = set_repeat; + } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { + FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); + MLtype = set_basic; + } else { + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); + if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } + FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); + { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ + if (FSE_isError(NCountSize)) return ERROR(GENERIC); + op += NCountSize; } + FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + MLtype = set_compressed; + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + zc->flagStaticTables = 0; + + /* Encoding Sequences */ + { BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + { size_t const errorCode = BIT_initCStream(&blockStream, op, oend-op); + if (ERR_isError(errorCode)) return ERROR(dstSize_tooSmall); } /* not enough space remaining */ + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + BIT_flushBits(&blockStream); /* (7)*/ + } } + + FSE_flushCState(&blockStream, &stateMatchLength); + FSE_flushCState(&blockStream, &stateOffsetBits); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ + op += streamSize; + } } + + /* check compressibility */ +_check_compressibility: + { size_t const minGain = ZSTD_minGain(srcSize); + size_t const maxCSize = srcSize - minGain; + if ((size_t)(op-ostart) >= maxCSize) return 0; } + + /* confirm repcodes */ + { int i; for (i=0; irep[i] = zc->savedRep[i]; } + + return op - ostart; +} + + +/*! ZSTD_storeSeq() : + Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. + `offsetCode` : distance to match, or 0 == repCode. + `matchCode` : matchLength - MINMATCH +*/ +MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode) +{ +#if 0 /* for debug */ + static const BYTE* g_start = NULL; + const U32 pos = (U32)(literals - g_start); + if (g_start==NULL) g_start = literals; + //if ((pos > 1) && (pos < 50000)) + printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", + pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); +#endif + /* copy Literals */ + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offsetCode + 1; + + /* match Length */ + if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); } + seqStorePtr->sequences[0].matchLength = (U16)matchCode; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } + if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + + +/*-************************************* +* Fast Scan +***************************************/ +static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls) +{ + U32* const hashTable = zc->hashTable; + U32 const hBits = zc->params.cParams.hashLog; + const BYTE* const base = zc->base; + const BYTE* ip = base + zc->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashTable = cctx->hashTable; + U32 const hBits = cctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hBits, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + hashTable[h] = current; /* update hash table */ + + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved; + cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* hashTable = ctx->hashTable; + const U32 hBits = ctx->params.cParams.hashLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hBits, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashTable[h] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ( (matchIndex < lowestIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset = current - matchIndex; + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; + hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + U32 const mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; + } +} + + +/*-************************************* +* Double Fast +***************************************/ +static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls) +{ + U32* const hashLarge = cctx->hashTable; + U32 const hBitsL = cctx->params.cParams.hashLog; + U32* const hashSmall = cctx->chainTable; + U32 const hBitsS = cctx->params.cParams.chainLog; + const BYTE* const base = cctx->base; + const BYTE* ip = base + cctx->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const size_t fastHashFillStep = 3; + + while(ip <= iend) { + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); + hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); + ip += fastHashFillStep; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashLong = cctx->hashTable; + const U32 hBitsL = cctx->params.cParams.hashLog; + U32* const hashSmall = cctx->chainTable; + const U32 hBitsS = cctx->params.cParams.chainLog; + seqStore_t* seqStorePtr = &(cctx->seqStore); + const BYTE* const base = cctx->base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = cctx->dictLimit; + const BYTE* const lowest = base + lowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1]; + U32 offsetSaved = 0; + + /* init */ + ip += (ip==lowest); + { U32 const maxRep = (U32)(ip-lowest); + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + U32 const current = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 const matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + hashLong[h2] = hashSmall[h] = current; /* update hash tables */ + + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */ + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + U32 offset; + if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* match3 = base + matchIndex3; + hashLong[h3] = current + 1; + if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + mLength = ZSTD_count(ip+9, match3+8, iend) + 8; + ip++; + offset = (U32)(ip-match3); + while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip-match); + while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + } else { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } + + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* save reps for next block */ + cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved; + cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + const U32 mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; + } +} + + +static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 mls) +{ + U32* const hashLong = ctx->hashTable; + U32 const hBitsL = ctx->params.cParams.hashLog; + U32* const hashSmall = ctx->chainTable; + U32 const hBitsS = ctx->params.cParams.chainLog; + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const dictStart = dictBase + lowestIndex; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const lowPrefixPtr = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1]; + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 current = (U32)(ip-base); + const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ + const BYTE* repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ + + if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4; + ip++; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8; + offset = current - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = current + 1; + if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8; + ip++; + offset = current+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend; + const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4; + offset = current - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> g_searchStrength) + 1; + continue; + } } + + /* found a match : store it */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; + hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; + hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + U32 const mls = ctx->params.cParams.searchLength; + switch(mls) + { + default: + case 4 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; + case 5 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; + case 6 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; + case 7 : + ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; + } +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. +* ip : assumed <= iend-8 . +* @return : nb of positions added */ +static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + U32 const hashLog = zc->params.cParams.hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + U32 const btLog = zc->params.cParams.chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match = base + matchIndex; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = zc->lowLimit; + U32 matchEndIdx = current+8; + size_t bestLength = 8; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ + + if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + if (matchEndIdx > current + 8) return matchEndIdx - current - 8; + return 1; +} + + +static size_t ZSTD_insertBtAndFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 nbCompares, const U32 mls, + U32 extDict) +{ + U32* const hashTable = zc->hashTable; + U32 const hashLog = zc->params.cParams.hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = zc->chainTable; + U32 const btLog = zc->params.cParams.chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 current = (U32)(ip-base); + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1; + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; + if (ip+matchLength == iend) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return bestLength; +} + + +static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) + idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0); +} + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + +static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls) +{ + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1); +} + + +/** Tree updater, providing best match */ +static size_t ZSTD_BtFindBestMatch_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); +} + + +static size_t ZSTD_BtFindBestMatch_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); + case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); + case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (ie. not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls) +{ + U32* const hashTable = zc->hashTable; + const U32 hashLog = zc->params.cParams.hashLog; + U32* const chainTable = zc->chainTable; + const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; + const BYTE* const base = zc->base; + const U32 target = (U32)(ip - base); + U32 idx = zc->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + zc->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + + + +FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */ +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 mls, const U32 extDict) +{ + U32* const chainTable = zc->chainTable; + const U32 chainSize = (1 << zc->params.cParams.chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = zc->base; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 lowLimit = zc->lowLimit; + const U32 current = (U32)(ip-base); + const U32 minChain = current > chainSize ? current - chainSize : 0; + int nbAttempts=maxNbAttempts; + size_t ml=EQUAL_READ32-1; + + /* HC4 match finder */ + U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls); + + for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex >= dictLimit) { + match = base + matchIndex; + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; + } + + /* save best solution */ + if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + return ml; +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); + } +} + + +FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_CCtx* zc, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 maxNbAttempts, const U32 matchLengthSearch) +{ + switch(matchLengthSearch) + { + default : + case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); + case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); + case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base + ctx->dictLimit; + + U32 const maxSearches = 1 << ctx->params.cParams.searchLog; + U32 const mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; + U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0; + + /* init */ + ip += (ip==base); + ctx->nextToUpdate3 = ctx->nextToUpdate; + { U32 const maxRep = (U32)(ip-base); + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) { + /* repcode : we take it */ + matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < EQUAL_READ32) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32; + int const gain2 = (int)(ml2 * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = ml2, offset = 0, start = ip; + } + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */ + { start--; matchLength++; } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ((offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } + + /* Save reps for next block */ + ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset; + ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); +} + +static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); +} + + +FORCE_INLINE +void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize, + const U32 searchMethod, const U32 depth) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; + const U32 lowestIndex = ctx->lowLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ctx->lowLimit; + + const U32 maxSearches = 1 << ctx->params.cParams.searchLog; + const U32 mls = ctx->params.cParams.searchLength; + + typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit, + size_t* offsetPtr, + U32 maxNbAttempts, U32 matchLengthSearch); + searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; + + /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; + ip += (ip == prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 current = (U32)(ip-base); + + /* check repCode */ + { const U32 repIndex = (U32)(current+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < EQUAL_READ32) { + ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + int gain2 = (int)(repLength * 4); + int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=99999999; + size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repIndex = (U32)((ip-base) - offset_2); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2; + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); +} + +static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); +} + +static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); +} + +static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ + ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); +} + + +/* The optimal parser */ +#include "zstd_opt.h" + +static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_generic(ctx, src, srcSize); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + +static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + + +typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize); + +static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) +{ + static const ZSTD_blockCompressor blockCompressor[2][7] = { + { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } + }; + + return blockCompressor[extDict][(U32)strat]; +} + + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); + const BYTE* const base = zc->base; + const BYTE* const istart = (const BYTE*)src; + const U32 current = (U32)(istart-base); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */ + ZSTD_resetSeqStore(&(zc->seqStore)); + if (current > zc->nextToUpdate + 384) + zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */ + blockCompressor(zc, src, srcSize); + return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); +} + + +/*! ZSTD_compress_generic() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = 1 << cctx->params.cParams.windowLog; + + if (cctx->params.fParams.checksumFlag) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + size_t cSize; + + if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ + if (remaining < blockSize) blockSize = remaining; + + /* preemptive overflow correction */ + if (cctx->lowLimit > (1<<30)) { + U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt); + U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1; + U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */); + U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog); /* preserve position % chainSize, ensure current-repcode doesn't underflow */ + U32 const correction = cctx->lowLimit - newLowLimit; + ZSTD_reduceIndex(cctx, correction); + cctx->base += correction; + cctx->dictBase += correction; + cctx->lowLimit = newLowLimit; + cctx->dictLimit -= correction; + if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0; + else cctx->nextToUpdate -= correction; + } + + if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { + /* enforce maxDist */ + U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist; + if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit; + if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit; + } + + cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize); + if (ZSTD_isError(cSize)) return cSize; + + if (cSize == 0) { /* block is not compressible */ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3); + if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ + memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); + cSize = ZSTD_blockHeaderSize+blockSize; + } else { + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader24); + cSize += ZSTD_blockHeaderSize; + } + + remaining -= blockSize; + dstCapacity -= cSize; + ip += blockSize; + op += cSize; + } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return op-ostart; +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const checksumFlag = params.fParams.checksumFlag>0; + U32 const windowSize = 1U << params.cParams.windowLog; + U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1)); + BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params.fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */ + 0; + BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos; + + if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall); + + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + op[4] = frameHeaderDecriptionByte; pos=5; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + const BYTE* const ip = (const BYTE*) src; + size_t fhSize = 0; + + if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ + + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + /* Check if blocks follow each other */ + if (src != cctx->nextSrc) { + /* not contiguous */ + ptrdiff_t const delta = cctx->nextSrc - ip; + cctx->lowLimit = cctx->dictLimit; + cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base); + cctx->dictBase = cctx->base; + cctx->base -= delta; + cctx->nextToUpdate = cctx->dictLimit; + if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */ + } + + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx; + cctx->lowLimit = lowLimitMax; + } + + cctx->nextSrc = ip + srcSize; + + { size_t const cSize = frame ? + ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); + if (ZSTD_isError(cSize)) return cSize; + return cSize + fhSize; + } +} + + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0); +} + + +size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx) +{ + return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); + if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0); +} + + +static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + const BYTE* const ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + /* input becomes current prefix */ + zc->lowLimit = zc->dictLimit; + zc->dictLimit = (U32)(zc->nextSrc - zc->base); + zc->dictBase = zc->base; + zc->base += ip - zc->nextSrc; + zc->nextToUpdate = zc->dictLimit; + zc->loadedDictEnd = (U32)(iend - zc->base); + + zc->nextSrc = iend; + if (srcSize <= HASH_READ_SIZE) return 0; + + switch(zc->params.cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength); + break; + + case ZSTD_btlazy2: + case ZSTD_btopt: + ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); + break; + + default: + return ERROR(GENERIC); /* strategy doesn't exist; impossible */ + } + + zc->nextToUpdate = zc->loadedDictEnd; + return 0; +} + + +/* Dictionary format : + Magic == ZSTD_DICT_MAGIC (4 bytes) + HUF_writeCTable(256) + FSE_writeNCount(off) + FSE_writeNCount(ml) + FSE_writeNCount(ll) + RepOffsets + Dictionary content +*/ +/*! ZSTD_loadDictEntropyStats() : + @return : size read from dictionary + note : magic number supposed already checked */ +static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize); + if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); + dictPtr += hufHeaderSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictPtr += litlengthHeaderSize; + } + + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); + cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); + cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); + dictPtr += 12; + + cctx->flagStaticTables = 1; + return dictPtr - (const BYTE*)dict; +} + +/** ZSTD_compress_insertDictionary() : +* @return : 0, or an error code */ +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +{ + if ((dict==NULL) || (dictSize<=8)) return 0; + + /* default : dict is pure content */ + if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize); + zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4); + + /* known magic number : dict is parsed for entropy stats and content */ + { size_t const eSize_8 = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8); + size_t const eSize = eSize_8 + 8; + if (ZSTD_isError(eSize_8)) return eSize_8; + return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); + } +} + + +/*! ZSTD_compressBegin_internal() : +* @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc, + const void* dict, size_t dictSize, + ZSTD_parameters params, U64 pledgedSrcSize) +{ + size_t const resetError = ZSTD_resetCCtx_advanced(zc, params, pledgedSrcSize, 1); + if (ZSTD_isError(resetError)) return resetError; + + return ZSTD_compress_insertDictionary(zc, dict, dictSize); +} + + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + /* compression parameters verification and optimization */ + { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize); + if (ZSTD_isError(errorCode)) return errorCode; } + + return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); +} + + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0); +} + + +size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. +* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */ + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); + if (ZSTD_isError(fhSize)) return fhSize; + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->params.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + if (dstCapacity<4) return ERROR(dstSize_tooSmall); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1); + if (ZSTD_isError(cSize)) return cSize; + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + if (ZSTD_isError(endResult)) return endResult; + return cSize + endResult; +} + + +static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + size_t const errorCode = ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize); + if(ZSTD_isError(errorCode)) return errorCode; + + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, srcSize); + if (ZSTD_isError(errorCode)) return errorCode; + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize); + params.fParams.contentSizeFlag = 1; + return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); +} + +size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +{ + return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel) +{ + size_t result; + ZSTD_CCtx ctxBody; + memset(&ctxBody, 0, sizeof(ctxBody)); + memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem)); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_free(ctxBody.workSpace, defaultCustomMem); /* can't free ctxBody itself, as it's on stack; free only heap content */ + return result; +} + + +/* ===== Dictionary API ===== */ + +struct ZSTD_CDict_s { + void* dictContent; + size_t dictContentSize; + ZSTD_CCtx* refContext; +}; /* typedef'd tp ZSTD_CDict within "zstd.h" */ + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem) +{ + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem); + void* const dictContent = ZSTD_malloc(dictSize, customMem); + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem); + + if (!dictContent || !cdict || !cctx) { + ZSTD_free(dictContent, customMem); + ZSTD_free(cdict, customMem); + ZSTD_free(cctx, customMem); + return NULL; + } + + memcpy(dictContent, dict, dictSize); + { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0); + if (ZSTD_isError(errorCode)) { + ZSTD_free(dictContent, customMem); + ZSTD_free(cdict, customMem); + ZSTD_free(cctx, customMem); + return NULL; + } } + + cdict->dictContent = dictContent; + cdict->dictContentSize = dictSize; + cdict->refContext = cctx; + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize); + params.fParams.contentSizeFlag = 1; + return ZSTD_createCDict_advanced(dict, dictSize, params, allocator); +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem cMem = cdict->refContext->customMem; + ZSTD_freeCCtx(cdict->refContext); + ZSTD_free(cdict->dictContent, cMem); + ZSTD_free(cdict, cMem); + return 0; + } +} + +/*! ZSTD_compress_usingCDict() : +* Compression using a digested Dictionary. +* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. +* Note that compression level is decided during dictionary creation */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + size_t const errorCode = ZSTD_copyCCtx(cctx, cdict->refContext); + if (ZSTD_isError(errorCode)) return errorCode; + + if (cdict->refContext->params.fParams.contentSizeFlag==1) { + cctx->params.fParams.contentSizeFlag = 1; + cctx->frameContentSize = srcSize; + } + + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage; + +struct ZSTD_CStream_s { + ZSTD_CCtx* zc; + char* inBuff; + size_t inBuffSize; + size_t inToCompress; + size_t inBuffPos; + size_t inBuffTarget; + size_t blockSize; + char* outBuff; + size_t outBuffSize; + size_t outBuffContentSize; + size_t outBuffFlushedSize; + ZSTD_cStreamStage stage; + U32 checksum; + U32 frameEnded; + ZSTD_customMem customMem; +}; /* typedef'd to ZSTD_CStream within "zstd.h" */ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + return ZSTD_createCStream_advanced(defaultCustomMem); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ + ZSTD_CStream* zcs; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem); + if (zcs==NULL) return NULL; + memset(zcs, 0, sizeof(ZSTD_CStream)); + memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem)); + zcs->zc = ZSTD_createCCtx_advanced(customMem); + if (zcs->zc == NULL) { ZSTD_freeCStream(zcs); return NULL; } + return zcs; +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + if (zcs==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = zcs->customMem; + ZSTD_freeCCtx(zcs->zc); + ZSTD_free(zcs->inBuff, cMem); + ZSTD_free(zcs->outBuff, cMem); + ZSTD_free(zcs, cMem); + return 0; + } +} + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } +size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; } + +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + /* allocate buffers */ + { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; + if (zcs->inBuffSize < neededInBuffSize) { + zcs->inBuffSize = neededInBuffSize; + ZSTD_free(zcs->inBuff, zcs->customMem); /* should not be necessary */ + zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem); + if (zcs->inBuff == NULL) return ERROR(memory_allocation); + } + zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize); + } + if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) { + zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1; + ZSTD_free(zcs->outBuff, zcs->customMem); /* should not be necessary */ + zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem); + if (zcs->outBuff == NULL) return ERROR(memory_allocation); + } + + { size_t const errorCode = ZSTD_compressBegin_advanced(zcs->zc, dict, dictSize, params, pledgedSrcSize); + if (ZSTD_isError(errorCode)) return errorCode; } + + zcs->inToCompress = 0; + zcs->inBuffPos = 0; + zcs->inBuffTarget = zcs->blockSize; + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + zcs->stage = zcss_load; + zcs->checksum = params.fParams.checksumFlag > 0; + zcs->frameEnded = 0; + return 0; /* ready to go */ +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); + return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0); +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->zc) + zcs->outBuffSize + zcs->inBuffSize; +} + +/*====== Compression ======*/ + +typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e; + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr, + ZSTD_flush_e const flush) +{ + U32 someMoreWork = 1; + const char* const istart = (const char*)src; + const char* const iend = istart + *srcSizePtr; + const char* ip = istart; + char* const ostart = (char*)dst; + char* const oend = ostart + *dstCapacityPtr; + char* op = ostart; + + while (someMoreWork) { + switch(zcs->stage) + { + case zcss_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */ + + case zcss_load: + /* complete inBuffer */ + { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip); + zcs->inBuffPos += loaded; + ip += loaded; + if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) { + someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */ + } } + /* compress current block (note : this stage cannot be stopped in the middle) */ + { void* cDst; + size_t cSize; + size_t const iSize = zcs->inBuffPos - zcs->inToCompress; + size_t oSize = oend-op; + if (oSize >= ZSTD_compressBound(iSize)) + cDst = op; /* compress directly into output buffer (avoid flush stage) */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + cSize = (flush == zsf_end) ? + ZSTD_compressEnd(zcs->zc, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs->zc, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); + if (ZSTD_isError(cSize)) return cSize; + if (flush == zsf_end) zcs->frameEnded = 1; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */ + zcs->inToCompress = zcs->inBuffPos; + if (cDst == op) { op += cSize; break; } /* no need to flush */ + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->stage = zcss_flush; /* pass-through to flush stage */ + } + + case zcss_flush: + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */ + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + zcs->stage = zcss_load; + break; + } + + case zcss_final: + someMoreWork = 0; /* do nothing */ + break; + + default: + return ERROR(GENERIC); /* impossible */ + } + } + + *srcSizePtr = ip - istart; + *dstCapacityPtr = op - ostart; + if (zcs->frameEnded) return 0; + { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; + if (hintInSize==0) hintInSize = zcs->blockSize; + return hintInSize; + } +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + size_t sizeRead = input->size - input->pos; + size_t sizeWritten = output->size - output->pos; + size_t const result = ZSTD_compressStream_generic(zcs, + (char*)(output->dst) + output->pos, &sizeWritten, + (const char*)(input->src) + input->pos, &sizeRead, zsf_gather); + input->pos += sizeRead; + output->pos += sizeWritten; + return result; +} + + +/*====== Finalize ======*/ + +/*! ZSTD_flushStream() : +* @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + size_t srcSize = 0; + size_t sizeWritten = output->size - output->pos; + size_t const result = ZSTD_compressStream_generic(zcs, + (char*)(output->dst) + output->pos, &sizeWritten, + &srcSize, &srcSize, /* use a valid src address instead of NULL */ + zsf_flush); + output->pos += sizeWritten; + if (ZSTD_isError(result)) return result; + return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + BYTE* const ostart = (BYTE*)(output->dst) + output->pos; + BYTE* const oend = (BYTE*)(output->dst) + output->size; + BYTE* op = ostart; + + if (zcs->stage != zcss_final) { + /* flush whatever remains */ + size_t srcSize = 0; + size_t sizeWritten = output->size - output->pos; + size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */ + size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + op += sizeWritten; + if (remainingToFlush) { + output->pos += sizeWritten; + return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4); + } + /* create epilogue */ + zcs->stage = zcss_final; + zcs->outBuffContentSize = !notEnded ? 0 : + ZSTD_compressEnd(zcs->zc, zcs->outBuff, zcs->outBuffSize, NULL, 0); /* write epilogue, including final empty block, into outBuff */ + } + + /* flush epilogue */ + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + op += flushed; + zcs->outBuffFlushedSize += flushed; + output->pos += op-ostart; + if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */ + return toFlush - flushed; + } +} + + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_DEFAULT_CLEVEL 1 +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" */ + /* W, C, H, S, L, TL, strat */ + { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */ + { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */ + { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */ + { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/ + { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/ + { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */ + { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */ + { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */ + { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */ + { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */ + { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */ + { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */ + { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */ + { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */ + { 25, 25, 23, 7, 3, 64, ZSTD_btopt }, /* level 20 */ + { 26, 26, 23, 7, 3,256, ZSTD_btopt }, /* level 21 */ + { 27, 27, 25, 9, 3,512, ZSTD_btopt }, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */ + { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */ + { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */ + { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */ + { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/ + { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/ + { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/ + { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */ + { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/ + { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/ + { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */ + { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ + { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ + { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/ + { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/ + { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */ + { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */ + { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */ + { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */ + { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */ + { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */ + { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */ + { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */ + { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/ + { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/ + { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/ + { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ + { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/ + { 17, 18, 17, 11, 3,512, ZSTD_btopt }, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */ + { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */ + { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */ + { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/ + { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/ + { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/ + { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/ + { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/ + { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ + { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/ + { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/ +}, +}; + +/*! ZSTD_getCParams() : +* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`. +* Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) +{ + ZSTD_compressionParameters cp; + size_t const addedSize = srcSize ? 0 : 500; + U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1; + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ + if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */ + if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL; + cp = ZSTD_defaultCParameters[tableID][compressionLevel]; + if (MEM_32bits()) { /* auto-correction, for 32-bits mode */ + if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX; + if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX; + if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX; + } + cp = ZSTD_adjustCParams(cp, srcSize, dictSize); + return cp; +} + +/*! ZSTD_getParams() : +* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). +* All fields of `ZSTD_frameParameters` are set to default (0) */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); + memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + return params; +} diff --git a/contrib/zstd/zstd_decompress.c b/contrib/zstd/zstd_decompress.c new file mode 100644 index 0000000..fb1ee35 --- /dev/null +++ b/contrib/zstd/zstd_decompress.c @@ -0,0 +1,1581 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() will allocate memory, + * in memory stack (0), or in memory heap (1, requires malloc()) + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + + +/*! +* MAXWINDOWSIZE_DEFAULT : +* maximum window size accepted by DStream, by default. +* Frames requiring more memory will be rejected. +*/ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (257 << 20) /* 257 MB */ +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include /* memcpy, memmove, memset */ +#include "mem.h" /* low level memory routines */ +#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#include "xxhash.h" /* XXH64_* */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include "zstd_internal.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +# include "zstd_legacy.h" +#endif + + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# define FORCE_INLINE static __forceinline +# include /* For Visual 2005 */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + + +/*-************************************* +* Macros +***************************************/ +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } + + +/*-************************************************************* +* Context management +***************************************************************/ +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +struct ZSTD_DCtx_s +{ + FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; + FSE_DTable OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; + FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + const void* previousDstEnd; + const void* base; + const void* vBase; + const void* dictEnd; + size_t expected; + U32 rep[ZSTD_REP_NUM]; + ZSTD_frameParams fParams; + blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + U32 dictID; + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litBufSize; + size_t litSize; + size_t rleSize; + BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { return sizeof(*dctx); } + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize_min; + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + dctx->vBase = NULL; + dctx->dictEnd = NULL; + dctx->hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + MEM_STATIC_ASSERT(sizeof(dctx->rep)==sizeof(repStartValue)); + memcpy(dctx->rep, repStartValue, sizeof(repStartValue)); + return 0; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_DCtx* dctx; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + dctx = (ZSTD_DCtx*) ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); + if (!dctx) return NULL; + memcpy(&dctx->customMem, &customMem, sizeof(customMem)); + ZSTD_decompressBegin(dctx); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + return ZSTD_createDCtx_advanced(defaultCustomMem); +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + ZSTD_free(dctx, dctx->customMem); + return 0; /* reserved as a potential error code in the future */ +} + +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; + memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ +} + + +/*-************************************************************* +* Decompression section +***************************************************************/ + +/* See compression format details in : zstd_compression_format.md */ + +/** ZSTD_frameHeaderSize() : +* srcSize must be >= ZSTD_frameHeaderSize_min. +* @return : size of the Frame Header */ +static size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); + { BYTE const fhd = ((const BYTE*)src)[4]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return ZSTD_frameHeaderSize_min + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + + +/** ZSTD_getFrameParams() : +* decode Frame Header, or require larger `srcSize`. +* @return : 0, `fparamsPtr` is correctly filled, +* >0, `srcSize` is too small, result is expected `srcSize`, +* or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + + if (srcSize < ZSTD_frameHeaderSize_min) return ZSTD_frameHeaderSize_min; + if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) { + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + if (srcSize < ZSTD_skippableHeaderSize) return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ + memset(fparamsPtr, 0, sizeof(*fparamsPtr)); + fparamsPtr->frameContentSize = MEM_readLE32((const char *)src + 4); + fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ + return 0; + } + return ERROR(prefix_unknown); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); + if (srcSize < fhsize) return fhsize; } + + { BYTE const fhdByte = ip[4]; + size_t pos = 5; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; + U32 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = 0; + if ((fhdByte & 0x08) != 0) return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + if (windowLog > ZSTD_WINDOWLOG_MAX) return ERROR(frameParameter_unsupported); + windowSize = (1U << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + + switch(dictIDSizeCode) + { + default: /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (!windowSize) windowSize = (U32)frameContentSize; + if (windowSize > windowSizeMax) return ERROR(frameParameter_unsupported); + fparamsPtr->frameContentSize = frameContentSize; + fparamsPtr->windowSize = windowSize; + fparamsPtr->dictID = dictID; + fparamsPtr->checksumFlag = checksumFlag; + } + return 0; +} + + +/** ZSTD_getDecompressedSize() : +* compatible with legacy mode +* @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - decompressed size is not present within frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) + if (ZSTD_isLegacy(src, srcSize)) return ZSTD_getDecompressedSize_legacy(src, srcSize); +#endif + { ZSTD_frameParams fparams; + size_t const frResult = ZSTD_getFrameParams(&fparams, src, srcSize); + if (frResult!=0) return 0; + return fparams.frameContentSize; + } +} + + +/** ZSTD_decodeFrameHeader() : +* `srcSize` must be the size provided by ZSTD_frameHeaderSize(). +* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t srcSize) +{ + size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize); + if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong); + if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + return result; +} + + +typedef struct +{ + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; + +/*! ZSTD_getcBlockSize() : +* Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected); + return cSize; + } +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall); + memcpy(dst, src, srcSize); + return srcSize; +} + + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize) +{ + if (srcSize != 1) return ERROR(srcSize_wrong); + if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, *(const BYTE*)src, regenSize); + return regenSize; +} + +/*! ZSTD_decodeLiteralsBlock() : + @return : nb of bytes read from src (< srcSize ) */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); + /* fall-through */ + case set_compressed: + if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + { singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + } + case 2: + /* 2 - 2 - 14 - 14 */ + { lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + } + case 3: + /* 2 - 2 - 18 - 18 */ + { lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + (istart[4] << 10); + break; + } + } + if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected); + if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); + + if (HUF_isError((litEncType==set_repeat) ? + ( singleStream ? + HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable) : + HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTable) ) : + ( singleStream ? + HUF_decompress1X2_DCtx(dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize) : + HUF_decompress4X_hufOnly (dctx->hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize)) )) + return ERROR(corruption_detected); + + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH; + dctx->litSize = litSize; + dctx->litEntropy = 1; + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + if (litSize+lhSize > srcSize) return ERROR(corruption_detected); + memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+8; + dctx->litSize = litSize; + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litBufSize = srcSize-lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ + break; + } + if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected); + memset(dctx->litBuffer, istart[lhSize], litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH; + dctx->litSize = litSize; + return lhSize+1; + } + default: + return ERROR(corruption_detected); /* impossible */ + } + + } +} + + +/*! ZSTD_buildSeqTable() : + @return : nb bytes read from src, + or an error code if it fails, testable with ZSTD_isError() +*/ +FORCE_INLINE size_t ZSTD_buildSeqTable(FSE_DTable* DTable, symbolEncodingType_e type, U32 max, U32 maxLog, + const void* src, size_t srcSize, + const S16* defaultNorm, U32 defaultLog, U32 flagRepeatTable) +{ + switch(type) + { + case set_rle : + if (!srcSize) return ERROR(srcSize_wrong); + if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected); + FSE_buildDTable_rle(DTable, *(const BYTE*)src); /* if *src > max, data is corrupted */ + return 1; + case set_basic : + FSE_buildDTable(DTable, defaultNorm, max, defaultLog); + return 0; + case set_repeat: + if (!flagRepeatTable) return ERROR(corruption_detected); + return 0; + default : /* impossible */ + case set_compressed : + { U32 tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + if (FSE_isError(headerSize)) return ERROR(corruption_detected); + if (tableLog > maxLog) return ERROR(corruption_detected); + FSE_buildDTable(DTable, norm, max, tableLog); + return headerSize; + } } +} + + +size_t ZSTD_decodeSeqHeaders(int* nbSeqPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, U32 flagRepeatTable, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + + /* check */ + if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); + + /* SeqHead */ + { int nbSeq = *ip++; + if (!nbSeq) { *nbSeqPtr=0; return 1; } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) + nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + else + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + *nbSeqPtr = nbSeq; + } + + /* FSE table descriptors */ + if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); + if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); + ip += llhSize; + } + { size_t const ofhSize = ZSTD_buildSeqTable(DTableOffb, OFtype, MaxOff, OffFSELog, ip, iend-ip, OF_defaultNorm, OF_defaultNormLog, flagRepeatTable); + if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); + ip += ofhSize; + } + { size_t const mlhSize = ZSTD_buildSeqTable(DTableML, MLtype, MaxML, MLFSELog, ip, iend-ip, ML_defaultNorm, ML_defaultNormLog, flagRepeatTable); + if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); + ip += mlhSize; + } } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; +} seq_t; + +typedef struct { + BIT_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset[ZSTD_REP_NUM]; +} seqState_t; + + +static seq_t ZSTD_decodeSequence(seqState_t* seqState) +{ + seq_t seq; + + U32 const llCode = FSE_peekSymbol(&seqState->stateLL); + U32 const mlCode = FSE_peekSymbol(&seqState->stateML); + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + + U32 const llBits = LL_bits[llCode]; + U32 const mlBits = ML_bits[mlCode]; + U32 const ofBits = ofCode; + U32 const totalBits = llBits+mlBits+ofBits; + + static const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + + static const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + static const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + + /* sequence */ + { size_t offset; + if (!ofCode) + offset = 0; + else { + offset = OF_base[ofCode] + BIT_readBits(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + + if (ofCode <= 1) { + offset += (llCode==0); + if (offset) { + size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } else { + offset = seqState->prevOffset[0]; + } + } else { + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + seq.offset = offset; + } + + seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBits(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ + if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + + seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&seqState->DStream, llBits) : 0); /* <= 16 bits */ + if (MEM_32bits() || + (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); + + /* ANS state update */ + FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + + return seq; +} + + +FORCE_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit_w, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + + /* copy Literals */ + ZSTD_copy8(op, *litPtr); + if (sequence.litLength > 8) + ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + + /* match within prefix */ + if (sequence.offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ + int const sub2 = dec64table[sequence.offset]; + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += dec32table[sequence.offset]; + ZSTD_copy4(op+4, match); + match -= sub2; + } else { + ZSTD_copy8(op, match); + } + op += 8; match += 8; + + if (oMatchEnd > oend-(16-MINMATCH)) { + if (op < oend_w) { + ZSTD_wildcopy(op, match, oend_w - op); + match += oend_w - op; + op = oend_w; + } + while (op < oMatchEnd) *op++ = *match++; + } else { + ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ + } + return sequenceLength; +} + + +static size_t ZSTD_decompressSequences( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH; + const BYTE* const litEnd = litPtr + dctx->litSize; + FSE_DTable* DTableLL = dctx->LLTable; + FSE_DTable* DTableML = dctx->MLTable; + FSE_DTable* DTableOffb = dctx->OffTable; + const BYTE* const base = (const BYTE*) (dctx->base); + const BYTE* const vBase = (const BYTE*) (dctx->vBase); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + int nbSeq; + + /* Build Decoding Tables */ + { size_t const seqHSize = ZSTD_decodeSeqHeaders(&nbSeq, DTableLL, DTableML, DTableOffb, dctx->fseEntropy, ip, seqSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + } + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; irep[i]; } + { size_t const errorCode = BIT_initDStream(&(seqState.DStream), ip, iend-ip); + if (ERR_isError(errorCode)) return ERROR(corruption_detected); } + FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL); + FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb); + FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML); + + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { + nbSeq--; + { seq_t const sequence = ZSTD_decodeSequence(&seqState); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd); + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } } + + /* check if reached exact end */ + if (nbSeq) return ERROR(corruption_detected); + /* save reps for next block */ + { U32 i; for (i=0; irep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + + return op-ostart; +} + + +static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dst; + dctx->previousDstEnd = dst; + } +} + + +static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + + if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(srcSize_wrong); + + /* Decode literals sub-block */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} + + +/** ZSTD_insertBlock() : + insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + ZSTD_checkContinuity(dctx, blockStart); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length) +{ + if (length > dstCapacity) return ERROR(dstSize_tooSmall); + memset(dst, byte, length); + return length; +} + + +/*! ZSTD_decompressFrame() : +* `dctx` must be properly initialized */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t remainingSize = srcSize; + + /* check */ + if (srcSize < ZSTD_frameHeaderSize_min+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); + size_t result; + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + if (srcSize < frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + result = ZSTD_decodeFrameHeader(dctx, src, frameHeaderSize); + if (ZSTD_isError(result)) return result; + ip += frameHeaderSize; remainingSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (cBlockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + return ERROR(corruption_detected); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); + op += decodedSize; + ip += cBlockSize; + remainingSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + if (remainingSize<4) return ERROR(checksum_wrong); + checkRead = MEM_readLE32(ip); + if (checkRead != checkCalc) return ERROR(checksum_wrong); + remainingSize -= 4; + } + + if (remainingSize) return ERROR(srcSize_wrong); + return op-ostart; +} + + +/*! ZSTD_decompress_usingPreparedDCtx() : +* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded. +* It avoids reloading the dictionary each time. +* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict(). +* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */ +size_t ZSTD_decompress_usingPreparedDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* refDCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_copyDCtx(dctx, refDCtx); + ZSTD_checkContinuity(dctx, dst); + return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); +} + + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) + if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize); +#endif + ZSTD_decompressBegin_usingDict(dctx, dict, dictSize); + ZSTD_checkContinuity(dctx, dst); + return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize); +} + + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE==1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (dctx==NULL) return ERROR(memory_allocation); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ + +/** ZSTD_decompressContinue() : +* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) +* or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + /* Sanity check */ + if (srcSize != dctx->expected) return ERROR(srcSize_wrong); + if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + if (srcSize != ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong); /* impossible */ + if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { + memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_min); + dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_min; /* magic number + skippable frame length */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } + dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_min); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_min); + if (dctx->headerSize > ZSTD_frameHeaderSize_min) { + dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_min; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + } + dctx->expected = 0; /* not necessary to copy more */ + + case ZSTDds_decodeFrameHeader: + { size_t result; + memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); + result = ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize); + if (ZSTD_isError(result)) return result; + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + } + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = 3; /* go directly to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); + break; + case bt_raw : + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); + break; + case bt_reserved : /* should never happen */ + default: + return ERROR(corruption_detected); + } + if (ZSTD_isError(rSize)) return rSize; + if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + dctx->previousDstEnd = (char*)dst + rSize; + } + return rSize; + } + case ZSTDds_checkChecksum: + { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ + if (check32 != h32) return ERROR(checksum_wrong); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + case ZSTDds_decodeSkippableHeader: + { memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected); + dctx->expected = MEM_readLE32(dctx->headerBuffer + 4); + dctx->stage = ZSTDds_skipFrame; + return 0; + } + case ZSTDds_skipFrame: + { dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + default: + return ERROR(GENERIC); /* impossible */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); + dctx->base = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; + return 0; +} + +static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + { size_t const hSize = HUF_readDTableX4(dctx->hufTable, dict, dictSize); + if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + { size_t const errorCode = FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog); + if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); } + dictPtr += litlengthHeaderSize; + } + + if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); + dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); + dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); + dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); + dictPtr += 12; + + dctx->litEntropy = dctx->fseEntropy = 1; + return dictPtr - (const BYTE*)dict; +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_DICT_MAGIC) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + 4); + + /* load entropy tables */ + dict = (const char*)dict + 8; + dictSize -= 8; + { size_t const eSize = ZSTD_loadEntropy(dctx, dict, dictSize); + if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + { size_t const errorCode = ZSTD_decompressBegin(dctx); + if (ZSTD_isError(errorCode)) return errorCode; } + + if (dict && dictSize) { + size_t const errorCode = ZSTD_decompress_insertDictionary(dctx, dict, dictSize); + if (ZSTD_isError(errorCode)) return ERROR(dictionary_corrupted); + } + + return 0; +} + + +struct ZSTD_DDict_s { + void* dict; + size_t dictSize; + ZSTD_DCtx* refContext; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem) +{ + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + void* const dictContent = ZSTD_malloc(dictSize, customMem); + ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem); + + if (!dictContent || !ddict || !dctx) { + ZSTD_free(dictContent, customMem); + ZSTD_free(ddict, customMem); + ZSTD_free(dctx, customMem); + return NULL; + } + + memcpy(dictContent, dict, dictSize); + { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize); + if (ZSTD_isError(errorCode)) { + ZSTD_free(dictContent, customMem); + ZSTD_free(ddict, customMem); + ZSTD_free(dctx, customMem); + return NULL; + } } + + ddict->dict = dictContent; + ddict->dictSize = dictSize; + ddict->refContext = dctx; + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, ready to start decompression without startup delay. +* `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, allocator); +} + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->refContext->customMem; + ZSTD_freeDCtx(ddict->refContext); + ZSTD_free(ddict->dict, cMem); + ZSTD_free(ddict, cMem); + return 0; + } +} + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1) + if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize); +#endif + return ZSTD_decompress_usingPreparedDCtx(dctx, ddict->refContext, + dst, dstCapacity, + src, srcSize); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +typedef enum { zdss_init, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +/* *** Resource management *** */ +struct ZSTD_DStream_s { + ZSTD_DCtx* zd; + ZSTD_frameParams fParams; + ZSTD_dStreamStage stage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t blockSize; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + size_t lhSize; + ZSTD_customMem customMem; + void* dictContent; + size_t dictSize; + const void* dictSource; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; +}; /* typedef'd to ZSTD_DStream within "zstd.h" */ + + +ZSTD_DStream* ZSTD_createDStream(void) +{ + return ZSTD_createDStream_advanced(defaultCustomMem); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + ZSTD_DStream* zds; + + if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem; + if (!customMem.customAlloc || !customMem.customFree) return NULL; + + zds = (ZSTD_DStream*) ZSTD_malloc(sizeof(ZSTD_DStream), customMem); + if (zds==NULL) return NULL; + memset(zds, 0, sizeof(ZSTD_DStream)); + memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); + zds->zd = ZSTD_createDCtx_advanced(customMem); + if (zds->zd == NULL) { ZSTD_freeDStream(zds); return NULL; } + zds->stage = zdss_init; + zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + return zds; +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + if (zds==NULL) return 0; /* support free on null */ + { ZSTD_customMem const cMem = zds->customMem; + ZSTD_freeDCtx(zds->zd); + ZSTD_free(zds->inBuff, cMem); + ZSTD_free(zds->outBuff, cMem); + ZSTD_free(zds->dictContent, cMem); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (zds->legacyContext) + ZSTD_freeLegacyStreamContext(zds->legacyContext, zds->previousLegacyVersion); +#endif + ZSTD_free(zds, cMem); + return 0; + } +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } + +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + zds->stage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + if ((dict != zds->dictSource) | (dictSize != zds->dictSize)) { /* new dictionary */ + if (dictSize > zds->dictSize) { + ZSTD_free(zds->dictContent, zds->customMem); + zds->dictContent = ZSTD_malloc(dictSize, zds->customMem); + if (zds->dictContent == NULL) return ERROR(memory_allocation); + } + memcpy(zds->dictContent, dict, dictSize); + zds->dictSize = dictSize; + } + zds->legacyVersion = 0; + return 0; +} + +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + return ZSTD_initDStream_usingDict(zds, NULL, 0); +} + +size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, + ZSTD_DStreamParameter_e paramType, unsigned paramValue) +{ + switch(paramType) + { + default : return ERROR(parameter_unknown); + case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue; break; + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds) +{ + return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->zd) + zds->inBuffSize + zds->outBuffSize; +} + + +/* *** Decompression *** */ + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + memcpy(dst, src, length); + return length; +} + + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const istart = (const char*)(input->src) + input->pos; + const char* const iend = (const char*)(input->src) + input->size; + const char* ip = istart; + char* const ostart = (char*)(output->dst) + output->pos; + char* const oend = (char*)(output->dst) + output->size; + char* op = ostart; + U32 someMoreWork = 1; + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) + return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); +#endif + + while (someMoreWork) { + switch(zds->stage) + { + case zdss_init : + return ERROR(init_missing); + + case zdss_loadHeader : + { size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); + if (ZSTD_isError(hSize)) +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + size_t initResult; + initResult = ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, + zds->dictContent, zds->dictSize); + if (ZSTD_isError(initResult)) return initResult; + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + } else { + return hSize; /* error */ + } } +#else + return hSize; +#endif + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */ + memcpy(zds->headerBuffer + zds->lhSize, ip, iend-ip); + zds->lhSize += iend-ip; + input->pos = input->size; + return (hSize - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* Consume header */ + ZSTD_decompressBegin_usingDict(zds->zd, zds->dictContent, zds->dictSize); + { size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->zd); /* == ZSTD_frameHeaderSize_min */ + size_t const h1Result = ZSTD_decompressContinue(zds->zd, NULL, 0, zds->headerBuffer, h1Size); + if (ZSTD_isError(h1Result)) return h1Result; /* should not happen : already checked */ + if (h1Size < zds->lhSize) { /* long header */ + size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->zd); + size_t const h2Result = ZSTD_decompressContinue(zds->zd, NULL, 0, zds->headerBuffer+h1Size, h2Size); + if (ZSTD_isError(h2Result)) return h2Result; + } } + + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_unsupported); + + /* Frame header instruct buffer sizes */ + { size_t const blockSize = MIN(zds->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); + size_t const neededOutSize = zds->fParams.windowSize + blockSize; + zds->blockSize = blockSize; + if (zds->inBuffSize < blockSize) { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = blockSize; + zds->inBuff = (char*)ZSTD_malloc(blockSize, zds->customMem); + if (zds->inBuff == NULL) return ERROR(memory_allocation); + } + if (zds->outBuffSize < neededOutSize) { + ZSTD_free(zds->outBuff, zds->customMem); + zds->outBuffSize = neededOutSize; + zds->outBuff = (char*)ZSTD_malloc(neededOutSize, zds->customMem); + if (zds->outBuff == NULL) return ERROR(memory_allocation); + } } + zds->stage = zdss_read; + /* pass-through */ + + case zdss_read: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->zd); + if (neededInSize==0) { /* end of frame */ + zds->stage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + const int isSkipFrame = ZSTD_isSkipFrame(zds->zd); + size_t const decodedSize = ZSTD_decompressContinue(zds->zd, + zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), + ip, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + ip += neededInSize; + if (!decodedSize && !isSkipFrame) break; /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + zds->stage = zdss_flush; + break; + } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->stage = zdss_load; + /* pass-through */ + } + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->zd); + size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ + size_t loadedSize; + if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + { const int isSkipFrame = ZSTD_isSkipFrame(zds->zd); + size_t const decodedSize = ZSTD_decompressContinue(zds->zd, + zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, + zds->inBuff, neededInSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + zds->inPos = 0; /* input is consumed */ + if (!decodedSize && !isSkipFrame) { zds->stage = zdss_read; break; } /* this was just a header */ + zds->outEnd = zds->outStart + decodedSize; + zds->stage = zdss_flush; + /* pass-through */ + } } + + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->stage = zdss_read; + if (zds->outStart + zds->blockSize > zds->outBuffSize) + zds->outStart = zds->outEnd = 0; + break; + } + /* cannot flush everything */ + someMoreWork = 0; + break; + } + default: return ERROR(GENERIC); /* impossible */ + } } + + /* result */ + input->pos += (size_t)(ip-istart); + output->pos += (size_t)(op-ostart); + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->zd); + if (!nextSrcSizeHint) return (zds->outEnd != zds->outStart); /* return 0 only if fully flushed too */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->zd) == ZSTDnit_block); + if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */ + nextSrcSizeHint -= zds->inPos; /* already loaded*/ + return nextSrcSizeHint; + } +} diff --git a/contrib/zstd/zstd_internal.h b/contrib/zstd/zstd_internal.h new file mode 100644 index 0000000..51e7170 --- /dev/null +++ b/contrib/zstd/zstd_internal.h @@ -0,0 +1,230 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/*-************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" + + +/*-************************************* +* Common macros +***************************************/ +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) +#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) +static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 +#define EQUAL_READ32 4 + +#define Litbits 8 +#define MaxLit ((1<= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/contrib/zstd/zstd_opt.h b/contrib/zstd/zstd_opt.h new file mode 100644 index 0000000..cb58729 --- /dev/null +++ b/contrib/zstd/zstd_opt.h @@ -0,0 +1,904 @@ +/** + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + + +/* Note : this file is intended to be included within zstd_compress.c */ + + +#ifndef ZSTD_OPT_H_91842398743 +#define ZSTD_OPT_H_91842398743 + + +#define ZSTD_FREQ_DIV 5 + +/*-************************************* +* Price functions for optimal parser +***************************************/ +FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) +{ + ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1); + ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1); + ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1); + ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); + ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); +} + + +MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) +{ + unsigned u; + + ssPtr->cachedLiterals = NULL; + ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; + + if (ssPtr->litLengthSum == 0) { + ssPtr->litSum = (2<litLengthSum = MaxLL+1; + ssPtr->matchLengthSum = MaxML+1; + ssPtr->offCodeSum = (MaxOff+1); + ssPtr->matchSum = (2<litFreq[u] = 2; + for (u=0; u<=MaxLL; u++) + ssPtr->litLengthFreq[u] = 1; + for (u=0; u<=MaxML; u++) + ssPtr->matchLengthFreq[u] = 1; + for (u=0; u<=MaxOff; u++) + ssPtr->offCodeFreq[u] = 1; + } else { + ssPtr->matchLengthSum = 0; + ssPtr->litLengthSum = 0; + ssPtr->offCodeSum = 0; + ssPtr->matchSum = 0; + ssPtr->litSum = 0; + + for (u=0; u<=MaxLit; u++) { + ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litSum += ssPtr->litFreq[u]; + } + for (u=0; u<=MaxLL; u++) { + ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; + } + for (u=0; u<=MaxML; u++) { + ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; + ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); + } + for (u=0; u<=MaxOff; u++) { + ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); + ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; + } + } + + ZSTD_setLog2Prices(ssPtr); +} + + +FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals) +{ + U32 price, u; + + if (litLength == 0) + return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); + + /* literals */ + if (ssPtr->cachedLiterals == literals) { + U32 const additional = litLength - ssPtr->cachedLitLength; + const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength; + price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; + for (u=0; u < additional; u++) + price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1); + ssPtr->cachedPrice = price; + ssPtr->cachedLitLength = litLength; + } else { + price = litLength * ssPtr->log2litSum; + for (u=0; u < litLength; u++) + price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1); + + if (litLength >= 12) { + ssPtr->cachedLiterals = literals; + ssPtr->cachedPrice = price; + ssPtr->cachedLitLength = litLength; + } + } + + /* literal Length */ + { const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1); + } + + return price; +} + + +FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + /* offset */ + BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); + + /* match Length */ + { const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1); + } + + return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; +} + + +MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +{ + U32 u; + + /* literals */ + seqStorePtr->litSum += litLength; + for (u=0; u < litLength; u++) + seqStorePtr->litFreq[literals[u]]++; + + /* literal Length */ + { const BYTE LL_deltaCode = 19; + const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; + seqStorePtr->litLengthFreq[llCode]++; + seqStorePtr->litLengthSum++; + } + + /* match offset */ + { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); + seqStorePtr->offCodeSum++; + seqStorePtr->offCodeFreq[offCode]++; + } + + /* match Length */ + { const BYTE ML_deltaCode = 36; + const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; + seqStorePtr->matchLengthFreq[mlCode]++; + seqStorePtr->matchLengthSum++; + } + + ZSTD_setLog2Prices(seqStorePtr); +} + + +#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ + { \ + while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ + opt[pos].mlen = mlen_; \ + opt[pos].off = offset_; \ + opt[pos].litlen = litlen_; \ + opt[pos].price = price_; \ + } + + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (ie. not within extDict) */ +FORCE_INLINE +U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip) +{ + U32* const hashTable3 = zc->hashTable3; + U32 const hashLog3 = zc->hashLog3; + const BYTE* const base = zc->base; + U32 idx = zc->nextToUpdate3; + const U32 target = zc->nextToUpdate3 = (U32)(ip - base); + const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +static U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + U32 nbCompares, const U32 mls, + U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen) +{ + const BYTE* const base = zc->base; + const U32 current = (U32)(ip-base); + const U32 hashLog = zc->params.cParams.hashLog; + const size_t h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const hashTable = zc->hashTable; + U32 matchIndex = hashTable[h]; + U32* const bt = zc->chainTable; + const U32 btLog = zc->params.cParams.chainLog - 1; + const U32 btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = zc->dictBase; + const U32 dictLimit = zc->dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const U32 btLow = btMask >= current ? 0 : current - btMask; + const U32 windowLow = zc->lowLimit; + U32* smallerPtr = bt + 2*(current&btMask); + U32* largerPtr = bt + 2*(current&btMask) + 1; + U32 matchEndIdx = current+8; + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + + const U32 minMatch = (mls == 3) ? 3 : 4; + size_t bestLength = minMatchLen-1; + + if (minMatch == 3) { /* HC3 match finder */ + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip); + if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) { + const BYTE* match; + size_t currentMl=0; + if ((!extDict) || matchIndex3 >= dictLimit) { + match = base + matchIndex3; + if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit); + } else { + match = dictBase + matchIndex3; + if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; + } + + /* save best solution */ + if (currentMl > bestLength) { + bestLength = currentMl; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3; + matches[mnum].len = (U32)currentMl; + mnum++; + if (currentMl > ZSTD_OPT_NUM) goto update; + if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/ + } + } + } + + hashTable[h] = current; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((!extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + if (match[matchLength] == ip[matchLength]) { + matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1; + } + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex; + matches[mnum].len = (U32)matchLength; + mnum++; + if (matchLength > ZSTD_OPT_NUM) break; + if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + +update: + zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1; + return mnum; +} + + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + +/** Tree updater, providing best match */ +static U32 ZSTD_BtGetAllMatches_extDict ( + ZSTD_CCtx* zc, + const BYTE* const ip, const BYTE* const iLimit, + const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen) +{ + if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); + return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); +} + + +static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( + ZSTD_CCtx* zc, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iHighLimit, + const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen) +{ + switch(matchLengthSearch) + { + case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); + default : + case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); + case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); + case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); + } +} + + +/*-******************************* +* Optimal parser +*********************************/ +FORCE_INLINE +void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const BYTE* const prefixStart = base + ctx->dictLimit; + + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + U32 offset, rep[ZSTD_REP_NUM]; + + /* init */ + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_rescaleFreqs(seqStorePtr); + ip += (ip==prefixStart); + { U32 i; for (i=0; irep[i]; } + inr = ip; + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); + for (i=(ip == anchor); i 0) && (repCur < (S32)(ip-prefixStart)) + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { + mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + best_off = i - (ip == anchor); + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); + + if (!last_pos && !match_num) { ip++; continue; } + + if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + /* set prices using matches at position = 0 */ + best_mlen = (last_pos) ? last_pos : minMatch; + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ + mlen++; + } } + + if (last_pos < minMatch) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } + + best_mlen = minMatch; + { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); + for (i=(opt[cur].mlen != 1); i 0) && (repCur < (S32)(inr-prefixStart)) + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { + mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = i - (opt[cur].mlen != 1); + + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); + } + + if (mlen > best_mlen) best_mlen = mlen; + + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); + + if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH); + else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH); + } + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos;) { + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + + if (offset > ZSTD_REP_MOVE_OPT) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { + if (offset != 0) { + best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + if (litLength==0) offset--; + } + + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + /* Save reps for next block */ + { int i; for (i=0; isavedRep[i] = rep[i]; } + + /* Last Literals */ + { size_t const lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + + +FORCE_INLINE +void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, + const void* src, size_t srcSize) +{ + seqStore_t* seqStorePtr = &(ctx->seqStore); + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ctx->base; + const U32 lowestIndex = ctx->lowLimit; + const U32 dictLimit = ctx->dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ctx->dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + + const U32 maxSearches = 1U << ctx->params.cParams.searchLog; + const U32 sufficient_len = ctx->params.cParams.targetLength; + const U32 mls = ctx->params.cParams.searchLength; + const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; + + ZSTD_optimal_t* opt = seqStorePtr->priceTable; + ZSTD_match_t* matches = seqStorePtr->matchTable; + const BYTE* inr; + + /* init */ + U32 offset, rep[ZSTD_REP_NUM]; + { U32 i; for (i=0; irep[i]; } + + ctx->nextToUpdate3 = ctx->nextToUpdate; + ZSTD_rescaleFreqs(seqStorePtr); + ip += (ip==prefixStart); + inr = ip; + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, match_num, last_pos, litlen, price; + U32 u, mlen, best_mlen, best_off, litLength; + U32 current = (U32)(ip-base); + memset(opt, 0, sizeof(ZSTD_optimal_t)); + last_pos = 0; + inr = ip; + opt[0].litlen = (U32)(ip - anchor); + + /* check repCode */ + { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); + for (i = (ip==anchor); i 0 && repCur <= (S32)current) + && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; + goto _storeSequence; + } + + best_off = i - (ip==anchor); + litlen = opt[0].litlen; + do { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ + + if (!last_pos && !match_num) { ip++; continue; } + + { U32 i; for (i=0; i sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + cur = 0; + last_pos = 1; + goto _storeSequence; + } + + best_mlen = (last_pos) ? last_pos : minMatch; + + /* set prices using matches at position = 0 */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = matches[u].len; + litlen = opt[0].litlen; + while (mlen <= best_mlen) { + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + if (mlen > last_pos || price < opt[mlen].price) + SET_PRICE(mlen, mlen, matches[u].off, litlen, price); + mlen++; + } } + + if (last_pos < minMatch) { + ip++; continue; + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + inr = ip + cur; + + if (opt[cur-1].mlen == 1) { + litlen = opt[cur-1].litlen + 1; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + } else + price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + } else { + litlen = 1; + price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + } + + if (cur > last_pos || price <= opt[cur].price) + SET_PRICE(cur, 1, 0, litlen, price); + + if (cur == last_pos) break; + + if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ + continue; + + mlen = opt[cur].mlen; + if (opt[cur].off > ZSTD_REP_MOVE_OPT) { + opt[cur].rep[2] = opt[cur-mlen].rep[1]; + opt[cur].rep[1] = opt[cur-mlen].rep[0]; + opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; + } else { + opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2]; + opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1]; + opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); + } + + best_mlen = 0; + + { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); + for (i = (mlen != 1); i 0 && repCur <= (S32)(current+cur)) + && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ + && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch; + + if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { + best_mlen = mlen; best_off = i; last_pos = cur + 1; + goto _storeSequence; + } + + best_off = i - (opt[cur].mlen != 1); + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); + } + + best_mlen = mlen; + + do { + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) + SET_PRICE(cur + mlen, mlen, i, litlen, price); + mlen--; + } while (mlen >= minMatch); + } } } + + match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); + + if (match_num > 0 && matches[match_num-1].len > sufficient_len) { + best_mlen = matches[match_num-1].len; + best_off = matches[match_num-1].off; + last_pos = cur + 1; + goto _storeSequence; + } + + best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch; + + /* set prices using matches at position = cur */ + for (u = 0; u < match_num; u++) { + mlen = (u>0) ? matches[u-1].len+1 : best_mlen; + best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur; + + while (mlen <= best_mlen) { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH); + else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH); + } + + if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) + SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); + + mlen++; + } } } /* for (cur = 1; cur <= last_pos; cur++) */ + + best_mlen = opt[last_pos].mlen; + best_off = opt[last_pos].off; + cur = last_pos - best_mlen; + + /* store sequence */ +_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ + opt[0].mlen = 1; + + while (1) { + mlen = opt[cur].mlen; + offset = opt[cur].off; + opt[cur].mlen = best_mlen; + opt[cur].off = best_off; + best_mlen = mlen; + best_off = offset; + if (mlen > cur) break; + cur -= mlen; + } + + for (u = 0; u <= last_pos; ) { + u += opt[u].mlen; + } + + for (cur=0; cur < last_pos; ) { + mlen = opt[cur].mlen; + if (mlen == 1) { ip++; cur++; continue; } + offset = opt[cur].off; + cur += mlen; + litLength = (U32)(ip - anchor); + + if (offset > ZSTD_REP_MOVE_OPT) { + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = offset - ZSTD_REP_MOVE_OPT; + offset--; + } else { + if (offset != 0) { + best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]); + if (offset != 1) rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = best_off; + } + + if (litLength==0) offset--; + } + + ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + anchor = ip = ip + mlen; + } } /* for (cur=0; cur < last_pos; ) */ + + /* Save reps for next block */ + { int i; for (i=0; isavedRep[i] = rep[i]; } + + /* Last Literals */ + { size_t lastLLSize = iend - anchor; + memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; + } +} + +#endif /* ZSTD_OPT_H_91842398743 */ diff --git a/debian/changelog b/debian/changelog index ec2d84f..35fc7ad 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,36 @@ +rmilter (1.9.2) unstable; urgency=low + * [Feature] Allow adding/overriding options for rmilter + * [Feature] Allow to disable dkim signing completely + * [Fix] Allow empty sections in the config + * [Fix] Do not fold dkim signature received from Rspamd + * [Fix] Fix adding headers from rspamd + * [Fix] Fix adding of IP addresses to the lists + * [Fix] Treat connections from unix socket as local connections + * [Fix] Unbroke 'spamd_greylist = no' config option + * [Rework] Process all flag config file options in same way + * [Rework] Remove old and deprecated rules + * [Rework] Use lc-btrie code for radix trie + + -- Vitaly Isaev Mon, 19 Sep 2016 14:52:32 +0000 + +rmilter (1.9.1) unstable; urgency=low + * [Feature] Allow to disable greylisting and ratelimit globally + * [Feature] Optimize sending data to rspamd + * [Fix] Avoid using of LTS as it seems to be broken on FreeBSD + * [Fix] Fix detection of sendfile on FreeBSD + * [Fix] Fix probability setup + * [Fix] Fix sdsnewlen usage + * [Fix] Fix usage of magic constants + * [Rework] Remove systemd socket activation + + -- Vitaly Isaev Mon, 19 Sep 2016 12:52:32 +0000 + +rmilter (1.9.0) unstable; urgency=low + + * Fixed debian build dependencies (glib2.0) + + -- Vitaly Isaev Tue, 26 Jul 2016 14:52:32 +0000 + rmilter (1.6.7) unstable; urgency=low * New release. diff --git a/debian/control b/debian/control index 26cecef..93e447e 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: rmilter Section: mail Priority: extra Maintainer: Mikhail Gusarov -Build-Depends: debhelper (>= 9), dpkg-dev (>= 1.16.1~), cmake, libpcre3-dev, libssl-dev (>= 1.0), libopendkim-dev, libmilter-dev, libspf2-dev, bison, flex, dh-systemd, libmemcached-dev +Build-Depends: debhelper (>= 9), dpkg-dev (>= 1.16.1~), cmake, libpcre3-dev, libssl-dev (>= 1.0), libopendkim-dev, libmilter-dev, libspf2-dev, bison, flex, dh-systemd, libmemcached-dev, libglib2.0-dev Standards-Version: 3.9.6 Homepage: https://github.com/vstakhov/rmilter Vcs-Git: git://github.com/vstakhov/rmilter.git diff --git a/debian/install b/debian/install index 9a173c9..a9175f1 100644 --- a/debian/install +++ b/debian/install @@ -1,3 +1,4 @@ debian/rmilter.conf.common /etc debian/rmilter.conf /etc debian/rmilter.conf.sysvinit /etc +debian/rmilter-tmpfiles.conf /usr/lib/tmpfiles.d diff --git a/debian/rmilter-tmpfiles.conf b/debian/rmilter-tmpfiles.conf new file mode 100644 index 0000000..f7bcfec --- /dev/null +++ b/debian/rmilter-tmpfiles.conf @@ -0,0 +1 @@ +d /run/rmilter 0755 _rmilter adm - diff --git a/debian/rmilter.conf b/debian/rmilter.conf index 63ca643..f5a5663 100644 --- a/debian/rmilter.conf +++ b/debian/rmilter.conf @@ -1,10 +1,9 @@ # systemd-specific settings for rmilter -.include /etc/rmilter.conf.common +.include /etc/rmilter.conf.common -# rmilter is socket-activated under systemd -bind_socket = fd:3; +bind_socket = unix:/run/rmilter/rmilter.sock; # include user's configuration .try_include /etc/rmilter.conf.local -.try_include /etc/rmilter.conf.d/*.conf \ No newline at end of file +.try_include /etc/rmilter.conf.d/*.conf diff --git a/debian/rmilter.conf.sysvinit b/debian/rmilter.conf.sysvinit index b2683c1..618094b 100644 --- a/debian/rmilter.conf.sysvinit +++ b/debian/rmilter.conf.sysvinit @@ -9,7 +9,6 @@ bind_socket = unix:/run/rmilter/rmilter.sock; pidfile = /run/rmilter/rmilter.pid; - # include user's configuration .try_include /etc/rmilter.conf.local -.try_include /etc/rmilter.conf.d/*.conf \ No newline at end of file +.try_include /etc/rmilter.conf.d/*.conf diff --git a/debian/rmilter.init b/debian/rmilter.init index 34a5dd2..321479c 100644 --- a/debian/rmilter.init +++ b/debian/rmilter.init @@ -32,6 +32,11 @@ SCRIPTNAME=/etc/init.d/$NAME export TMPDIR=/tmp # Apparently people have trouble if this isn't explicitly set... +# Include rmilter defaults if available +if [ -f /etc/default/$NAME ]; then + . /etc/default/$NAME +fi + set -e SSD="start-stop-daemon --pidfile $RUNDIR/$NAME.pid --name rmilter" diff --git a/debian/rmilter.socket b/debian/rmilter.socket deleted file mode 100644 index a191214..0000000 --- a/debian/rmilter.socket +++ /dev/null @@ -1,11 +0,0 @@ -[Unit] -Description=Another sendmail milter for different mail checks - -[Socket] -ListenStream=/var/run/rmilter/rmilter.sock -SocketUser=_rmilter -SocketGroup=adm -SocketMode=660 - -[Install] -WantedBy=sockets.target diff --git a/debian/rmilter.upstart b/debian/rmilter.upstart deleted file mode 100644 index b9ccc06..0000000 --- a/debian/rmilter.upstart +++ /dev/null @@ -1,12 +0,0 @@ -description "rmilter - sendmail filter" - -start on filesystem -stop on runlevel [016] or unmounting-filesystem - -exec start-stop-daemon -S -q -c _rmilter -x /usr/sbin/rmilter -- -c /etc/rmilter.conf.sysvinit -n - -pre-start script - install -o _rmilter -g adm -m 750 -d /run/rmilter - rm -f /run/rmilter/rmilter.sock - chmod g+s /run/rmilter -end script diff --git a/debian/rules b/debian/rules index 27fc981..68fee64 100755 --- a/debian/rules +++ b/debian/rules @@ -13,14 +13,5 @@ override_dh_auto_configure: -DDEBIAN_BUILD=1 \ -DWANT_SYSTEMD_UNITS=ON -override_dh_systemd_enable: - dh_systemd_enable rmilter.socket - -override_dh_systemd_start: - dh_systemd_start --restart-after-upgrade --no-start - -override_dh_installinit: - dh_installinit -n - override_dh_strip: dh_strip --dbg-package=rmilter-dbg diff --git a/rmilter.conf.sample b/rmilter.conf.sample index 559be17..29f7289 100644 --- a/rmilter.conf.sample +++ b/rmilter.conf.sample @@ -6,94 +6,135 @@ #.include ./rmilter-grey.conf # pidfile - path to pid file -# Default: pidfile = /var/run/rmilter.pid - +# Default: pidfile = /var/run/rmilter.pid pidfile = /var/run/rmilter/rmilter.pid; +# bind_socket - socket credits for local bind: +# unix:/path/to/file - bind to local socket +# inet:port@host - bind to inet socket +# Default: bind_socket = unix:/var/tmp/rmilter.sock; +bind_socket = unix:/var/run/rmilter/rmilter.sock; + +# tempdir - path to directory that contains temporary files +# Default: $TMPDIR +tempdir = /tmp; + +# max_size - maximum size of scanned message for ClamAV, Rspamd and DCC +# Default: 0 (no limit) +max_size = 10M; + +# strict_auth - strict checks for mails from authenticated senders +# Default: no +#strict_auth = no; + +# spf_domains - path to file that contains hash of spf domains +# Default: empty +#spf_domains = example.com, mail.ru; + +# use_dcc - whether or not use DCC +# Default: no +#use_dcc = no; + +# whitelist - global recipients whitelist +# Default: no +#whitelist = abuse@example.com, postmaster@example.com; + +# our_networks - treat mail from these networks as mail from authenticated users +# (has no effect if `strict_auth` = `yes`) +# Default: empty +#our_networks = 127.0.0.1/32, [::1]/128, 192.168.0.0/16; + clamav { # servers - clamav socket definitions in format: # /path/to/file # host[:port] # sockets are separated by ',' - # Default: empty + # Default: empty #servers = clam1.example.com, clam2.example.com; # connect_timeout - timeout in miliseconds for connecting to clamav - # Default: 1s - connect_timeout = 1s; + # Default: 1s + #connect_timeout = 1s; # port_timeout - timeout in miliseconds for waiting for clamav port response - # Default: 4s - port_timeout = 4s; + # Default: 4s + #port_timeout = 4s; # results_timeout - timeout in miliseconds for waiting for clamav response - # Default: 20s - results_timeout = 20s; + # Default: 20s + #results_timeout = 20s; # error_time - time in seconds during which we are counting errors - # Default: 10 - error_time = 10; + # Default: 10 + #error_time = 10; # dead_time - time in seconds during which we are thinking that server is down - # Default: 300 - dead_time = 300; + # Default: 300 + #dead_time = 300; # maxerrors - maximum number of errors that can occur during error_time to make us thinking that # this upstream is dead - # Default: 10 - maxerrors = 10; + # Default: 10 + #maxerrors = 10; }; spamd { - # servers - spamd socket definitions in format: + # servers - Rspamd socket definitions in format: # /path/to/file # host[:port] # sockets are separated by ',' - # is server name is prefixed with r: it is rspamd server - # Default: empty + # if server name is prefixed with 'r:' it is an Rspamd server + # Default: empty servers = r:localhost:11333; - # also_check - extra spamd servers to check + # also_check - extra Rspamd servers to check #also_check = r:clam10.example.com; # diff_dir - path where to write messages that have different results from main and extra checks #diff_dir = /var/run/rmilter/diffmsg; # connect_timeout - timeout in miliseconds for connecting to spamd - # Default: 1s - connect_timeout = 1s; + # Default: 1s + #connect_timeout = 1s; # results_timeout - timeout in miliseconds for waiting for spamd response - # Default: 20s - results_timeout = 20s; + # Default: 20s + #results_timeout = 20s; # error_time - time in seconds during which we are counting errors - # Default: 10 - error_time = 10; + # Default: 10 + #error_time = 10; # dead_time - time in seconds during which we are thinking that server is down - # Default: 300 - dead_time = 300; + # Default: 300 + #dead_time = 300; - # maxerrors - maximum number of errors that can occur during error_time to make us thinking that + # maxerrors - maximum number of errors that can occur during error_time to make Rmilter thinking that # this upstream is dead - # Default: 10 - maxerrors = 10; + # Default: 10 + #maxerrors = 10; - # reject_message - reject message for spam - # Default: "Spam message rejected; If this is not spam contact abuse team" - reject_message = "Spam message rejected; If this is not spam contact abuse at example.com"; + # reject_message - reject message for spam (quoted string) + # Default: "Spam message rejected; If this is not spam contact abuse team" + #reject_message = "Spam message rejected; If this is not spam contact abuse at example.com"; # whitelist - list of ips or nets that should be not checked with spamd - # Default: empty + # Default: empty whitelist = 127.0.0.1/32, [::1]/128, 192.168.0.0/16; # rspamd_metric - metric for using with rspamd - # Default: "default" - rspamd_metric = "default"; + # Default: "default" + #rspamd_metric = "default"; + + # spamd_soft_fail - if action is not reject use it for other actions (flag) + # Default: true + #spamd_soft_fail = yes; + + # spamd_greylist - greylist message only if action is greylist (flag) + # Default: true + #spamd_greylist = yes; - spamd_soft_fail = yes; - spamd_greylist = yes; - # For using in the personal email + # extended_spam_headers - add extended spamd headers to messages (flag) + # Default: false #extended_spam_headers = yes; }; @@ -123,74 +164,43 @@ redis { # copy_probability = 0.5; # id_prefix - prefix for extracting message ids from redis - # Default: empty (no prefix is prepended to key) + # Default: empty (no prefix is prepended to key) id_prefix = "message_id."; # grey_prefix - prefix for extracting greylisted records from redis - # Default: empty (no prefix is prepended to key) + # Default: empty (no prefix is prepended to key) grey_prefix = "grey."; # white_prefix - prefix for extracting whitelisted records from redis - # Default: empty (no prefix is prepended to key) + # Default: empty (no prefix is prepended to key) white_prefix = "white."; # spam_channel - redis pub/sub channel to send spam messages to - # Default: empty + # Default: empty # spam_channel = "spam"; # copy_channel - redis pub/sub channel to send messages copies to - # Default: empty + # Default: empty # copy_channel = "copy"; # connect_timeout - timeout in miliseconds for waiting for redis - # Default: 1s - connect_timeout = 1s; + # Default: 1s + #connect_timeout = 1s; # error_time - time in seconds during which we are counting errors - # Default: 10 - error_time = 10; + # Default: 10 + #error_time = 10; # dead_time - time in seconds during which we are thinking that server is down - # Default: 300 - dead_time = 300; + # Default: 300 + #dead_time = 300; # maxerrors - maximum number of errors that can occur during error_time to make us thinking that # this upstream is dead - # Default: 10 - maxerrors = 10; + # Default: 10 + #maxerrors = 10; }; -# bind_socket - socket credits for local bind: -# unix:/path/to/file - bind to local socket -# inet:port@host - bind to inet socket -# Default: bind_socket = unix:/var/tmp/rmilter.sock; - -bind_socket = unix:/var/run/rmilter/rmilter.sock; - -# tempdir - path to directory that contains temporary files -# Default: $TMPDIR - -tempdir = /tmp; - -# max_size - maximum size of scanned mail with clamav and dcc -# Default: 0 (no limit) - -max_size = 10M; - -# strict_auth - strict checks for mails from authenticated senders -# Default: no - -strict_auth = no; - -# spf_domains - path to file that contains hash of spf domains -# Default: empty - -#spf_domains = example.com, mail.ru; - -# use_dcc - whether use or not dcc system -# Default: no - -#use_dcc = yes; # rule definition: # rule { @@ -205,6 +215,10 @@ strict_auth = no; # limits section limits { + # enable: enable or disable rate limits (binary flag) + # Default: true + #enable = yes; + # Whitelisted ip or networks #limit_whitelist = 194.67.45.4/32; # Whitelisted recipients @@ -224,14 +238,28 @@ limits { }; greylisting { - timeout = 300s; + # enable - enable or disable greylisting (binary flag) + # Default: true + #enable = yes; + + # timeout - time during which we mark message greylisted + # Default: 300s + #timeout = 300s; + + # expire - time during which we save a greylisting record + # Default: 1d expire = 3d; - whitelist = 127.0.0.1, - 192.168.1.1, - 192.168.2.0/24; + + # whitelist - list of ip addresses or networks that should be whitelisted from greylisting + # Default: empty + whitelist = 127.0.0.1/32, [::1]/128, 192.168.0.0/16; }; dkim { + # enable - enable or disable DKIM signing (binary flag) + # Default: true + #enable = yes; + # Sample for dkim specific keys # domain { # key = /etc/dkim/dkim_example.key; @@ -243,8 +271,9 @@ dkim { # domain = "test.com"; # selector = "dkim"; # }; + # Universal selector, keys will be checked for pattern /etc/dkim/..key - domain { + domain { key = /usr/local/etc/dkim; domain = "*"; selector = "dkim"; diff --git a/rmilter.socket b/rmilter.socket deleted file mode 100644 index a191214..0000000 --- a/rmilter.socket +++ /dev/null @@ -1,11 +0,0 @@ -[Unit] -Description=Another sendmail milter for different mail checks - -[Socket] -ListenStream=/var/run/rmilter/rmilter.sock -SocketUser=_rmilter -SocketGroup=adm -SocketMode=660 - -[Install] -WantedBy=sockets.target diff --git a/src/cfg_file.c b/src/cfg_file.c index e1eca08..c19c3af 100644 --- a/src/cfg_file.c +++ b/src/cfg_file.c @@ -27,7 +27,6 @@ #include #include "config.h" -#include "pcre.h" #include "cfg_file.h" #include "rmilter.h" @@ -291,84 +290,9 @@ int add_spamd_server(struct config_file *cf, char *str, int is_extra) return 1; } -struct action * -create_action(enum action_type type, const char *message) +int add_ip_radix (radix_compressed_t **tree, char *ipnet) { - struct action *new; - size_t len = strlen (message); - - if (message == NULL) - return NULL; - - new = (struct action *) malloc (sizeof(struct action)); - - if (new == NULL) - return NULL; - - new->type = type; - - new->message = (char *) malloc (len + 1); - - if (new->message == NULL) - return NULL; - - rmilter_strlcpy (new->message, message, len + 1); - - return new; -} - -struct condition * -create_cond(enum condition_type type, const char *arg1, const char *arg2) -{ - struct condition *new; - int offset; - const char *read_err; - - new = (struct condition *) malloc (sizeof(struct condition)); - bzero (new, sizeof(struct condition)); - - if (new == NULL) - return NULL; - - if (arg1 == NULL || *arg1 == '\0') { - new->args[0].empty = 1; - } - else { - if (!copy_regexp (&new->args[0].src, arg1)) { - new->args[0].empty = 1; - } - else { - new->args[0].re = pcre_compile (new->args[0].src, 0, &read_err, - &offset, NULL); - if (new->args[0].re == NULL) { - new->args[0].empty = 1; - } - } - } - if (arg2 == NULL || *arg2 == '\0') { - new->args[1].empty = 1; - } - else { - if (!copy_regexp (&new->args[1].src, arg2)) { - new->args[1].empty = 1; - } - else { - new->args[1].re = pcre_compile (new->args[1].src, 0, &read_err, - &offset, NULL); - if (new->args[1].re == NULL) { - new->args[1].empty = 1; - } - } - } - - new->type = type; - - return new; -} - -int add_ip_radix (radix_compressed_t *tree, char *ipnet) -{ - if (!radix_add_generic_iplist (ipnet, &tree)) { + if (!radix_add_generic_iplist (ipnet, tree, true)) { yyerror ("add_ip_radix: cannot insert ip to tree: %s", ipnet); return 0; @@ -393,11 +317,8 @@ void init_defaults(struct config_file *cfg) { memset (cfg, 0, sizeof (*cfg)); - LIST_INIT(&cfg->rules); cfg->wlist_rcpt_global = NULL; cfg->wlist_rcpt_limit = NULL; - LIST_INIT(&cfg->bounce_addrs); - cfg->clamav_connect_timeout = DEFAULT_CLAMAV_CONNECT_TIMEOUT; cfg->clamav_port_timeout = DEFAULT_CLAMAV_PORT_TIMEOUT; cfg->clamav_results_timeout = DEFAULT_CLAMAV_RESULTS_TIMEOUT; @@ -447,8 +368,11 @@ void init_defaults(struct config_file *cfg) cfg->spamd_soft_fail = 1; cfg->spamd_greylist = 1; + cfg->greylisting_enable = 1; + cfg->ratelimit_enable = 1; cfg->dkim_auth_only = 1; + cfg->dkim_enable = 1; cfg->pid_file = NULL; cfg->tempfiles_mode = 00600; @@ -521,37 +445,7 @@ void free_config(struct config_file *cfg) for (i = 0; i < cfg->spamd_servers_num; i++) { free (cfg->spamd_servers[i].name); } - /* Free rules list */ - LIST_FOREACH_SAFE (cur, &cfg->rules, next, tmp_rule) - { - LIST_FOREACH_SAFE (cond, cur->conditions, next, tmp_cond) - { - if (!cond->args[0].empty) { - if (cond->args[0].re != NULL) { - pcre_free (cond->args[0].re); - } - if (cond->args[0].src != NULL) { - free (cond->args[0].src); - } - } - if (!cond->args[1].empty) { - if (cond->args[1].re != NULL) { - pcre_free (cond->args[1].re); - } - if (cond->args[1].src != NULL) { - free (cond->args[1].src); - } - } - LIST_REMOVE(cond, next); - free (cond); - } - if (cur->act->message) { - free (cur->act->message); - } - free (cur->act); - LIST_REMOVE(cur, next); - free (cur); - } + /* Free whitelists and bounce list*/ HASH_ITER (hh, cfg->wlist_rcpt_global, rcpt_cur, rcpt_tmp) { HASH_DEL (cfg->wlist_rcpt_global, rcpt_cur); @@ -563,11 +457,9 @@ void free_config(struct config_file *cfg) free (rcpt_cur->rcpt); free (rcpt_cur); } - LIST_FOREACH_SAFE (addr_cur, &cfg->bounce_addrs, next, addr_tmp) { - if (addr_cur->addr) { - free (addr_cur->addr); - } - LIST_REMOVE (addr_cur, next); + HASH_ITER (hh, cfg->bounce_addrs, addr_cur, addr_tmp) { + HASH_DEL (cfg->bounce_addrs, addr_cur); + free (addr_cur->addr); free (addr_cur); } @@ -648,7 +540,8 @@ void free_config(struct config_file *cfg) } #endif } -void add_rcpt_whitelist(struct config_file *cfg, const char *rcpt, +void +add_rcpt_whitelist (struct config_file *cfg, const char *rcpt, int is_global) { struct whitelisted_rcpt_entry *t; @@ -674,7 +567,29 @@ void add_rcpt_whitelist(struct config_file *cfg, const char *rcpt, } } -int is_whitelisted_rcpt(struct config_file *cfg, const char *str, int is_global) +void +clear_rcpt_whitelist (struct config_file *cfg, bool is_global) +{ + struct whitelisted_rcpt_entry *t, *tmp; + + if (is_global) { + HASH_ITER (hh, cfg->wlist_rcpt_global, t, tmp) { + HASH_DEL (cfg->wlist_rcpt_global, t); + free (t->rcpt); + free (t); + } + } + else { + HASH_ITER (hh, cfg->wlist_rcpt_limit, t, tmp) { + HASH_DEL (cfg->wlist_rcpt_limit, t); + free (t->rcpt); + free (t); + } + } +} + +int +is_whitelisted_rcpt (struct config_file *cfg, const char *str, int is_global) { int len; struct whitelisted_rcpt_entry *entry, *list; diff --git a/src/cfg_file.h b/src/cfg_file.h index 2e9dfcb..9283a3f 100644 --- a/src/cfg_file.h +++ b/src/cfg_file.h @@ -92,25 +92,6 @@ extern pthread_rwlock_t cfg_mtx; -enum { VAL_UNDEF=0, VAL_TRUE, VAL_FALSE }; -enum condition_type { - COND_CONNECT = 0, - COND_HELO, - COND_ENVFROM, - COND_ENVRCPT, - COND_HEADER, - COND_BODY, - COND_MAX -}; - -enum action_type { - ACTION_REJECT, - ACTION_TEMPFAIL, - ACTION_QUARANTINE, - ACTION_DISCARD, - ACTION_ACCEPT -}; - enum spamd_type { SPAMD_RSPAMD = 0 }; @@ -120,29 +101,6 @@ typedef struct bucket_s { double rate; } bucket_t; -struct action { - enum action_type type; - char *message; -}; - -struct condition { - struct cond_arg { - char *src; - int empty; - int not; - pcre *re; - } args[2]; - enum condition_type type; - LIST_ENTRY (condition) next; -}; - -struct rule { - LIST_HEAD (condl, condition) *conditions; - struct action *act; - uint8_t flags; - LIST_ENTRY (rule) next; -}; - struct clamav_server { struct upstream up; int port; @@ -169,15 +127,10 @@ struct beanstalk_server { char *name; }; -struct ip_list_entry { - struct in_addr addr; - LIST_ENTRY (ip_list_entry) next; -}; - struct addr_list_entry { char *addr; size_t len; - LIST_ENTRY (addr_list_entry) next; + UT_hash_handle hh; }; struct dkim_hash_entry { @@ -291,8 +244,10 @@ struct config_file { unsigned use_dcc:1; unsigned strict_auth:1; unsigned weighted_clamav:1; - - LIST_HEAD (ruleset, rule) rules; + unsigned greylisting_enable:1; + unsigned ratelimit_enable:1; + unsigned dkim_enable:1; + unsigned compression_enable:1; /* limits section */ bucket_t limit_to; @@ -303,7 +258,7 @@ struct config_file { struct whitelisted_rcpt_entry *wlist_rcpt_limit; struct whitelisted_rcpt_entry *wlist_rcpt_global; - LIST_HEAD (bounceaddrset, addr_list_entry) bounce_addrs; + struct addr_list_entry *bounce_addrs; unsigned int greylisting_timeout; unsigned int greylisting_expire; @@ -336,13 +291,12 @@ struct config_file { int add_cache_server (struct config_file *cf, char *str, char *str2, int type); int add_clamav_server (struct config_file *cf, char *str); int add_spamd_server (struct config_file *cf, char *str, int is_extra); -struct action * create_action (enum action_type type, const char *message); -struct condition * create_cond (enum condition_type type, const char *arg1, const char *arg2); void init_defaults (struct config_file *cfg); void free_config (struct config_file *cfg); -int add_ip_radix (radix_compressed_t *tree, char *ipnet); +int add_ip_radix (radix_compressed_t **tree, char *ipnet); void add_rcpt_whitelist (struct config_file *cfg, const char *rcpt, int is_global); int is_whitelisted_rcpt (struct config_file *cfg, const char *str, int is_global); +void clear_rcpt_whitelist (struct config_file *cfg, bool is_global); char *trim_quotes (char *in); int yylex (void); diff --git a/src/cfg_file.l b/src/cfg_file.l index 95ce1f3..88e2ee6 100644 --- a/src/cfg_file.l +++ b/src/cfg_file.l @@ -133,6 +133,13 @@ parse_flag (const char *str) return 0; } + if (strcmp (str, "on") == 0 || strcmp (str, "ON") == 0) { + return 1; + } + if (strcmp (str, "off") == 0 || strcmp (str, "OFF") == 0) { + return 0; + } + return -1; } @@ -231,7 +238,7 @@ ls32 {h16}:{h16}|{IPv4address} IPv6address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?:: IPaddress {IPv4address}|\[{IPv6address}\] IPnetwork {IPaddress}\/[0-9]{1,3} -flag yes|YES|no|NO|true|TRUE|false|FALSE +flag yes|YES|no|NO|true|TRUE|false|FALSE|on|ON|off|OFF %% %{ /* Rules section */ @@ -244,7 +251,6 @@ tempfiles_mode return TEMPFILES_MODE; pidfile return PIDFILE; strict_auth return STRICT_AUTH; check_auth return STRICT_AUTH; -rule return RULE; clamav return CLAMAV; spamd return SPAMD; also_check return ALSO_CHECK; @@ -294,6 +300,7 @@ password return PASSWORD; dbname return DBNAME; spam_channel return SPAM_CHANNEL; copy_channel return COPY_CHANNEL; +enable return ENABLE; dkim return DKIM_SECTION; key return DKIM_KEY; @@ -332,20 +339,8 @@ limit_whitelist_rcpt return LIMIT_WHITELIST_RCPT; limit_bounce_addrs return LIMIT_BOUNCE_ADDRS; limit_bounce_to return LIMIT_BOUNCE_TO; limit_bounce_to_ip return LIMIT_BOUNCE_TO_IP; - -accept return ACCEPT; -body return BODY; -connect return CONNECT; -discard return DISCARD; -envfrom return ENVFROM; -envrcpt return ENVRCPT; -header return HEADER; -helo return HELO; -not return NOT; -quarantine return QUARANTINE; -reject return REJECTL; -tempfail return TEMPFAIL; our_networks return OUR_NETWORKS; +compression return COMPRESSION; \" return QUOTE; @@ -354,6 +349,7 @@ our_networks return OUR_NETWORKS; ; return SEMICOLON; , return COMMA; = return EQSIGN; +\+= return EQPLUS; {flag} yylval.flag=parse_flag(yytext); return FLAG; \n /* ignore EOL */; [ \t]+ /* ignore whitespace */; diff --git a/src/cfg_file.y b/src/cfg_file.y index e45a815..1019f90 100644 --- a/src/cfg_file.y +++ b/src/cfg_file.y @@ -26,7 +26,6 @@ %{ -#include "pcre.h" #include "cfg_file.h" #define YYDEBUG 1 @@ -43,8 +42,6 @@ uint8_t cur_flags = 0; %union { char *string; - struct condition *cond; - struct action *action; size_t limit; bucket_t bucket; char flag; @@ -54,10 +51,7 @@ uint8_t cur_flags = 0; } %token ERROR STRING QUOTEDSTRING FLAG FLOAT -%token ACCEPT REJECTL TEMPFAIL DISCARD QUARANTINE -%token CONNECT HELO ENVFROM ENVRCPT HEADER MACRO BODY -%token AND OR NOT -%token TEMPDIR LOGFILE PIDFILE RULE CLAMAV SERVERS ERROR_TIME DEAD_TIME MAXERRORS CONNECT_TIMEOUT PORT_TIMEOUT RESULTS_TIMEOUT SPF DCC +%token TEMPDIR LOGFILE PIDFILE CLAMAV SERVERS ERROR_TIME DEAD_TIME MAXERRORS CONNECT_TIMEOUT PORT_TIMEOUT RESULTS_TIMEOUT SPF DCC %token FILENAME REGEXP QUOTE SEMICOLON OBRACE EBRACE COMMA EQSIGN %token BINDSOCK SOCKCRED DOMAIN_STR IPADDR IPNETWORK HOSTPORT NUMBER GREYLISTING WHITELIST TIMEOUT EXPIRE EXPIRE_WHITE %token MAXSIZE SIZELIMIT SECONDS BUCKET USEDCC MEMCACHED PROTOCOL SERVERS_WHITE SERVERS_LIMITS SERVERS_GREY SERVERS_COPY SERVERS_SPAM @@ -70,7 +64,7 @@ uint8_t cur_flags = 0; %token SEND_BEANSTALK_SPAM_EXTRA_DIFF DKIM_FOLD_HEADER SPAMD_RETRY_COUNT SPAMD_RETRY_TIMEOUT SPAMD_TEMPFAIL %token SPAMD_NEVER_REJECT TEMPFILES_MODE USE_REDIS REDIS DKIM_SIGN_NETWORKS OUR_NETWORKS SPAM_BAR_CHAR %token SPAM_NO_AUTH_HEADER PASSWORD DBNAME SPAMD_SETTINGS_ID SPAMD_SPAM_ADD_HEADER -%token COPY_FULL COPY_CHANNEL SPAM_CHANNEL +%token COPY_FULL COPY_CHANNEL SPAM_CHANNEL ENABLE EQPLUS COMPRESSION %type STRING %type QUOTEDSTRING @@ -79,20 +73,20 @@ uint8_t cur_flags = 0; %type SOCKCRED %type IPADDR IPNETWORK %type HOSTPORT -%type ip_net cache_hosts clamav_addr spamd_addr -%type expr_l expr term -%type action +%type ip_net cache_hosts clamav_addr spamd_addr bounce_addr %type DOMAIN_STR %type SIZELIMIT %type FLAG %type BUCKET; %type SECONDS; %type NUMBER; -%type FLOAT; +%type FLOAT; +%type prob_num; %% -input : /* empty */ - | command separator input +input: + empty + | command separator input ; separator: @@ -101,6 +95,7 @@ separator: ; empty: + /* %empty */ ; command : @@ -108,7 +103,6 @@ command : | tempfiles_mode | strictauth | pidfile - | rule | clamav | spamd | spf @@ -183,169 +177,9 @@ strictauth: } ; -rule : - RULE OBRACE rulebody EBRACE - ; - -rulebody : - action SEMICOLON expr_l { - struct rule *cur_rule; - cur_rule = (struct rule *) malloc (sizeof (struct rule)); - if (cur_rule == NULL) { - yyerror ("yyparse: malloc: %s", strerror (errno)); - YYERROR; - } - - cur_rule->act = $1; - cur_rule->conditions = cur_conditions; - cur_rule->flags = cur_flags; - cur_flags = 0; - LIST_INSERT_HEAD (&cfg->rules, cur_rule, next); - } - ; - -action : - REJECTL QUOTEDSTRING { - $$ = create_action(ACTION_REJECT, $2); - if ($$ == NULL) { - yyerror ("yyparse: create_action"); - YYERROR; - } - free($2); - } - | TEMPFAIL QUOTEDSTRING { - $$ = create_action(ACTION_TEMPFAIL, $2); - if ($$ == NULL) { - yyerror ("yyparse: create_action"); - YYERROR; - } - free($2); - } - | QUARANTINE QUOTEDSTRING { - $$ = create_action(ACTION_QUARANTINE, $2); - if ($$ == NULL) { - yyerror ("yyparse: create_action"); - YYERROR; - } - free($2); - } - | DISCARD { - $$ = create_action(ACTION_DISCARD, ""); - if ($$ == NULL) { - yyerror ("yyparse: create_action"); - YYERROR; - } - } - | ACCEPT { - $$ = create_action(ACTION_ACCEPT, ""); - if ($$ == NULL) { - yyerror ("yyparse: create_action"); - YYERROR; - } - } - ; - -expr_l : - expr SEMICOLON { - cur_conditions = (struct condl *)malloc (sizeof (struct condl)); - if (cur_conditions == NULL) { - yyerror ("yyparse: malloc: %s", strerror (errno)); - YYERROR; - } - LIST_INIT (cur_conditions); - $$ = $1; - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - LIST_INSERT_HEAD (cur_conditions, $$, next); - } - | expr_l expr SEMICOLON { - $$ = $2; - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - LIST_INSERT_HEAD (cur_conditions, $$, next); - } - ; - -expr : - term { - $$ = $1; - } - | NOT term { - struct condition *tmp; - tmp = $2; - if (tmp != NULL) { - tmp->args[0].not = 1; - tmp->args[1].not = 1; - } - $$ = tmp; - } - ; - -term : - CONNECT REGEXP REGEXP { - $$ = create_cond(COND_CONNECT, $2, $3); - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - cur_flags |= COND_CONNECT_FLAG; - free($2); - free($3); - } - | HELO REGEXP { - $$ = create_cond(COND_HELO, $2, NULL); - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - cur_flags |= COND_HELO_FLAG; - free($2); - } - | ENVFROM REGEXP { - $$ = create_cond(COND_ENVFROM, $2, NULL); - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - cur_flags |= COND_ENVFROM_FLAG; - free($2); - } - | ENVRCPT REGEXP { - $$ = create_cond(COND_ENVRCPT, $2, NULL); - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - cur_flags |= COND_ENVRCPT_FLAG; - free($2); - } - | HEADER REGEXP REGEXP { - $$ = create_cond(COND_HEADER, $2, $3); - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - cur_flags |= COND_HEADER_FLAG; - free($2); - free($3); - } - | BODY REGEXP { - $$ = create_cond(COND_BODY, $2, NULL); - if ($$ == NULL) { - yyerror ("yyparse: malloc: %s", strerror(errno)); - YYERROR; - } - cur_flags |= COND_BODY_FLAG; - free($2); - } - ; - clamav: CLAMAV OBRACE clamavbody EBRACE + | CLAMAV OBRACE empty EBRACE ; clamavbody: @@ -368,11 +202,13 @@ clamav_servers: SERVERS EQSIGN { cfg->clamav_servers_num = 0; } clamav_server + | SERVERS EQPLUS clamav_server ; clamav_server: clamav_params | clamav_server COMMA clamav_params + | empty ; clamav_params: @@ -445,7 +281,7 @@ clamav_ip_list: clamav_ip: ip_net { - if (add_ip_radix (cfg->clamav_whitelist, $1) == 0) { + if (add_ip_radix (&cfg->clamav_whitelist, $1) == 0) { YYERROR; } } @@ -453,6 +289,7 @@ clamav_ip: spamd: SPAMD OBRACE spamdbody EBRACE + | SPAMD OBRACE empty EBRACE ; spamdbody: @@ -489,6 +326,7 @@ spamdcmd: | spam_bar_char | spam_no_auth_header | spamd_settings_id + | spamd_compression ; diff_dir : @@ -504,6 +342,10 @@ diff_dir : YYERROR; } + if (cfg->diff_dir) { + free (cfg->diff_dir); + } + cfg->diff_dir = $3; } ; @@ -520,6 +362,10 @@ symbols_dir: YYERROR; } + if (cfg->symbols_dir) { + free (cfg->symbols_dir); + } + cfg->symbols_dir = $3; } ; @@ -537,11 +383,13 @@ spamd_servers: cfg->spamd_servers_num = 0; } spamd_server + | SERVERS EQPLUS spamd_server ; spamd_server: spamd_params | spamd_server COMMA spamd_params + | empty ; spamd_params: @@ -625,17 +473,24 @@ spamd_reject_message: } ; spamd_whitelist: - WHITELIST EQSIGN spamd_ip_list + WHITELIST EQSIGN { + if (cfg->spamd_whitelist) { + radix_destroy_compressed (cfg->spamd_whitelist); + cfg->spamd_whitelist = NULL; + } + } spamd_ip_list + | WHITELIST EQPLUS spamd_ip_list ; spamd_ip_list: spamd_ip | spamd_ip_list COMMA spamd_ip + | empty ; spamd_ip: ip_net { - if (add_ip_radix (cfg->spamd_whitelist, $1) == 0) { + if (add_ip_radix (&cfg->spamd_whitelist, $1) == 0) { YYERROR; } } @@ -650,44 +505,31 @@ spamd_rspamd_metric: spamd_soft_fail: SPAMD_SOFT_FAIL EQSIGN FLAG { - if ($3) { - cfg->spamd_soft_fail = 1; - } + cfg->spamd_soft_fail = $3; } ; spamd_never_reject: SPAMD_NEVER_REJECT EQSIGN FLAG { - if ($3) { - cfg->spamd_never_reject = 1; - } + cfg->spamd_never_reject = $3; } ; spamd_spam_add_header: SPAMD_SPAM_ADD_HEADER EQSIGN FLAG { - if ($3) { - cfg->spamd_spam_add_header = 1; - } - else { - cfg->spamd_spam_add_header = 0; - } + cfg->spamd_spam_add_header = $3; } ; extended_spam_headers: EXTENDED_SPAM_HEADERS EQSIGN FLAG { - if ($3) { - cfg->extended_spam_headers = 1; - } + cfg->extended_spam_headers = $3; } ; spamd_greylist: SPAMD_GREYLIST EQSIGN FLAG { - if ($3) { - cfg->spamd_greylist = 1; - } + cfg->spamd_greylist = $3; } ; @@ -741,9 +583,7 @@ spam_bar_char: ; spam_no_auth_header: SPAM_NO_AUTH_HEADER EQSIGN FLAG { - if ($3) { - cfg->spam_no_auth_header = 1; - } + cfg->spam_no_auth_header = $3; } ; @@ -754,6 +594,12 @@ spamd_settings_id: } ; +spamd_compression: + COMPRESSION EQSIGN FLAG { + cfg->compression_enable = $3; + } + ; + spf: SPF EQSIGN spf_params ; @@ -794,16 +640,13 @@ maxsize: ; usedcc: USEDCC EQSIGN FLAG { - if ($3 == -1) { - yyerror ("yyparse: parse flag"); - YYERROR; - } cfg->use_dcc = $3; } ; greylisting: GREYLISTING OBRACE greylistingbody EBRACE + | GREYLISTING OBRACE empty EBRACE ; greylistingbody: @@ -817,6 +660,7 @@ greylistingcmd: | greylisting_expire | greylisting_whitelist_expire | greylisted_message + | greylisting_enable ; greylisting_timeout: @@ -841,7 +685,14 @@ greylisting_whitelist_expire: ; greylisting_whitelist: - WHITELIST EQSIGN greylisting_ip_list + WHITELIST EQSIGN { + if (cfg->grey_whitelist_tree) { + radix_destroy_compressed (cfg->grey_whitelist_tree); + cfg->grey_whitelist_tree = NULL; + } + } + greylisting_ip_list + | WHITELIST EQPLUS greylisting_ip_list ; greylisting_ip_list: @@ -851,7 +702,7 @@ greylisting_ip_list: greylisting_ip: ip_net { - if (add_ip_radix (cfg->grey_whitelist_tree, $1) == 0) { + if (add_ip_radix (&cfg->grey_whitelist_tree, $1) == 0) { YYERROR; } } @@ -864,6 +715,12 @@ greylisted_message: } ; +greylisting_enable: + ENABLE EQSIGN FLAG { + cfg->greylisting_enable = $3; + } + ; + ip_net: IPADDR | IPNETWORK @@ -871,13 +728,17 @@ ip_net: ; cache: - MEMCACHED OBRACE cachebody EBRACE {} - | REDIS OBRACE cachebody EBRACE { cfg->cache_use_redis = 1; } + MEMCACHED OBRACE cachebody EBRACE + | REDIS { cfg->cache_use_redis = 1; } OBRACE cachebody EBRACE ; cachebody: + cahcebody_commands + | empty + ; +cahcebody_commands: cachecmd SEMICOLON - | cachebody cachecmd SEMICOLON + | cahcebody_commands cachecmd SEMICOLON ; cachecmd: @@ -908,11 +769,13 @@ cache_grey_servers: cfg->cache_servers_grey_num = 0; } cache_grey_server + | SERVERS_GREY EQPLUS cache_grey_server ; cache_grey_server: cache_grey_params | cache_grey_server COMMA cache_grey_params + | empty ; cache_grey_params: @@ -939,6 +802,7 @@ cache_white_servers: cfg->cache_servers_white_num = 0; } cache_white_server + | SERVERS_WHITE EQPLUS cache_white_server ; cache_white_server: @@ -970,11 +834,13 @@ cache_limits_servers: cfg->cache_servers_limits_num = 0; } cache_limits_server + | SERVERS_LIMITS EQPLUS cache_limits_server ; cache_limits_server: cache_limits_params | cache_limits_server COMMA cache_limits_params + | empty ; cache_limits_params: @@ -993,11 +859,13 @@ cache_id_servers: cfg->cache_servers_id_num = 0; } cache_id_server + | SERVERS_ID EQPLUS cache_id_server ; cache_id_server: cache_id_params | cache_id_server COMMA cache_id_params + | empty ; cache_id_params: @@ -1016,11 +884,13 @@ cache_copy_servers: cfg->cache_servers_copy_num = 0; } cache_copy_server + | SERVERS_COPY EQPLUS cache_copy_server ; cache_copy_server: cache_copy_params | cache_copy_server COMMA cache_copy_params + | empty ; cache_copy_params: @@ -1039,11 +909,13 @@ cache_spam_servers: cfg->cache_servers_spam_num = 0; } cache_spam_server + | SERVERS_SPAM EQPLUS cache_spam_server ; cache_spam_server: cache_spam_params | cache_spam_server COMMA cache_spam_params + | empty ; cache_spam_params: @@ -1132,10 +1004,25 @@ cache_copy_channel: ; cache_copy_probability: - COPY_PROBABILITY EQSIGN NUMBER { - cfg->cache_copy_prob = $3; + COPY_PROBABILITY EQSIGN prob_num { + if ($3 < 0) { + YYERROR; + } + if ($3 > 1.0) { + if ($3 > 100.0) { + YYERROR; + } + cfg->cache_copy_prob = $3 / 100.0; + } + else { + cfg->cache_copy_prob = $3; + } } ; +prob_num: + NUMBER { $$ = (double)$1; } + | FLOAT + ; cache_spam_channel: SPAM_CHANNEL EQSIGN QUOTEDSTRING { free (cfg->cache_spam_channel); @@ -1145,6 +1032,7 @@ cache_spam_channel: limits: LIMITS OBRACE limitsbody EBRACE + | LIMITS OBRACE empty EBRACE ; limitsbody: @@ -1160,6 +1048,7 @@ limitcmd: | limit_bounce_addrs | limit_bounce_to | limit_bounce_to_ip + | limit_enable ; limit_to: @@ -1181,23 +1070,34 @@ limit_to_ip_from: } ; limit_whitelist: - LIMIT_WHITELIST EQSIGN whitelist_ip_list + LIMIT_WHITELIST EQSIGN { + if (cfg->limit_whitelist_tree) { + radix_destroy_compressed (cfg->limit_whitelist_tree); + cfg->limit_whitelist_tree = NULL; + } + } + whitelist_ip_list + | LIMIT_WHITELIST EQPLUS whitelist_ip_list ; whitelist_ip_list: ip_net { - if (add_ip_radix (cfg->limit_whitelist_tree, $1) == 0) { + if (add_ip_radix (&cfg->limit_whitelist_tree, $1) == 0) { YYERROR; } } | whitelist_ip_list COMMA ip_net { - if (add_ip_radix (cfg->limit_whitelist_tree, $3) == 0) { + if (add_ip_radix (&cfg->limit_whitelist_tree, $3) == 0) { YYERROR; } } + | empty ; limit_whitelist_rcpt: - LIMIT_WHITELIST_RCPT EQSIGN whitelist_rcpt_list + LIMIT_WHITELIST_RCPT EQSIGN { + clear_rcpt_whitelist (cfg, false); + } whitelist_rcpt_list + | LIMIT_WHITELIST_RCPT EQPLUS whitelist_rcpt_list ; whitelist_rcpt_list: STRING { @@ -1212,42 +1112,42 @@ whitelist_rcpt_list: | whitelist_rcpt_list COMMA QUOTEDSTRING { add_rcpt_whitelist (cfg, $3, 0); } + | empty ; limit_bounce_addrs: - LIMIT_BOUNCE_ADDRS EQSIGN bounce_addr_list + LIMIT_BOUNCE_ADDRS EQSIGN { + struct addr_list_entry *t, *tmp; + + HASH_ITER (hh, cfg->bounce_addrs, t, tmp) { + HASH_DEL (cfg->bounce_addrs, t); + free (t->addr); + free (t); + } + } bounce_addr_list + | LIMIT_BOUNCE_ADDRS EQPLUS bounce_addr_list ; bounce_addr_list: - STRING { + bounce_addr { struct addr_list_entry *t; - t = (struct addr_list_entry *)malloc (sizeof (struct addr_list_entry)); + t = calloc (1, sizeof (struct addr_list_entry)); t->addr = strdup ($1); t->len = strlen (t->addr); - LIST_INSERT_HEAD (&cfg->bounce_addrs, t, next); + HASH_ADD_KEYPTR(hh, cfg->bounce_addrs, t->addr, t->len, t); } - | bounce_addr_list COMMA STRING { + | bounce_addr_list COMMA bounce_addr { struct addr_list_entry *t; - t = (struct addr_list_entry *)malloc (sizeof (struct addr_list_entry)); + t = calloc (1, sizeof (struct addr_list_entry)); t->addr = strdup ($3); t->len = strlen (t->addr); - LIST_INSERT_HEAD (&cfg->bounce_addrs, t, next); - } - | QUOTEDSTRING { - struct addr_list_entry *t; - t = (struct addr_list_entry *)malloc (sizeof (struct addr_list_entry)); - t->addr = strdup ($1); - t->len = strlen (t->addr); - LIST_INSERT_HEAD (&cfg->bounce_addrs, t, next); - } - | bounce_addr_list COMMA QUOTEDSTRING { - struct addr_list_entry *t; - t = (struct addr_list_entry *)malloc (sizeof (struct addr_list_entry)); - t->addr = strdup ($3); - t->len = strlen (t->addr); - LIST_INSERT_HEAD (&cfg->bounce_addrs, t, next); + HASH_ADD_KEYPTR(hh, cfg->bounce_addrs, t->addr, t->len, t); } + | empty ; +bounce_addr: + STRING + | QUOTEDSTRING limit_bounce_to: LIMIT_BOUNCE_TO EQSIGN BUCKET { @@ -1263,9 +1163,17 @@ limit_bounce_to_ip: } ; +limit_enable: + ENABLE EQSIGN FLAG { + cfg->ratelimit_enable = $3; + } + ; whitelist: - WHITELIST EQSIGN whitelist_list + WHITELIST EQSIGN { + clear_rcpt_whitelist (cfg, true); + } whitelist_list + | WHITELIST EQPLUS whitelist_list ; whitelist_list: STRING { @@ -1280,11 +1188,13 @@ whitelist_list: | whitelist_list COMMA QUOTEDSTRING { add_rcpt_whitelist (cfg, $3, 1); } + | empty ; dkim: DKIM_SECTION OBRACE dkimbody EBRACE + | DKIM_SECTION OBRACE empty EBRACE ; dkimbody: @@ -1301,6 +1211,7 @@ dkimcmd: | dkim_auth_only | dkim_fold_header | dkim_sign_networks + | dkim_enable ; dkim_domain: @@ -1449,39 +1360,42 @@ dkim_sign_alg: dkim_auth_only: DKIM_AUTH_ONLY EQSIGN FLAG { - if ($3) { - cfg->dkim_auth_only = 1; - } - else { - cfg->dkim_auth_only = 0; - } + cfg->dkim_auth_only = $3; } ; dkim_fold_header: DKIM_FOLD_HEADER EQSIGN FLAG { - if ($3) { - cfg->dkim_fold_header = 1; - } - else { - cfg->dkim_fold_header = 0; - } + cfg->dkim_fold_header = $3; } ; dkim_sign_networks: - DKIM_SIGN_NETWORKS EQSIGN dkim_ip_list + DKIM_SIGN_NETWORKS EQSIGN { + if (cfg->dkim_ip_tree) { + radix_destroy_compressed (cfg->dkim_ip_tree); + cfg->dkim_ip_tree = NULL; + } + } dkim_ip_list + | DKIM_SIGN_NETWORKS EQPLUS dkim_ip_list ; dkim_ip_list: ip_net { - if (add_ip_radix (cfg->dkim_ip_tree, $1) == 0) { + if (add_ip_radix (&cfg->dkim_ip_tree, $1) == 0) { YYERROR; } } | dkim_ip_list COMMA ip_net { - if (add_ip_radix (cfg->dkim_ip_tree, $3) == 0) { + if (add_ip_radix (&cfg->dkim_ip_tree, $3) == 0) { YYERROR; } } + | empty + ; + +dkim_enable: + ENABLE EQSIGN FLAG { + cfg->dkim_enable = $3; + } ; use_redis: @@ -1491,17 +1405,24 @@ use_redis: ; our_networks: - OUR_NETWORKS EQSIGN our_networks_list + OUR_NETWORKS EQSIGN { + if (cfg->our_networks) { + radix_destroy_compressed (cfg->our_networks); + cfg->our_networks = NULL; + } + }our_networks_list + | OUR_NETWORKS EQPLUS our_networks_list ; our_networks_list: our_networks_elt | our_networks_list COMMA our_networks_elt + | empty ; our_networks_elt: ip_net { - if (add_ip_radix (cfg->our_networks, $1) == 0) { + if (add_ip_radix (&cfg->our_networks, $1) == 0) { YYERROR; } } diff --git a/src/libspamd.c b/src/libspamd.c index 9f73505..31071f9 100644 --- a/src/libspamd.c +++ b/src/libspamd.c @@ -34,6 +34,7 @@ #include "ucl.h" #include "http_parser.h" #include "sds.h" +#include "contrib/zstd/zstd.h" #include #include @@ -65,12 +66,92 @@ rmilter_spamd_parser_on_body (http_parser * parser, const char *at, size_t lengt priv = res->priv; up = ucl_parser_new (0); - if (!ucl_parser_add_chunk (up, at, length)) { - msg_err ("<%s>; cannot parse reply from rspamd: %s", - priv->mlfi_id, ucl_parser_get_error (up)); - ucl_parser_free (up); + if (res->compressed) { + /* Decompress first */ + ZSTD_DStream *zstream; + ZSTD_inBuffer zin; + ZSTD_outBuffer zout; + char *out; + size_t outlen, r; - return -1; + zstream = ZSTD_createDStream (); + ZSTD_initDStream (zstream); + + zin.pos = 0; + zin.src = at; + zin.size = length; + + if ((outlen = ZSTD_getDecompressedSize (zin.src, zin.size)) == 0) { + outlen = ZSTD_DStreamOutSize (); + } + + out = malloc (outlen); + + if (out == NULL) { + msg_err ("<%s>; malloc error: %s", priv->mlfi_id, + strerror (errno)); + ucl_parser_free (up); + + return -1; + } + + zout.dst = out; + zout.pos = 0; + zout.size = outlen; + + while (zin.pos < zin.size) { + r = ZSTD_decompressStream (zstream, &zout, &zin); + + if (ZSTD_isError (r)) { + msg_err ("<%s>; decompression error: %s", priv->mlfi_id, + ZSTD_getErrorName (r)); + ZSTD_freeDStream (zstream); + free (out); + ucl_parser_free (up); + + return -1; + } + + if (zout.pos == zout.size) { + /* We need to extend output buffer */ + zout.size = zout.size * 1.5 + 1.0; + out = realloc (zout.dst, zout.size); + + if (out == NULL) { + msg_err ("<%s>; malloc error: %s", priv->mlfi_id, + strerror (errno)); + ucl_parser_free (up); + free (zout.dst); + + return -1; + } + else { + zout.dst = out; + } + } + } + + ZSTD_freeDStream (zstream); + + if (!ucl_parser_add_chunk (up, out, zout.pos)) { + msg_err ("<%s>; cannot parse reply from rspamd: %s", + priv->mlfi_id, ucl_parser_get_error (up)); + ucl_parser_free (up); + free (out); + + return -1; + } + + free (out); + } + else { + if (!ucl_parser_add_chunk (up, at, length)) { + msg_err ("<%s>; cannot parse reply from rspamd: %s", + priv->mlfi_id, ucl_parser_get_error (up)); + ucl_parser_free (up); + + return -1; + } } obj = ucl_parser_get_object (up); @@ -179,6 +260,7 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, struct rspamd_metric_result *res) { sds buf = NULL; + char *io_buf = NULL; struct sockaddr_un server_un; struct sockaddr_in server_in; int s = -1, fd = -1, ofl, size = 0, ret = -1; @@ -189,6 +271,8 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, struct http_parser parser; struct http_parser_settings ps; void *map = NULL; + const size_t iobuf_len = 16384; + uint64_t r; /* somebody doesn't need reply... */ if (!srv) { @@ -232,83 +316,152 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, ofl = fcntl (s, F_GETFL, 0); fcntl (s, F_SETFL, ofl & (~O_NONBLOCK)); - buf = sdscatprintf (sdsempty (), - "POST /symbols HTTP/1.0\r\nContent-Length: %ld\r\n", - (long int )sb.st_size); + buf = sdsnewlen (NULL, 512); + sdsclear (buf); + buf = sdscatfmt (buf, + "POST /symbols HTTP/1.0\r\nContent-Length: %U\r\n", + (uint64_t)sb.st_size); DL_FOREACH (priv->rcpts, rcpt) { - buf = sdscatprintf (buf, "Rcpt: %s\r\n", rcpt->r_addr); + buf = sdscatfmt (buf, "Rcpt: %s\r\n", rcpt->r_addr); } if (priv->priv_from[0] != '\0') { - buf = sdscatprintf (buf, "From: %s\r\n", priv->priv_from); + buf = sdscatfmt (buf, "From: %s\r\n", priv->priv_from); } if (priv->priv_helo[0] != '\0') { - buf = sdscatprintf (buf, "Helo: %s\r\n", priv->priv_helo); + buf = sdscatfmt (buf, "Helo: %s\r\n", priv->priv_helo); } if (priv->priv_hostname[0] != '\0' && memcmp (priv->priv_hostname, "unknown", 8) != 0) { - buf = sdscatprintf (buf, "Hostname: %s\r\n", + buf = sdscatfmt (buf, "Hostname: %s\r\n", priv->priv_hostname); } if (priv->priv_ip[0] != '\0') { - buf = sdscatprintf (buf, "IP: %s\r\n", priv->priv_ip); + buf = sdscatfmt (buf, "IP: %s\r\n", priv->priv_ip); } if (priv->priv_user[0] != '\0') { - buf = sdscatprintf (buf, "User: %s\r\n", priv->priv_user); + buf = sdscatfmt (buf, "User: %s\r\n", priv->priv_user); } - buf = sdscatprintf (buf, "Queue-ID: %s\r\n", priv->queue_id); + buf = sdscatfmt (buf, "Queue-ID: %s\r\n", priv->queue_id); if (cfg->spamd_settings_id) { - buf = sdscatprintf (buf, "Settings-ID: %s\r\n", cfg->spamd_settings_id); + buf = sdscatfmt (buf, "Settings-ID: %s\r\n", cfg->spamd_settings_id); } - buf = sdscat (buf, "\r\n"); + if (priv->file[0] != '\0') { + if (cfg->compression_enable) { + unsigned char *out; + size_t outlen; - if (rmilter_atomic_write (s, buf, sdslen (buf)) == -1) { - msg_warn("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, - strerror (errno)); - goto err; - } + map = mmap (NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); - if (priv->file[0] != '\0') { + if (map == MAP_FAILED) { + map = NULL; + msg_warn ("<%s>; rspamd: mmap (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } - (void)map; -#if defined(FREEBSD) && defined(HAVE_SENDFILE) - if (sendfile(fd, s, 0, 0, 0, 0, 0) != 0) { - msg_warn("<%s>; rspamd: sendfile (%s), %s", priv->mlfi_id, srv->name, strerror (errno)); - goto err; + outlen = ZSTD_compressBound (sb.st_size); + out = malloc (outlen); + + if (out == NULL) { + msg_warn ("<%s>; rspamd: malloc (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } + + r = ZSTD_compress (out, outlen, map, sb.st_size, 1); + msg_info ("<%s>; rspamd: compressed message, %lu bytes to %lu bytes", + priv->mlfi_id, (unsigned long)sb.st_size, (unsigned long)r); + munmap (map, sb.st_size); + map = NULL; + + if (ZSTD_isError (r)) { + msg_warn ("<%s>; rspamd: zstd compress (%s), %s", + priv->mlfi_id, srv->name, + ZSTD_getErrorName (r)); + free (out); + goto err; + } + + buf = sdscatfmt (buf, "Compression: zstd\r\n"); + buf = sdscatfmt (buf, "Content-Type: application/x-compressed\r\n" + "Content-Length: %U\r\n\r\n", r); + + if (rmilter_atomic_write (s, buf, sdslen (buf)) == -1) { + msg_warn("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } + + if (rmilter_atomic_write (s, out, r) == -1) { + free (out); + msg_warn ("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } + + free (out); } + else { + (void)map; + r = sb.st_size; + buf = sdscatfmt (buf, "Content-Type: text/plain\r\n" + "Content-Length: %U\r\n\r\n", r); + + if (rmilter_atomic_write (s, buf, sdslen (buf)) == -1) { + msg_warn("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } + +#if defined(FREEBSD) && defined(HAVE_SENDFILE) + if (sendfile(fd, s, 0, 0, 0, 0, 0) != 0) { + msg_warn("<%s>; rspamd: sendfile (%s), %s", priv->mlfi_id, srv->name, strerror (errno)); + goto err; + } #elif defined(LINUX) && defined(HAVE_SENDFILE) - off_t off = 0; - if (sendfile(s, fd, &off, sb.st_size) == -1) { - msg_warn("<%s>; rspamd: sendfile (%s), %s", priv->mlfi_id, srv->name, strerror (errno)); - goto err; - } + off_t off = 0; + if (sendfile(s, fd, &off, sb.st_size) == -1) { + msg_warn("<%s>; rspamd: sendfile (%s), %s", priv->mlfi_id, srv->name, strerror (errno)); + goto err; + } #else - map = mmap (NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + map = mmap (NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); - if (map == MAP_FAILED) { - map = NULL; - msg_warn ("<%s>; rspamd: mmap (%s), %s", priv->mlfi_id, srv->name, - strerror (errno)); - goto err; + if (map == MAP_FAILED) { + map = NULL; + msg_warn ("<%s>; rspamd: mmap (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } + + if (rmilter_atomic_write (s, map, sb.st_size) == -1) { + msg_warn ("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } + + munmap (map, sb.st_size); +#endif } + } + else { + buf = sdscatfmt (buf, "Content-Length: 0\r\n\r\n"); - if (rmilter_atomic_write (s, map, sb.st_size) == -1) { - msg_warn ("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, + if (rmilter_atomic_write (s, buf, sdslen (buf)) == -1) { + msg_warn("<%s>; rspamd: write (%s), %s", priv->mlfi_id, srv->name, strerror (errno)); goto err; } - - munmap (map, sb.st_size); -#endif } fcntl (s, F_SETFL, ofl|O_NONBLOCK); @@ -320,7 +473,16 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, for (;;) { ssize_t r; - char io_buf[16384]; + + if (io_buf == NULL) { + io_buf = malloc (iobuf_len); + } + + if (io_buf == NULL) { + msg_err ("<%s>; rspamd: malloc (%s), %s", priv->mlfi_id, srv->name, + strerror (errno)); + goto err; + } if (rmilter_poll_fd (s, cfg->spamd_results_timeout, POLLIN) < 1) { msg_warn("<%s>; rspamd: timeout waiting results %s", priv->mlfi_id, @@ -328,7 +490,7 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, goto err; } - r = read (s, io_buf, sizeof (io_buf)); + r = read (s, io_buf, iobuf_len); if (r == -1) { if (errno == EAGAIN || errno == EINTR) { @@ -362,6 +524,7 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, memset (&ps, 0, sizeof (ps)); res->priv = priv; + res->compressed = cfg->compression_enable; ps.on_body = rmilter_spamd_parser_on_body; parser.data = res; parser.content_length = size; @@ -404,6 +567,10 @@ rspamdscan_socket(SMFICTX *ctx, struct mlfi_priv *priv, munmap (map, sb.st_size); } + if (io_buf) { + free (io_buf); + } + return ret; } #undef TEST_WORD @@ -416,9 +583,9 @@ rmiler_process_rspamd_block (const ucl_object_t *obj, SMFICTX *ctx) int nhdr; if (obj && ucl_object_type (obj) == UCL_OBJECT) { - elt = ucl_object_lookup (obj, "headers_remove"); + elt = ucl_object_lookup (obj, "remove_headers"); /* - * headers_remove: {"name": 1, ... } + * remove_headers: {"name": 1, ... } * where number is the header's position starting from '1' */ if (elt && ucl_object_type (elt) == UCL_OBJECT) { @@ -436,9 +603,9 @@ rmiler_process_rspamd_block (const ucl_object_t *obj, SMFICTX *ctx) } } - elt = ucl_object_lookup (obj, "headers_add"); + elt = ucl_object_lookup (obj, "add_headers"); /* - * headers_add: {"name": "value", ... } + * add_headers: {"name": "value", ... } * name could have multiple values */ if (elt && ucl_object_type (elt) == UCL_OBJECT) { @@ -566,8 +733,8 @@ spamdscan (void *_ctx, struct mlfi_priv *priv, struct config_file *cfg, int extr gettimeofday (&t, NULL); tf = t.tv_sec + t.tv_usec / 1000000.0; - logbuf = sdsempty (); - headerbuf = sdsempty (); + logbuf = sdsnewlen (NULL, 1024); + headerbuf = sdsnewlen (NULL, 512); if (res->symbols) { /* Sort symbols by scores from high to low */ @@ -723,13 +890,7 @@ spamdscan (void *_ctx, struct mlfi_priv *priv, struct config_file *cfg, int extr if (res->dkim_signature) { /* Add dkim signature passed from rspamd */ - GString *folded = rmilter_header_value_fold ("DKIM-Signature", - res->dkim_signature, 76); - - if (folded) { - smfi_addheader (ctx, "DKIM-Signature", folded->str); - g_string_free (folded, TRUE); - } + smfi_addheader (ctx, "DKIM-Signature", (char *)res->dkim_signature); } obj = ucl_object_lookup (res->obj, "rmilter"); diff --git a/src/libspamd.h b/src/libspamd.h index 8a7c59a..7a08a8d 100644 --- a/src/libspamd.h +++ b/src/libspamd.h @@ -72,6 +72,7 @@ struct rspamd_metric_result { struct rspamd_symbol *symbols; struct mlfi_priv *priv; bool parsed; + bool compressed; }; #endif diff --git a/src/main.c b/src/main.c index 8c7d133..67c1667 100644 --- a/src/main.c +++ b/src/main.c @@ -43,7 +43,7 @@ pthread_cond_t cfg_cond = PTHREAD_COND_INITIALIZER; pthread_mutex_t cfg_reload_mtx = PTHREAD_MUTEX_INITIALIZER; /* R/W lock for reconfiguring milter */ pthread_rwlock_t cfg_mtx = PTHREAD_RWLOCK_INITIALIZER; -pthread_key_t rnd_state; +struct rmilter_rng_state *rng_state = NULL; int my_strcmp (const void *s1, const void *s2) @@ -165,6 +165,45 @@ reload_thread (void *unused) return NULL; } +static struct rmilter_rng_state* +get_prng_state (void) +{ + static const char *rng_dev = "/dev/urandom"; + struct rmilter_rng_state *st; + int fd; + struct timeval tv; + + st = malloc (sizeof (*st)); + + if (st == NULL) { + abort (); + } + + fd = open (rng_dev, O_RDONLY); + + if (fd == -1) { + msg_warn ("cannot open %s to seed prng, use current time", + rng_dev); + gettimeofday (&tv, NULL); + memcpy (st->s, &tv, sizeof (tv)); + } + else { + if (read (fd, st->s, sizeof (st->s)) == -1) { + msg_warn ("cannot read %d bytes from %s to seed prng, use current time", + (int)sizeof (*st), rng_dev); + gettimeofday (&tv, NULL); + memcpy (st->s, &tv, sizeof (tv)); + } + + close (fd); + } + + st->p = 0; + pthread_mutex_init (&st->mtx, NULL); + + return st; +} + int main(int argc, char *argv[]) { @@ -274,6 +313,7 @@ main(int argc, char *argv[]) #endif umask (0); + rng_state = get_prng_state (); smfi_setconn(cfg->sock_cred); if (smfi_register(smfilter) == MI_FAILURE) { @@ -291,11 +331,6 @@ main(int argc, char *argv[]) exit(EX_UNAVAILABLE); } - if (pthread_key_create (&rnd_state, free) != 0) { - msg_err ("cannot create pthread key"); - exit(EX_UNAVAILABLE); - } - msg_info ("main: starting rmilter version %s, listen on %s", MVERSION, cfg->sock_cred); diff --git a/src/radix.c b/src/radix.c index 992cccb..996bf61 100644 --- a/src/radix.c +++ b/src/radix.c @@ -1,607 +1,95 @@ -/* - * Copyright (c) 2009-2015, Vsevolod Stakhov - * All rights reserved. +/*- + * Copyright 2016 Vsevolod Stakhov * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ - - #include "config.h" #include "radix.h" -#include -#include -#include -#include -#include -#include - -#define msg_debug_radix(...) do { } while(0) - -struct radix_compressed_node { - union { - struct { - struct radix_compressed_node *right; - struct radix_compressed_node *left; - } n; - struct { - uint8_t *key; - unsigned int keylen; - unsigned int level; - } s; - } d; - uintptr_t value; - bool skipped; -}; - -struct mem_chunk { - void *ptr; - struct mem_chunk *next; -}; +#include "btrie.h" +#include "mempool.h" struct radix_tree_compressed { - struct radix_compressed_node *root; - struct mem_chunk *chunks; + struct mempool pool; size_t size; + struct btrie *tree; }; -#ifdef __GNUC__ -static void * radix_alloc (struct radix_tree_compressed *tree, size_t size) - __attribute__((malloc)); -#else -static void * radix_alloc (struct radix_tree_compressed *tree, size_t size); -#endif - -static void * -radix_alloc (struct radix_tree_compressed *tree, size_t size) -{ - struct mem_chunk *chunk; - - chunk = malloc (sizeof (*chunk) + size); - - if (chunk == NULL) { - abort (); - } - - chunk->next = NULL; - chunk->ptr = ((uint8_t *)chunk) + sizeof (*chunk); - LL_PREPEND (tree->chunks, chunk); - - return chunk->ptr; -} - -static bool -radix_compare_compressed (struct radix_compressed_node *node, - const uint8_t *key, unsigned int keylen, unsigned int cur_level) -{ - const uint8_t *nk; - const uint8_t *k; - uint8_t bit; - unsigned int shift, rbits, skip; - - if (node->d.s.keylen > keylen) { - /* Obvious case */ - return false; - } - - - /* Compare byte aligned levels of a compressed node */ - shift = node->d.s.level / NBBY; - /* - * We know that at least of cur_level bits are the same, - * se we can optimize search slightly - */ - if (shift > 0) { - skip = cur_level / NBBY; - if (shift > skip && - memcmp (node->d.s.key + skip, key + skip, shift - skip) != 0) { - return false; - } - } - - rbits = node->d.s.level % NBBY; - if (rbits > 0) { - /* Precisely compare remaining bits */ - nk = node->d.s.key + shift; - k = key + shift; - - bit = 1U << 7; - - while (rbits > 0) { - if ((*nk & bit) != (*k & bit)) { - return false; - } - bit >>= 1; - rbits --; - } - } - - return true; -} - uintptr_t -radix_find_compressed (radix_compressed_t * tree, const uint8_t *key, size_t keylen) +radix_find_compressed (radix_compressed_t * tree, const guint8 *key, gsize keylen) { - struct radix_compressed_node *node; - uint32_t bit; - size_t kremain = keylen / sizeof (uint32_t); - uintptr_t value; - const uint32_t *k = (uint32_t *)key; - uint32_t kv = ntohl (*k); - unsigned int cur_level = 0; - - bit = 1U << 31; - value = RADIX_NO_VALUE; - node = tree->root; - - msg_debug_radix ("trying to find key: %*xs", (int)keylen, key); - while (node && kremain) { - if (node->skipped) { - msg_debug_radix ("finding in the compressed node: %p at level %d", - (void *)node->value, cur_level); - /* It is obviously a leaf node */ - if (radix_compare_compressed (node, key, keylen, cur_level)) { - return node->value; - } - else { - return value; - } - } - if (node->value != RADIX_NO_VALUE) { - value = node->value; - } + gconstpointer ret; - msg_debug_radix ("finding value cur value: %p, left: %p, " - "right: %p, go %s, level: %d", - (void *)node->value, node->d.n.left, - node->d.n.right, (kv & bit) ? "right" : "left", - cur_level); - if (kv & bit) { - node = node->d.n.right; - } - else { - node = node->d.n.left; - } - - bit >>= 1; - if (bit == 0) { - k ++; - bit = 1U << 31; - kv = ntohl (*k); - kremain --; - } - cur_level ++; - } - - if (node) { - /* We still have a node but no more key, so we can search for skipped node */ - if (node->skipped) { - msg_debug_radix ("finding in the compressed node: %p at level %d", - (void *)node->value, cur_level); - /* It is obviously a leaf node */ - if (radix_compare_compressed (node, key, keylen, cur_level)) { - return node->value; - } - } - } - - return value; -} - -uintptr_t -radix_find_rmilter_addr (radix_compressed_t * tree, - const struct rmilter_inet_address *addr) -{ - const uint8_t *key; - size_t keylen; - - switch (addr->family) { - case AF_INET: - key = (const uint8_t *)&addr->addr.sa4.sin_addr; - keylen = sizeof (addr->addr.sa4.sin_addr); - break; - case AF_INET6: - key = (const uint8_t *)&addr->addr.sa6.sin6_addr; - keylen = sizeof (addr->addr.sa6.sin6_addr); - break; - default: + if (tree == NULL) { return RADIX_NO_VALUE; } - return radix_find_compressed (tree, key, keylen); -} - -static struct radix_compressed_node * -radix_uncompress_path (radix_compressed_t *tree, - struct radix_compressed_node *node, - unsigned int start_level, - unsigned int levels_uncompress) -{ - uint8_t *nkey = node->d.s.key + start_level / NBBY; - uint8_t bit = 1U << (7 - start_level % NBBY); - struct radix_compressed_node *leaf, *next; - - /* Make compressed leaf */ - leaf = radix_alloc (tree, sizeof (*node)); - memcpy (leaf, node, sizeof (*node)); - - /* Make compressed node as uncompressed */ - node->skipped = false; - node->value = RADIX_NO_VALUE; - - msg_debug_radix ( - "uncompress %ud levels of tree from %ud to %ud, stored key: %*xs", - levels_uncompress, - start_level, - start_level + levels_uncompress, - leaf->d.s.keylen, - leaf->d.s.key); - - /* Uncompress the desired path */ - while (levels_uncompress) { - next = radix_alloc (tree, sizeof (*node)); - - next->skipped = false; - next->value = RADIX_NO_VALUE; - - if (*nkey & bit) { - node->d.n.right = next; - node->d.n.left = NULL; - } - else { - node->d.n.left = next; - node->d.n.right = NULL; - } - - msg_debug_radix ("uncompress path for node: %p, left: %p, " - "right: %p, go %s", (void *)node->value, node->d.n.left, - node->d.n.right, (*nkey & bit) ? "right" : "left"); + ret = btrie_lookup (tree->tree, key, keylen * NBBY); - bit >>= 1; - if (bit == 0) { - nkey ++; - bit = 1U << 7; - } - node = next; - levels_uncompress --; - } - - /* Attach leaf node, that was previously a compressed node */ - msg_debug_radix ("attach leaf node to %s with value %p", (*nkey & bit) ? "right" : "left", - (void *)leaf->value); - if (*nkey & bit) { - node->d.n.right = leaf; - node->d.n.left = NULL; - } - else { - node->d.n.left = leaf; - node->d.n.right = NULL; + if (ret == NULL) { + return RADIX_NO_VALUE; } - /* Return node */ - return node; + return (uintptr_t)ret; } -static struct radix_compressed_node * -radix_make_leaf_node (radix_compressed_t *tree, - uint8_t *key, unsigned int keylen, unsigned int level, - uintptr_t value, - bool compressed) -{ - struct radix_compressed_node *node; - - node = radix_alloc (tree, sizeof (struct radix_compressed_node)); - if (compressed) { - node->skipped = true; - node->d.s.keylen = keylen; - node->d.s.key = radix_alloc (tree, node->d.s.keylen); - node->d.s.level = level; - memcpy (node->d.s.key, key, node->d.s.keylen); - } - else { - /* Uncompressed leaf node */ - memset (node, 0, sizeof (*node)); - } - node->value = value; - msg_debug_radix ("insert new leaf node with value %p to level %d", - (void *)value, level); - - return node; -} - -static void -radix_move_up_compressed_leaf (radix_compressed_t *tree, - struct radix_compressed_node *leaf, - struct radix_compressed_node *parent, uintptr_t value, - uint8_t *key, unsigned int keylen, unsigned int leaf_level) +uintptr_t +radix_insert_compressed (radix_compressed_t * tree, + guint8 *key, gsize keylen, + gsize masklen, + uintptr_t value) { - parent->value = leaf->value; + guint keybits = keylen * NBBY; + uintptr_t old; + gchar ip_str[INET6_ADDRSTRLEN + 1]; + int ret; - leaf->value = value; - leaf->d.s.keylen = keylen; - leaf->d.s.key = radix_alloc (tree, leaf->d.s.keylen); - memcpy (leaf->d.s.key, key, keylen); - leaf->d.s.level = leaf_level; -} - -static uintptr_t -radix_replace_node (radix_compressed_t *tree, - struct radix_compressed_node *node, - uint8_t *key, size_t keylen, - uintptr_t value) -{ - uintptr_t oldval; - - if (node->skipped) { - /* - * For leaf nodes we have to deal with the keys as well, since - * we might find that keys are different for the same leaf node - */ - node->d.s.keylen = keylen; - node->d.s.key = radix_alloc (tree, node->d.s.keylen); - memcpy (node->d.s.key, key, node->d.s.keylen); - oldval = node->value; - node->value = value; - msg_debug_radix ("replace value for leaf node with: %p, old value: %p", - (void *)value, (void *)oldval); - } - else { - oldval = node->value; - node->value = value; - msg_debug_radix ("replace value for node with: %p, old value: %p", - (void *)value, (void *)oldval); + if (tree == NULL) { + return NULL; } - return oldval; -} - -static uintptr_t -radix_uncompress_node (radix_compressed_t *tree, - struct radix_compressed_node *node, - uint8_t *key, size_t keylen, - uintptr_t value, - unsigned int cur_level, - unsigned int target_level, - uint8_t bit) -{ - /* Find the largest common prefix of the compressed node and target node */ - size_t kremain = keylen - cur_level / NBBY; - uint8_t *nkey = node->d.s.key + cur_level / NBBY; - uint8_t *k = key + cur_level / NBBY; - unsigned int levels_uncompress = 0, start_level = cur_level; - bool masked = false; - struct radix_compressed_node *leaf; - - msg_debug_radix ("want to uncompress nodes from level %ud to level %ud, " - "compressed node level: %ud", - cur_level, target_level, node->d.s.level); - while (cur_level < target_level) { - uint8_t kb = *k & bit; - uint8_t nb = *nkey & bit; - - if (cur_level >= node->d.s.level) { - msg_debug_radix ("found available masked path at level %ud", cur_level); - masked = true; - break; - } - if (kb != nb) { - msg_debug_radix ("found available path at level %ud", cur_level); - break; - } + g_assert (keybits >= masklen); - cur_level ++; - levels_uncompress ++; - bit >>= 1; - if (bit == 0) { - k ++; - nkey ++; - bit = 1U << 7; - kremain --; - } - } + old = radix_find_compressed (tree, key, keylen); - if (kremain == 0) { - /* Nodes are equal */ - return radix_replace_node (tree, node, key, keylen, value); - } - else { - /* - * We need to uncompress the common path - */ - struct radix_compressed_node *nnode; - - nnode = radix_uncompress_path (tree, node, start_level, levels_uncompress); - - /* - * Now nnode is the last uncompressed node with compressed leaf inside - * and we also know that the current bit is different - * - * - if we have target_level == cur_level, then we can safely assign the - * value of that parent node - * - otherwise we insert new compressed leaf node - */ - if (cur_level == target_level) { - msg_debug_radix ("insert detached leaf node with value: %p", - (void *)value); - nnode->value = value; - } - else if (masked) { - /* - * Here we just add the previous value of node to the current node - * and replace value in the leaf - */ - if (nnode->d.n.left != NULL) { - leaf = nnode->d.n.left; - } - else { - leaf = nnode->d.n.right; - } - msg_debug_radix ("move leaf node with value: %p, to level %ud, " - "set leaf node value to %p and level %ud", (void *)nnode->value, - cur_level, - (void *)value, target_level); - radix_move_up_compressed_leaf (tree, leaf, nnode, value, key, keylen, - target_level); - } - else { - node = radix_make_leaf_node (tree, key, keylen, - target_level, value, true); - if (nnode->d.n.left == NULL) { - nnode->d.n.left = node; - } - else { - nnode->d.n.right = node; - } - } - tree->size ++; - } - - return value; -} + ret = btrie_add_prefix (tree->tree, key, keybits - masklen, + (gconstpointer)value); + if (ret != BTRIE_OKAY) { + memset (ip_str, 0, sizeof (ip_str)); -uintptr_t -radix_insert_compressed (radix_compressed_t * tree, - uint8_t *key, size_t keylen, - size_t masklen, - uintptr_t value) -{ - struct radix_compressed_node *node, *next = NULL, **prev; - size_t keybits = keylen * NBBY; - unsigned int target_level = (keylen * NBBY - masklen); - unsigned int cur_level = 0; - uint8_t bit, *k = key; - size_t kremain = keylen; - uintptr_t oldval = RADIX_NO_VALUE; - - bit = 1U << 7; - node = tree->root; - - assert (keybits >= masklen); - msg_debug_radix ("want insert value %p with mask %z, key: %*xs", - (void *)value, masklen, (int)keylen, key); - - node = tree->root; - next = node; - prev = &tree->root; - - /* Search for the place to insert element */ - while (node && cur_level < target_level) { - if (node->skipped) { - /* We have found skipped node and we need to uncompress it */ - return radix_uncompress_node (tree, node, key, keylen, value, - cur_level, target_level, bit); + if (keybits == 32) { + msg_err ("cannot insert %p, key: %s/%d, duplicate value", + (gpointer)value, + inet_ntop (AF_INET, key, ip_str, sizeof (ip_str) - 1), + (int)(keybits - masklen)); } - if (*k & bit) { - next = node->d.n.right; - prev = &node->d.n.right; + else if (keybits == 128) { + msg_err ("cannot insert %p, key: [%s]/%d, duplicate value", + (gpointer)value, + inet_ntop (AF_INET6, key, ip_str, sizeof (ip_str) - 1), + (int)(keybits - masklen)); } else { - next = node->d.n.left; - prev = &node->d.n.left; + msg_err ("cannot insert %p with mask %d, key: unknown, duplicate value", + (gpointer)value, (int)(keybits - masklen)); } - - if (next == NULL) { - /* Need to insert some nodes */ - break; - } - - bit >>= 1; - if (bit == 0) { - k ++; - bit = 1U << 7; - kremain --; - } - cur_level ++; - node = next; - } - - if (next == NULL) { - next = radix_make_leaf_node (tree, key, keylen, target_level, value, - true); - *prev = next; - tree->size ++; - } - else if (next->value == RADIX_NO_VALUE) { - msg_debug_radix ("insert value node with %p", (void *)value); - next->value = value; - tree->size ++; } else { - if (next->skipped) { - /* - * For skipped node we replace value if the level of skipped node - * is equal to the target level - */ - if (next->d.s.level == target_level) { - oldval = radix_replace_node (tree, next, key, keylen, value); - } - else if (next->d.s.level > target_level) { - /* - * Here we must create new normal node and insert compressed leaf - * one level below - */ - node = radix_make_leaf_node (tree, key, keylen, - target_level, value, false); - *prev = node; - if (*k & bit) { - node->d.n.right = next; - } - else { - node->d.n.left = next; - } - oldval = next->value; - tree->size ++; - } - else { - /* - * We must convert old compressed node to a normal node and - * create new compressed leaf attached to that normal node - */ - node = radix_make_leaf_node (tree, key, keylen, - target_level, value, true); - *prev = next; - msg_debug_radix ("move leaf node with value: %p, to level %ud, " - "set leaf node value to %p and level %ud", (void *)next->value, - cur_level, - (void *)value, target_level); - next->skipped = false; - if (*k & bit) { - next->d.n.right = node; - next->d.n.left = NULL; - } - else { - next->d.n.left = node; - next->d.n.right = NULL; - } - oldval = next->value; - tree->size ++; - } - } - else { - oldval = radix_replace_node (tree, next, key, keylen, value); - } - return oldval; + tree->size ++; } - return next->value; + return old; } @@ -615,8 +103,9 @@ radix_create_compressed (void) return NULL; } + mp_init (&tree->pool); tree->size = 0; - tree->root = NULL; + tree->tree = btrie_init (&tree->pool); return tree; } @@ -624,40 +113,35 @@ radix_create_compressed (void) void radix_destroy_compressed (radix_compressed_t *tree) { - struct mem_chunk *chunk, *tmp; - if (tree) { - LL_FOREACH_SAFE (tree->chunks, chunk, tmp) { - free (chunk); - } + mp_free (&tree->pool); free (tree); } } -int -rspamd_radix_add_iplist (const char *list, const char *separators, - radix_compressed_t *tree) +gint +rspamd_radix_add_iplist (const gchar *list, const gchar *separators, + radix_compressed_t *tree, gconstpointer value, gboolean resolve) { - char *token, *ipnet, *err_str, **strv, *cur, *cpy, *to_free; + gchar *token, *ipnet, *err_str, **strv, **cur, *brace; struct in_addr ina; struct in6_addr ina6; - unsigned int k = INT_MAX; - bool v6_braces = false; - int af; - int res = 0; + guint k = G_MAXINT; + gint af; + gint res = 0, r; + struct addrinfo hints, *ai_res, *cur_ai; /* Split string if there are multiple items inside a single string */ - cpy = strdup (list); - to_free = cpy; - - while ((cur = strsep (&cpy, separators)) != NULL) { + strv = g_strsplit_set (list, separators, 0); + cur = strv; + while (*cur) { af = AF_UNSPEC; - if (*cur == '\0') { + if (**cur == '\0') { + cur++; continue; } - /* Extract ipnet */ - ipnet = cur; + ipnet = *cur; token = strsep (&ipnet, "/"); if (ipnet != NULL) { @@ -665,37 +149,41 @@ rspamd_radix_add_iplist (const char *list, const char *separators, /* Get mask */ k = strtoul (ipnet, &err_str, 10); if (errno != 0) { - msg_err ( + msg_warn ( "invalid netmask, error detected on symbol: %s, erorr: %s", err_str, strerror (errno)); - k = INT_MAX; + k = G_MAXINT; } } + /* Check IP */ if (token[0] == '[') { - v6_braces = true; - } + /* Braced IPv6 */ + brace = strrchr (token, ']'); - if (v6_braces) { - token ++; - cur = strrchr (token, ']'); - if (cur) { - *cur = '\0'; - } + if (brace != NULL) { + token ++; + *brace = '\0'; - if (inet_pton (AF_INET6, token, &ina6) == 1) { - af = AF_INET6; - } - else if (inet_pton (AF_INET, token, &ina) == 1) { - af = AF_INET; + if (inet_pton (AF_INET6, token, &ina6) == 1) { + af = AF_INET6; + } + else { + msg_warn ("invalid IP address: %s", token); + + cur ++; + continue; + } } else { - msg_err ("invalid IP address: %s", token); + msg_warn ("invalid IP address: %s", token); + + cur ++; + continue; } } else { - /* Check IP */ if (inet_pton (AF_INET, token, &ina) == 1) { af = AF_INET; } @@ -703,7 +191,63 @@ rspamd_radix_add_iplist (const char *list, const char *separators, af = AF_INET6; } else { - msg_err ("invalid IP address: %s", token); + + if (resolve) { + memset (&hints, 0, sizeof (hints)); + hints.ai_socktype = SOCK_STREAM; /* Type of the socket */ + hints.ai_flags = AI_NUMERICSERV; + hints.ai_family = AF_UNSPEC; + + if ((r = getaddrinfo (token, NULL, &hints, &ai_res)) == 0) { + for (cur_ai = ai_res; cur_ai != NULL; + cur_ai = cur_ai->ai_next) { + + if (cur_ai->ai_family == AF_INET) { + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *)cur_ai->ai_addr; + if (k > 32) { + k = 32; + } + + radix_insert_compressed (tree, + (guint8 *)&sin->sin_addr, + sizeof (sin->sin_addr), + 32 - k, (uintptr_t)value); + res ++; + } + else if (cur_ai->ai_family == AF_INET6) { + struct sockaddr_in6 *sin6; + + sin6 = (struct sockaddr_in6 *)cur_ai->ai_addr; + if (k > 128) { + k = 128; + } + + radix_insert_compressed (tree, + (guint8 *)&sin6->sin6_addr, + sizeof (sin6->sin6_addr), + 128 - k, (uintptr_t)value); + res ++; + } + } + + freeaddrinfo (ai_res); + } + else { + msg_warn ("getaddrinfo failed for %s: %s", token, + gai_strerror (r)); + } + + cur ++; + continue; + } + else { + msg_warn ("invalid IP address: %s", token); + + cur ++; + continue; + } } } @@ -711,37 +255,40 @@ rspamd_radix_add_iplist (const char *list, const char *separators, if (k > 32) { k = 32; } - radix_insert_compressed (tree, (uint8_t *)&ina, sizeof (ina), - 32 - k, 1); + radix_insert_compressed (tree, (guint8 *)&ina, sizeof (ina), + 32 - k, (uintptr_t)value); res ++; } else if (af == AF_INET6){ if (k > 128) { k = 128; } - radix_insert_compressed (tree, (uint8_t *)&ina6, sizeof (ina6), - 128 - k, 1); + radix_insert_compressed (tree, (guint8 *)&ina6, sizeof (ina6), + 128 - k, (uintptr_t)value); res ++; } + cur++; } - free (to_free); + g_strfreev (strv); return res; } -bool -radix_add_generic_iplist (const char *ip_list, radix_compressed_t **tree) +gboolean +radix_add_generic_iplist (const gchar *ip_list, radix_compressed_t **tree, + gboolean resolve) { if (*tree == NULL) { *tree = radix_create_compressed (); } - return (rspamd_radix_add_iplist (ip_list, ",; ", *tree) > 0); + return (rspamd_radix_add_iplist (ip_list, ",; ", *tree, + GINT_TO_POINTER (1), resolve) > 0); } -size_t +gsize radix_get_size (radix_compressed_t *tree) { if (tree != NULL) { @@ -750,3 +297,38 @@ radix_get_size (radix_compressed_t *tree) return 0; } + + + +const gchar * +radix_get_info (radix_compressed_t *tree) +{ + if (tree == NULL) { + return NULL; + } + + return btrie_stats (tree->tree); +} + +uintptr_t +radix_find_rmilter_addr (radix_compressed_t * tree, + const struct rmilter_inet_address *addr) +{ + const uint8_t *key; + size_t keylen; + + switch (addr->family) { + case AF_INET: + key = (const uint8_t *)&addr->addr.sa4.sin_addr; + keylen = sizeof (addr->addr.sa4.sin_addr); + break; + case AF_INET6: + key = (const uint8_t *)&addr->addr.sa6.sin6_addr; + keylen = sizeof (addr->addr.sa6.sin6_addr); + break; + default: + return RADIX_NO_VALUE; + } + + return radix_find_compressed (tree, key, keylen); +} diff --git a/src/radix.h b/src/radix.h index 62246e2..c7d1487 100644 --- a/src/radix.h +++ b/src/radix.h @@ -1,35 +1,22 @@ -/* - * Copyright (c) 2009-2015, Vsevolod Stakhov - * All rights reserved. +/*- + * Copyright 2016 Vsevolod Stakhov * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * THIS SOFTWARE IS PROVIDED BY AUTHOR ''AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ - #ifndef RADIX_H #define RADIX_H #include "config.h" -#include -#include -#include -#include #include "util.h" #define RADIX_NO_VALUE (uintptr_t)-1 @@ -48,8 +35,8 @@ typedef struct radix_tree_compressed radix_compressed_t; */ uintptr_t radix_insert_compressed (radix_compressed_t * tree, - uint8_t *key, size_t keylen, - size_t masklen, + guint8 *key, gsize keylen, + gsize masklen, uintptr_t value); /** @@ -59,11 +46,8 @@ radix_insert_compressed (radix_compressed_t * tree, * @param keylen length of a key * @return opaque pointer or `RADIX_NO_VALUE` if no value has been found */ -uintptr_t radix_find_compressed (radix_compressed_t * tree, const uint8_t *key, - size_t keylen); - -uintptr_t radix_find_rmilter_addr (radix_compressed_t * tree, - const struct rmilter_inet_address *addr); +uintptr_t radix_find_compressed (radix_compressed_t * tree, const guint8 *key, + gsize keylen); /** * Destroy the complete radix trie @@ -84,21 +68,31 @@ radix_compressed_t *radix_create_compressed (void); * @param tree target tree * @return number of elements inserted */ -int rspamd_radix_add_iplist (const char *list, const char *separators, - radix_compressed_t *tree); +gint rspamd_radix_add_iplist (const gchar *list, const gchar *separators, + radix_compressed_t *tree, gconstpointer value, gboolean resolve); /** * Generic version of @see rspamd_radix_add_iplist. This function creates tree * if `tree` is NULL. */ -bool radix_add_generic_iplist (const char *ip_list, - radix_compressed_t **tree); +gboolean radix_add_generic_iplist (const gchar *ip_list, + radix_compressed_t **tree, gboolean resolve); /** * Returns number of elements in the tree * @param tree * @return */ -size_t radix_get_size (radix_compressed_t *tree); +gsize radix_get_size (radix_compressed_t *tree); + +/** + * Return string that describes this radix tree (memory, nodes, compression etc) + * @param tree + * @return constant string + */ +const gchar * radix_get_info (radix_compressed_t *tree); + +uintptr_t radix_find_rmilter_addr (radix_compressed_t * tree, + const struct rmilter_inet_address *addr); #endif diff --git a/src/ratelimit.c b/src/ratelimit.c index 3283ec3..6224fdf 100644 --- a/src/ratelimit.c +++ b/src/ratelimit.c @@ -94,16 +94,13 @@ is_whitelisted (struct rmilter_inet_address *addr, const char *rcpt, static int is_bounce(char *from, struct config_file *cfg) { size_t user_part_len; - struct addr_list_entry *cur_addr; + struct addr_list_entry *cur_addr = NULL; user_part_len = extract_user_part (from); - LIST_FOREACH (cur_addr, &cfg->bounce_addrs, next) - { - if (cur_addr->len == user_part_len - && strncasecmp (cur_addr->addr, from, user_part_len) == 0) { - /* Bounce rcpt */ - return 1; - } + HASH_FIND (hh, cfg->bounce_addrs, from, user_part_len, cur_addr); + + if (cur_addr) { + return 1; } return 0; @@ -169,7 +166,7 @@ static int check_specific_limit(struct mlfi_priv *priv, struct config_file *cfg, dlen = sizeof (*b); if (b == NULL) { - msg_err("<%s>; check_specific_limit: calloc failed: %s", + msg_err("<%s>; check_specific_limit: calloc failed: %s", priv->mlfi_id, strerror (errno)); return -1; } @@ -213,13 +210,18 @@ static int check_specific_limit(struct mlfi_priv *priv, struct config_file *cfg, return 1; } -int rate_check(struct mlfi_priv *priv, struct config_file *cfg, +int +rate_check (struct mlfi_priv *priv, struct config_file *cfg, const char *rcpt, int is_update) { double t; struct timeval tm; int r; + if (!cfg->ratelimit_enable) { + return 1; + } + if (priv->priv_addr.family == AF_INET && is_whitelisted (&priv->priv_addr, rcpt, cfg) != 0) { diff --git a/src/regexp.c b/src/regexp.c deleted file mode 100644 index e9971a8..0000000 --- a/src/regexp.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2007-2012, Vsevolod Stakhov - * All rights reserved. - - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. Redistributions in binary form - * must reproduce the above copyright notice, this list of conditions and the - * following disclaimer in the documentation and/or other materials provided with - * the distribution. Neither the name of the author nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" - -#include "utlist.h" -#include "pcre.h" -#include "rmilter.h" -#include "cfg_file.h" -#include "regexp.h" - -extern pthread_mutex_t regexp_mtx; - -#define C_LOCK() do { pthread_mutex_lock (®exp_mtx); } while (0) -#define C_UNLOCK() do { pthread_mutex_unlock (®exp_mtx); } while (0) - -static int -check_condition (struct cond_arg *arg, const char *match_str, size_t str_len) -{ - int ovector[30]; - int r; - - if (!arg || !match_str || arg->empty) { - return 0; - } - - r = pcre_exec (arg->re, NULL, match_str, str_len, 0, 0, ovector, sizeof(ovector)/sizeof(int)); - - if (arg->not) { - return r < 0; - } - else { - return r >= 0; - } -} - -static struct rule * -check_connect_rule (struct rule *cur, const struct mlfi_priv *priv) -{ - struct condition *cond; - size_t hlen, iplen; - - hlen = strlen (priv->priv_hostname); - iplen = strlen (priv->priv_ip); - - LIST_FOREACH (cond, cur->conditions, next) { - if (cond->type == COND_CONNECT) { - /* Hostname and ip address */ - if (check_condition (&cond->args[0], priv->priv_hostname, hlen) - && check_condition (&cond->args[1], priv->priv_ip, iplen)) { - return cur; - } - } - } - return NULL; -} - -static struct rule * -check_helo_rule (struct rule *cur, const struct mlfi_priv *priv) -{ - struct condition *cond; - size_t hlen; - - hlen = strlen (priv->priv_helo); - - LIST_FOREACH (cond, cur->conditions, next) { - if (cond->type == COND_HELO) { - /* Helo */ - if (check_condition (&cond->args[0], priv->priv_helo, hlen)) { - return cur; - } - } - } - return NULL; -} - -static struct rule * -check_envfrom_rule (struct rule *cur, const struct mlfi_priv *priv) -{ - struct condition *cond; - size_t flen; - - flen = strlen (priv->priv_from); - - LIST_FOREACH (cond, cur->conditions, next) { - if (cond->type == COND_ENVFROM) { - /* From: */ - if (check_condition (&cond->args[0], priv->priv_from, flen)) { - return cur; - } - } - } - return NULL; -} - -static struct rule * -check_envrcpt_rule (struct rule *cur, const struct mlfi_priv *priv) -{ - struct condition *cond; - struct rcpt *rcpt; - size_t tlen; - - LIST_FOREACH (cond, cur->conditions, next) { - if (cond->type == COND_ENVRCPT) { - /* To: */ - DL_FOREACH (priv->rcpts, rcpt) { - tlen = strlen (rcpt->r_addr); - if (check_condition (&cond->args[0], rcpt->r_addr, tlen)) { - return cur; - } - } - } - } - return NULL; -} -static struct rule * -check_header_rule (struct rule *cur, const struct mlfi_priv *priv) -{ - struct condition *cond; - size_t nlen, vlen; - - nlen = strlen (priv->priv_cur_header.header_name); - vlen = strlen (priv->priv_cur_header.header_value); - - LIST_FOREACH (cond, cur->conditions, next) { - if (cond->type == COND_HEADER) { - /* Header name and value */ - if (check_condition (&cond->args[0], priv->priv_cur_header.header_name, nlen) - && check_condition (&cond->args[1], priv->priv_cur_header.header_value, vlen)) { - return cur; - } - } - } - return NULL; -} - -static struct rule * -check_body_rule (struct rule *cur, const struct mlfi_priv *priv) -{ - struct condition *cond; - - LIST_FOREACH (cond, cur->conditions, next) { - if (cond->type == COND_BODY) { - /* Body line */ - if (check_condition (&cond->args[0], priv->priv_cur_body.value, priv->priv_cur_body.len)) { - return cur; - } - } - } - return NULL; -} - -struct rule * -regexp_check (const struct config_file *cfg, const struct mlfi_priv *priv, enum milter_stage stage) -{ - struct rule *cur; - struct rule *r = NULL; - - /* Check rules for specific stage */ - LIST_FOREACH (cur, &cfg->rules, next) { - if ((cur->flags & COND_CONNECT_FLAG) != 0 && stage == STAGE_CONNECT) { - C_LOCK (); - r = check_connect_rule (cur, priv); - C_UNLOCK (); - - } else if ((cur->flags & COND_HELO_FLAG) != 0 && stage == STAGE_HELO) { - C_LOCK (); - r = check_helo_rule (cur, priv); - C_UNLOCK (); - - } else if ((cur->flags & COND_ENVFROM_FLAG) != 0 && stage == STAGE_ENVFROM) { - C_LOCK (); - r = check_envfrom_rule (cur, priv); - C_UNLOCK (); - - } else if ((cur->flags & COND_ENVRCPT_FLAG) != 0 && stage == STAGE_ENVRCPT) { - C_LOCK (); - r = check_envrcpt_rule (cur, priv); - C_UNLOCK (); - - } else if ((cur->flags & COND_HEADER_FLAG) != 0 && stage == STAGE_HEADER) { - C_LOCK (); - r = check_header_rule (cur, priv); - C_UNLOCK (); - - } else if ((cur->flags & COND_BODY_FLAG) != 0 && stage == STAGE_BODY) { - C_LOCK (); - r = check_body_rule (cur, priv); - C_UNLOCK (); - - } - /* Stop matching on finding matched rule */ - if (r != NULL) { - return r; - } - } - - return NULL; -} - -struct action * -rules_check (struct rule **rules) -{ - struct rule *cur = NULL; - struct condition *cond; - int i, r; - - for (i = 0; i < STAGE_MAX; i++) { - if (rules[i] == NULL) { - continue; - } - cur = rules[i]; - r = 1; - LIST_FOREACH (cond, cur->conditions, next) { - if (rules[cond->type] == 0) { - r = 0; - break; - } - } - /* Return reject actions before any accept action */ - if (r == 1 && cur->act->type != ACTION_ACCEPT) { - return cur->act; - } - else if (r == 0) { - cur = NULL; - } - } - - /* Return accept action if found */ - if (cur) { - return cur->act; - } - - return NULL; -} - -#undef C_LOCK -#undef C_UNLOCK diff --git a/src/regexp.h b/src/regexp.h deleted file mode 100644 index 8fd6251..0000000 --- a/src/regexp.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2007-2012, Vsevolod Stakhov - * All rights reserved. - - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. Redistributions in binary form - * must reproduce the above copyright notice, this list of conditions and the - * following disclaimer in the documentation and/or other materials provided with - * the distribution. Neither the name of the author nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef REGEXP_H -#define REGEXP_H - -#include "rmilter.h" -#include "cfg_file.h" - -enum milter_stage { - STAGE_CONNECT = 0, - STAGE_HELO, - STAGE_ENVFROM, - STAGE_ENVRCPT, - STAGE_HEADER, - STAGE_BODY, -}; - -struct mlfi_priv; - -struct rule * regexp_check (const struct config_file *, /* Config file */ - const struct mlfi_priv *, /* Current priv data */ - enum milter_stage); /* Current Stage */ -struct action * rules_check (struct rule **); - -#endif diff --git a/src/rmilter.c b/src/rmilter.c index 6f3c28d..fcd4781 100644 --- a/src/rmilter.c +++ b/src/rmilter.c @@ -37,7 +37,6 @@ #include "libspamd.h" #include "cfg_file.h" #include "rmilter.h" -#include "regexp.h" #include "cache.h" #ifdef HAVE_DCC #include "dccif.h" @@ -90,51 +89,11 @@ struct smfiDesc smfilter = }; extern struct config_file *cfg; -extern pthread_key_t rnd_state; - -struct rmilter_rng_state { - uint64_t s[16]; - int p; -}; +extern struct rmilter_rng_state *rng_state; /* Milter mutexes */ pthread_mutex_t regexp_mtx = PTHREAD_MUTEX_INITIALIZER; -static sfsistat -set_reply (SMFICTX *ctx, const struct action *act) -{ - int result = SMFIS_CONTINUE; - - switch (act->type) { - case ACTION_ACCEPT: - result = SMFIS_ACCEPT; - break; - case ACTION_REJECT: - result = SMFIS_REJECT; - break; - case ACTION_TEMPFAIL: - result = SMFIS_TEMPFAIL; - break; - case ACTION_QUARANTINE: - result = SMFIS_DISCARD; - break; - case ACTION_DISCARD: - result = SMFIS_DISCARD; - break; - } - if (act->type == ACTION_REJECT && - smfi_setreply(ctx, RCODE_REJECT, XCODE_REJECT, - (char *)act->message) != MI_SUCCESS) { - msg_err("smfi_setreply"); - } - if (act->type == ACTION_TEMPFAIL && - smfi_setreply(ctx, RCODE_TEMPFAIL, XCODE_TEMPFAIL, - (char *)act->message) != MI_SUCCESS) { - msg_err("smfi_setreply"); - } - - return result; -} /* * Strip angle braces if needed @@ -167,50 +126,6 @@ normalize_email_addr (const char *src, char *dest, size_t destlen) } } -static struct rmilter_rng_state* -get_prng_state (void) -{ - static const char *rng_dev = "/dev/urandom"; - struct rmilter_rng_state *st; - int fd; - struct timeval tv; - - st = pthread_getspecific (rnd_state); - - if (st == NULL) { - st = malloc (sizeof (*st)); - - if (st == NULL) { - abort (); - } - - fd = open (rng_dev, O_RDONLY); - - if (fd == -1) { - msg_warn ("cannot open %s to seed prng, use current time", - rng_dev); - gettimeofday (&tv, NULL); - memcpy (st->s, &tv, sizeof (tv)); - } - else { - if (read (fd, st->s, sizeof (st->s)) == -1) { - msg_warn ("cannot read %d bytes from %s to seed prng, use current time", - (int)sizeof (*st), rng_dev); - gettimeofday (&tv, NULL); - memcpy (st->s, &tv, sizeof (tv)); - } - - close (fd); - } - - st->p = 0; - - pthread_setspecific (rnd_state, st); - } - - return st; -} - /* * xorshift1024* * from http://xoroshiro.di.unimi.it/ @@ -218,12 +133,18 @@ get_prng_state (void) static uint64_t prng_next (struct rmilter_rng_state *st) { + pthread_mutex_lock (&st->mtx); + const uint64_t s0 = st->s[st->p]; uint64_t s1 = st->s[st->p = (st->p + 1) & 15]; + uint64_t res; s1 ^= s1 << 31; st->s[st->p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); - return st->s[st->p] * UINT64_C(1181783497276652981); + res = st->s[st->p] * UINT64_C(1181783497276652981); + pthread_mutex_unlock (&st->mtx); + + return res; } static inline int @@ -372,8 +293,10 @@ check_greylisting_ctx(SMFICTX *ctx, struct mlfi_priv *priv) int r; CFG_RLOCK(); - if (priv->priv_ip[0] != '\0' && cfg->cache_servers_grey_num > 0 && - cfg->greylisting_timeout > 0 && cfg->greylisting_expire > 0 && priv->strict != 0) { + if (cfg->greylisting_enable && + priv->priv_ip[0] != '\0' && cfg->cache_servers_grey_num > 0 && + cfg->greylisting_timeout > 0 && + cfg->greylisting_expire > 0 && priv->strict != 0) { msg_debug ("<%s>; check_greylisting_ctx: checking greylisting", priv->mlfi_id); @@ -405,7 +328,6 @@ check_greylisting_ctx(SMFICTX *ctx, struct mlfi_priv *priv) static void set_random_id (struct mlfi_priv *priv) { - struct rmilter_rng_state *st; uint64_t val; static const char hexdigests[16] = "0123456789abcdef"; const unsigned int nbytes = 5; @@ -413,8 +335,8 @@ set_random_id (struct mlfi_priv *priv) const char *p; unsigned r = 0, i; - st = get_prng_state (); - val = prng_next (st); + val = prng_next (rng_state); + val ^= (uint64_t)pthread_self (); buf = priv->mlfi_id; @@ -522,6 +444,13 @@ mlfi_connect(SMFICTX * ctx, char *hostname, _SOCK_ADDR * addr) memcpy (&priv->priv_addr.addr.sa6, &addr_storage->sa6, sizeof (struct sockaddr_in6)); port = ntohs (addr_storage->sa6.sin6_port); break; + case AF_UNIX: + rmilter_strlcpy (priv->priv_ip, "127.0.0.1", sizeof(priv->priv_ip)); + priv->priv_addr.addr.sa.sa_family = AF_INET; + priv->priv_addr.addr.sa4.sin_addr.s_addr = INADDR_LOOPBACK; + priv->priv_addr.addr.sa4.sin_port = 0; + port = 0; + break; default: rmilter_strlcpy (priv->priv_ip, "NULL", sizeof(priv->priv_ip)); memcpy (&priv->priv_addr.addr.sa, &addr_storage->sa, sizeof (struct sockaddr)); @@ -673,7 +602,6 @@ mlfi_envfrom(SMFICTX *ctx, char **envfrom) { char *tmpfrom; struct mlfi_priv *priv; - struct rule *act; unsigned int i; if ((priv = (struct mlfi_priv *) smfi_getpriv (ctx)) == NULL) { @@ -730,6 +658,10 @@ mlfi_envfrom(SMFICTX *ctx, char **envfrom) /* Check whether we need to sign this message */ #ifdef WITH_DKIM + if (!cfg->dkim_enable) { + return SMFIS_CONTINUE; + } + CFG_RLOCK(); DKIM_STAT statp; struct dkim_domain_entry *dkim_domain; @@ -797,25 +729,6 @@ mlfi_envfrom(SMFICTX *ctx, char **envfrom) CFG_UNLOCK(); #endif - CFG_RLOCK(); - /* Check connect */ - act = regexp_check (cfg, priv, STAGE_CONNECT); - if (act != NULL) { - priv->matched_rules[STAGE_CONNECT] = act; - } - /* Check helo */ - act = regexp_check (cfg, priv, STAGE_HELO); - if (act != NULL) { - priv->matched_rules[STAGE_HELO] = act; - } - - /* Check envfrom */ - act = regexp_check (cfg, priv, STAGE_ENVFROM); - if (act != NULL) { - priv->matched_rules[STAGE_ENVFROM] = act; - } - - CFG_UNLOCK(); return SMFIS_CONTINUE; } @@ -823,7 +736,6 @@ static sfsistat mlfi_envrcpt(SMFICTX *ctx, char **envrcpt) { struct mlfi_priv *priv; - struct rule *act; struct rcpt *newrcpt; char *tmprcpt; @@ -867,13 +779,8 @@ mlfi_envrcpt(SMFICTX *ctx, char **envrcpt) DL_APPEND(priv->rcpts, newrcpt); priv->priv_rcptcount ++; - /* Check recipient */ - act = regexp_check (cfg, priv, STAGE_ENVRCPT); - if (act != NULL) { - priv->matched_rules[STAGE_ENVRCPT] = act; - } - CFG_UNLOCK(); + return SMFIS_CONTINUE; } @@ -929,7 +836,6 @@ static sfsistat mlfi_header(SMFICTX * ctx, char *headerf, char *headerv) { struct mlfi_priv *priv; - struct rule *act; int len; char *p, *c, t, *hname_lowercase; @@ -1030,11 +936,6 @@ mlfi_header(SMFICTX * ctx, char *headerf, char *headerv) } } - act = regexp_check (cfg, priv, STAGE_HEADER); - if (act != NULL) { - priv->matched_rules[STAGE_HEADER] = act; - } - free (hname_lowercase); CFG_UNLOCK(); @@ -1130,7 +1031,6 @@ mlfi_eom(SMFICTX * ctx) int prob_max; double prob_cur; struct stat sb; - struct action *act; struct rcpt *rcpt; bool ip_whitelisted = false; int ret = SMFIS_CONTINUE; @@ -1176,18 +1076,6 @@ mlfi_eom(SMFICTX * ctx) #endif CFG_RLOCK(); - if (cfg->serial == priv->serial) { - msg_debug ("<%s>; mlfi_eom: checking regexp rules", priv->mlfi_id); - act = rules_check (priv->matched_rules); - if (act != NULL && act->type != ACTION_ACCEPT) { - CFG_UNLOCK (); - return set_reply (ctx, act); - } - } - else { - msg_warn ("<%s>; mlfi_eom: config was reloaded, not checking rules", priv->mlfi_id); - } - if (priv->complete_to_beanstalk) { /* Set actual pos to send all message to beanstalk */ priv->eoh_pos = ftell (priv->fileh); @@ -1437,8 +1325,6 @@ mlfi_eom(SMFICTX * ctx) /* Maybe write message copy */ if (cfg->cache_servers_copy > 0 && cfg->cache_copy_channel) { - struct rmilter_rng_state *st = get_prng_state (); - prob_cur = cfg->cache_copy_prob; /* Normalize */ prob_max = 100; @@ -1447,7 +1333,7 @@ mlfi_eom(SMFICTX * ctx) prob_cur *= 10; } - if (prng_next (st) % prob_max <= prob_cur) { + if (prng_next (rng_state) % prob_max <= prob_cur) { publish_message (priv, RMILTER_PUBLISH_COPY, extra_buf, sizeof (extra_buf)); } @@ -1529,8 +1415,16 @@ mlfi_eom(SMFICTX * ctx) rate_check (priv, cfg, rcpt->r_addr, 1); } #endif - dkim_result = "not signed, ignored"; + #ifdef WITH_DKIM + if (cfg->dkim_enable) { + dkim_result = "not signed, ignored"; + } + else { + dkim_result = "not signed, disabled"; + } + + /* Add dkim signature */ char *hdr; size_t len; @@ -1570,6 +1464,8 @@ mlfi_eom(SMFICTX * ctx) dkim_result = "not signed, internal failure"; } } +#else + dkim_result = "not signed, not compiled with dkim"; #endif end: @@ -1691,7 +1587,6 @@ static sfsistat mlfi_body(SMFICTX * ctx, u_char * bodyp, size_t bodylen) { struct mlfi_priv *priv; - struct rule *act; if ((priv = (struct mlfi_priv *) smfi_getpriv (ctx)) == NULL) { msg_err ("Internal error: smfi_getpriv() returns NULL"); @@ -1719,10 +1614,6 @@ mlfi_body(SMFICTX * ctx, u_char * bodyp, size_t bodylen) priv->priv_cur_body.len = bodylen; CFG_RLOCK(); - act = regexp_check (cfg, priv, STAGE_BODY); - if (act != NULL) { - priv->matched_rules[STAGE_BODY] = act; - } /* continue processing */ #ifdef WITH_DKIM int r; @@ -1808,7 +1699,3 @@ check_dcc (const struct mlfi_priv *priv) return dccres; } #endif - -/* - * vi:ts=4 - */ diff --git a/src/rmilter.h b/src/rmilter.h index 4f7c7d9..af316aa 100644 --- a/src/rmilter.h +++ b/src/rmilter.h @@ -51,6 +51,13 @@ #define XCODE_REJECT "5.7.1" #define XCODE_TEMPFAIL "4.7.1" + +struct rmilter_rng_state { + pthread_mutex_t mtx; + uint64_t s[16]; + int p; +}; + /* Structures and macros used */ struct rcpt { char r_addr[ADDRLEN + 1];