From 50e0da08040d54fd93e7f3d70252e18e5d30dd8f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 6 Aug 2015 13:14:37 +0530 Subject: [PATCH] Implement decoding of .xz files LZMA decoding code taken from the public domain LZMA SDK by Igor Pavlov. --- COPYRIGHT | 12 +- setup/extensions.py | 5 + src/calibre/constants.py | 1 + src/lzma/7zTypes.h | 256 +++++++++ src/lzma/Compiler.h | 31 ++ src/lzma/Lzma2Dec.c | 378 +++++++++++++ src/lzma/Lzma2Dec.h | 80 +++ src/lzma/LzmaDec.c | 1091 ++++++++++++++++++++++++++++++++++++++ src/lzma/LzmaDec.h | 227 ++++++++ src/lzma/Precomp.h | 10 + src/lzma/__init__.py | 10 + src/lzma/lzma_binding.c | 189 +++++++ src/lzma/xz.py | 350 ++++++++++++ 13 files changed, 2638 insertions(+), 2 deletions(-) create mode 100644 src/lzma/7zTypes.h create mode 100644 src/lzma/Compiler.h create mode 100644 src/lzma/Lzma2Dec.c create mode 100644 src/lzma/Lzma2Dec.h create mode 100644 src/lzma/LzmaDec.c create mode 100644 src/lzma/LzmaDec.h create mode 100644 src/lzma/Precomp.h create mode 100644 src/lzma/__init__.py create mode 100644 src/lzma/lzma_binding.c create mode 100644 src/lzma/xz.py diff --git a/COPYRIGHT b/COPYRIGHT index fab8b6d057c9..82c2b977d1de 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -8,8 +8,8 @@ Files: src/duktape/* Copyright: Various License: MIT -Files: resources/rapydscript/compiler.js -Copyright: Alexander Tsepkov +Files: resources/rapydscript/* +Copyright: Various License: BSD Files: src/unrar/* @@ -21,6 +21,14 @@ Files: src/html5lib/* Copyright: Copyright (c) 2006-2013 James Graham and other contributors License: Expat +Files: src/lzma/* +Copyright: Igor Pavlov +License: Public Domain + +Files: src/lzma/*.py src/lzma/lzma_binding.c +Copyright: Kovid Goyal +License: GPLv3 + Files: src/templite/* Copyright: Copyright (c) 2009 joonis new media, Thimo Kraemer License: GPL-2+ diff --git a/setup/extensions.py b/setup/extensions.py index 53dfce3bcb4c..28cc4aa89d30 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -66,6 +66,11 @@ def preflight(self, obj_dir, compiler, linker, builder, cflags, ldflags): extensions = [ + Extension('lzma_binding', + glob.glob(os.path.join(SRC, 'lzma', '*.c')), + headers=glob.glob(os.path.join(SRC, 'lzma', '*.h')), + ), + Extension('dukpy', ['duktape/%s.c' % x for x in 'errors context conversions proxy module duktape/duktape'.split()], headers=['duktape/dukpy.h', 'duktape/duktape/duktape.h'], diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 32c4b6c08b79..a1ecbf40a1a7 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -147,6 +147,7 @@ def __init__(self): 'tokenizer', 'certgen', 'dukpy', + 'lzma_binding', ] if iswindows: plugins.extend(['winutil', 'wpd', 'winfonts']) diff --git a/src/lzma/7zTypes.h b/src/lzma/7zTypes.h new file mode 100644 index 000000000000..903047b10fb3 --- /dev/null +++ b/src/lzma/7zTypes.h @@ -0,0 +1,256 @@ +/* 7zTypes.h -- Basic types +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#ifdef _WIN32 +/* #include */ +#endif + +#include + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + +#ifdef _WIN32 +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#else +typedef int WRes; +#endif + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. + NOTES: Some code will work incorrectly in that case! */ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#define UINT64_CONST(n) n +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#define UINT64_CONST(n) n ## ULL +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + +typedef int Bool; +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define MY_STD_CALL __stdcall +#else +#define MY_STD_CALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_CDECL __cdecl +#define MY_FAST_CALL __fastcall + +#else + +#define MY_NO_INLINE +#define MY_CDECL +#define MY_FAST_CALL + +#endif + + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct +{ + Byte (*Read)(void *p); /* reads one byte, returns 0 in case of EOF or error */ +} IByteIn; + +typedef struct +{ + void (*Write)(void *p, Byte b); +} IByteOut; + +typedef struct +{ + SRes (*Read)(void *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +} ISeqInStream; + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf); + +typedef struct +{ + size_t (*Write)(void *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +} ISeqOutStream; + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + +typedef struct +{ + SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); +} ISeekInStream; + +typedef struct +{ + SRes (*Look)(void *p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(void *p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(void *p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); +} ILookInStream; + +SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); + +#define LookToRead_BUF_SIZE (1 << 14) + +typedef struct +{ + ILookInStream s; + ISeekInStream *realStream; + size_t pos; + size_t size; + Byte buf[LookToRead_BUF_SIZE]; +} CLookToRead; + +void LookToRead_CreateVTable(CLookToRead *p, int lookahead); +void LookToRead_Init(CLookToRead *p); + +typedef struct +{ + ISeqInStream s; + ILookInStream *realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + +typedef struct +{ + ISeqInStream s; + ILookInStream *realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + +typedef struct +{ + SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. */ +} ICompressProgress; + +typedef struct +{ + void *(*Alloc)(void *p, size_t size); + void (*Free)(void *p, void *address); /* address can be 0 */ +} ISzAlloc; + +#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) +#define IAlloc_Free(p, a) (p)->Free((p), a) + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +EXTERN_C_END + +#endif diff --git a/src/lzma/Compiler.h b/src/lzma/Compiler.h new file mode 100644 index 000000000000..37589a0e24e4 --- /dev/null +++ b/src/lzma/Compiler.h @@ -0,0 +1,31 @@ +/* Compiler.h +2015-03-25 : Igor Pavlov : Public domain */ + +#ifndef __7Z_COMPILER_H +#define __7Z_COMPILER_H + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + + #if _MSC_VER >= 1300 + #pragma warning(disable : 4996) // This function or variable may be unsafe + #else + #pragma warning(disable : 4511) // copy constructor could not be generated + #pragma warning(disable : 4512) // assignment operator could not be generated + #pragma warning(disable : 4702) // unreachable code + #pragma warning(disable : 4710) // not inlined + #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information + #endif + +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/src/lzma/Lzma2Dec.c b/src/lzma/Lzma2Dec.c new file mode 100644 index 000000000000..0e91195399cc --- /dev/null +++ b/src/lzma/Lzma2Dec.c @@ -0,0 +1,378 @@ +/* Lzma2Dec.c -- LZMA2 Decoder +2014-10-29 : Igor Pavlov : Public domain */ + +/* #define SHOW_DEBUG_INFO */ + +#include "Precomp.h" + +#ifdef SHOW_DEBUG_INFO +#include +#endif + +#include + +#include "Lzma2Dec.h" + +/* +00000000 - EOS +00000001 U U - Uncompressed Reset Dic +00000010 U U - Uncompressed No Reset +100uuuuu U U P P - LZMA no reset +101uuuuu U U P P - LZMA reset state +110uuuuu U U P P S - LZMA reset state + new prop +111uuuuu U U P P S - LZMA reset state + new prop + reset dic + + u, U - Unpack Size + P - Pack Size + S - Props +*/ + +#define LZMA2_CONTROL_LZMA (1 << 7) +#define LZMA2_CONTROL_COPY_NO_RESET 2 +#define LZMA2_CONTROL_COPY_RESET_DIC 1 +#define LZMA2_CONTROL_EOF 0 + +#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0) + +#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3) +#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2) + +#define LZMA2_LCLP_MAX 4 +#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) + +#ifdef SHOW_DEBUG_INFO +#define PRF(x) x +#else +#define PRF(x) +#endif + +typedef enum +{ + LZMA2_STATE_CONTROL, + LZMA2_STATE_UNPACK0, + LZMA2_STATE_UNPACK1, + LZMA2_STATE_PACK0, + LZMA2_STATE_PACK1, + LZMA2_STATE_PROP, + LZMA2_STATE_DATA, + LZMA2_STATE_DATA_CONT, + LZMA2_STATE_FINISHED, + LZMA2_STATE_ERROR +} ELzma2State; + +static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props) +{ + UInt32 dicSize; + if (prop > 40) + return SZ_ERROR_UNSUPPORTED; + dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); + props[0] = (Byte)LZMA2_LCLP_MAX; + props[1] = (Byte)(dicSize); + props[2] = (Byte)(dicSize >> 8); + props[3] = (Byte)(dicSize >> 16); + props[4] = (Byte)(dicSize >> 24); + return SZ_OK; +} + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)); + return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc) +{ + Byte props[LZMA_PROPS_SIZE]; + RINOK(Lzma2Dec_GetOldProps(prop, props)); + return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); +} + +void Lzma2Dec_Init(CLzma2Dec *p) +{ + p->state = LZMA2_STATE_CONTROL; + p->needInitDic = True; + p->needInitState = True; + p->needInitProp = True; + LzmaDec_Init(&p->decoder); +} + +static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +{ + switch (p->state) + { + case LZMA2_STATE_CONTROL: + p->control = b; + PRF(printf("\n %4X ", p->decoder.dicPos)); + PRF(printf(" %2X", b)); + if (p->control == 0) + return LZMA2_STATE_FINISHED; + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if ((p->control & 0x7F) > 2) + return LZMA2_STATE_ERROR; + p->unpackSize = 0; + } + else + p->unpackSize = (UInt32)(p->control & 0x1F) << 16; + return LZMA2_STATE_UNPACK0; + + case LZMA2_STATE_UNPACK0: + p->unpackSize |= (UInt32)b << 8; + return LZMA2_STATE_UNPACK1; + + case LZMA2_STATE_UNPACK1: + p->unpackSize |= (UInt32)b; + p->unpackSize++; + PRF(printf(" %8d", p->unpackSize)); + return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; + + case LZMA2_STATE_PACK0: + p->packSize = (UInt32)b << 8; + return LZMA2_STATE_PACK1; + + case LZMA2_STATE_PACK1: + p->packSize |= (UInt32)b; + p->packSize++; + PRF(printf(" %8d", p->packSize)); + return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP: + (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA); + + case LZMA2_STATE_PROP: + { + unsigned lc, lp; + if (b >= (9 * 5 * 5)) + return LZMA2_STATE_ERROR; + lc = b % 9; + b /= 9; + p->decoder.prop.pb = b / 5; + lp = b % 5; + if (lc + lp > LZMA2_LCLP_MAX) + return LZMA2_STATE_ERROR; + p->decoder.prop.lc = lc; + p->decoder.prop.lp = lp; + p->needInitProp = False; + return LZMA2_STATE_DATA; + } + } + return LZMA2_STATE_ERROR; +} + +static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) +{ + memcpy(p->dic + p->dicPos, src, size); + p->dicPos += size; + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) + p->checkDicSize = p->prop.dicSize; + p->processedPos += (UInt32)size; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState); + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->state != LZMA2_STATE_FINISHED) + { + SizeT dicPos = p->decoder.dicPos; + + if (p->state == LZMA2_STATE_ERROR) + return SZ_ERROR_DATA; + + if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + (*srcLen)++; + p->state = Lzma2Dec_UpdateState(p, *src++); + + if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED) + { + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; + } + continue; + } + + { + SizeT destSizeCur = dicLimit - dicPos; + SizeT srcSizeCur = inSize - *srcLen; + ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY; + + if (p->unpackSize <= destSizeCur) + { + destSizeCur = (SizeT)p->unpackSize; + curFinishMode = LZMA_FINISH_END; + } + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (*srcLen == inSize) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + if (p->state == LZMA2_STATE_DATA) + { + Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); + if (initDic) + p->needInitProp = p->needInitState = True; + else if (p->needInitDic) + { + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; + } + p->needInitDic = False; + LzmaDec_InitDicAndState(&p->decoder, initDic, False); + } + + if (srcSizeCur > destSizeCur) + srcSizeCur = destSizeCur; + + if (srcSizeCur == 0) + { + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; + } + + LzmaDec_UpdateWithUncompressed(&p->decoder, src, srcSizeCur); + + src += srcSizeCur; + *srcLen += srcSizeCur; + p->unpackSize -= (UInt32)srcSizeCur; + p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + SizeT outSizeProcessed; + SRes res; + + if (p->state == LZMA2_STATE_DATA) + { + unsigned mode = LZMA2_GET_LZMA_MODE(p); + Bool initDic = (mode == 3); + Bool initState = (mode != 0); + if ((!initDic && p->needInitDic) || (!initState && p->needInitState)) + { + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; + } + + LzmaDec_InitDicAndState(&p->decoder, initDic, initState); + p->needInitDic = False; + p->needInitState = False; + p->state = LZMA2_STATE_DATA_CONT; + } + + if (srcSizeCur > p->packSize) + srcSizeCur = (SizeT)p->packSize; + + res = LzmaDec_DecodeToDic(&p->decoder, dicPos + destSizeCur, src, &srcSizeCur, curFinishMode, status); + + src += srcSizeCur; + *srcLen += srcSizeCur; + p->packSize -= (UInt32)srcSizeCur; + + outSizeProcessed = p->decoder.dicPos - dicPos; + p->unpackSize -= (UInt32)outSizeProcessed; + + RINOK(res); + if (*status == LZMA_STATUS_NEEDS_MORE_INPUT) + return res; + + if (srcSizeCur == 0 && outSizeProcessed == 0) + { + if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + || p->unpackSize != 0 + || p->packSize != 0) + { + p->state = LZMA2_STATE_ERROR; + return SZ_ERROR_DATA; + } + p->state = LZMA2_STATE_CONTROL; + } + + if (*status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) + *status = LZMA_STATUS_NOT_FINISHED; + } + } + } + + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; +} + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen, inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT srcSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->decoder.dicPos == p->decoder.dicBufSize) + p->decoder.dicPos = 0; + dicPos = p->decoder.dicPos; + if (outSize > p->decoder.dicBufSize - dicPos) + { + outSizeCur = p->decoder.dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = Lzma2Dec_DecodeToDic(p, outSizeCur, src, &srcSizeCur, curFinishMode, status); + src += srcSizeCur; + inSize -= srcSizeCur; + *srcLen += srcSizeCur; + outSizeCur = p->decoder.dicPos - dicPos; + memcpy(dest, p->decoder.dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc) +{ + CLzma2Dec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + Lzma2Dec_Construct(&p); + RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)); + p.decoder.dic = dest; + p.decoder.dicBufSize = outSize; + Lzma2Dec_Init(&p); + *srcLen = inSize; + res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.decoder.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + Lzma2Dec_FreeProbs(&p, alloc); + return res; +} diff --git a/src/lzma/Lzma2Dec.h b/src/lzma/Lzma2Dec.h new file mode 100644 index 000000000000..026cdefe9944 --- /dev/null +++ b/src/lzma/Lzma2Dec.h @@ -0,0 +1,80 @@ +/* Lzma2Dec.h -- LZMA2 Decoder +2015-05-13 : Igor Pavlov : Public domain */ + +#ifndef __LZMA2_DEC_H +#define __LZMA2_DEC_H + +#include "LzmaDec.h" + +EXTERN_C_BEGIN + +/* ---------- State Interface ---------- */ + +typedef struct +{ + CLzmaDec decoder; + UInt32 packSize; + UInt32 unpackSize; + unsigned state; + Byte control; + Bool needInitDic; + Bool needInitState; + Bool needInitProp; +} CLzma2Dec; + +#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) +#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc); +#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc); + +SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAlloc *alloc); +SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAlloc *alloc); +void Lzma2Dec_Init(CLzma2Dec *p); + + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen or dicLimit). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + SZ_ERROR_DATA - Data error +*/ + +SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + +SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - use smallest number of input bytes + LZMA_FINISH_END - read EndOfStream marker after decoding + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAlloc *alloc); + +EXTERN_C_END + +#endif diff --git a/src/lzma/LzmaDec.c b/src/lzma/LzmaDec.c new file mode 100644 index 000000000000..2ef641024c07 --- /dev/null +++ b/src/lzma/LzmaDec.c @@ -0,0 +1,1091 @@ +/* LzmaDec.c -- LZMA Decoder +2015-05-14 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "LzmaDec.h" + +#include + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_INIT_SIZE 5 + +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p); i = (i + i); A0; } else \ + { UPDATE_1(p); i = (i + i) + 1; A1; } +#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) + +#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + i -= 0x40; } +#endif + +#define NORMAL_LITER_DEC GET_BIT(prob + symbol, symbol) +#define MATCHED_LITER_DEC \ + matchByte <<= 1; \ + bit = (matchByte & offs); \ + probLit = prob + offs + bit + symbol; \ + GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK; i = (i + i); A0; } else \ + { UPDATE_1_CHECK; i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenChoice 0 +#define LenChoice2 (LenChoice + 1) +#define LenLow (LenChoice2 + 1) +#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) +#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + + +#define kNumStates 12 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define IsMatch 0 +#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define IsRep0Long (IsRepG2 + kNumStates) +#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) +#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) +#define LenCoder (Align + kAlignTableSize) +#define RepLenCoder (LenCoder + kNumLenProbs) +#define Literal (RepLenCoder + kNumLenProbs) + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 0x300 + +#if Literal != LZMA_BASE_SIZE +StopCompilingDueBUG +#endif + +#define LzmaProps_GetNumProbs(p) (Literal + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + +#define LZMA_DIC_MIN (1 << 12) + +/* First LZMA-symbol is always decoded. +And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +Out: + Result: + SZ_OK - OK + SZ_ERROR_DATA - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : Flush marker (unused now) + = kMatchSpecLenStart + 2 : State Init Marker (unused now) +*/ + +static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = p->probs; + + unsigned state = p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; + unsigned lc = p->prop.lc; + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = processedPos & pbMask; + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (processedPos != 0 || checkDicSize != 0) + prob += ((UInt32)LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + + (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); + if (checkDicSize == 0 && processedPos == 0) + return SZ_ERROR_DATA; + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + UPDATE_0(prob); + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + UInt32 distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef _LZMA_SIZE_OPT + { + unsigned limit, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = (1 << kLenNumMidBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, limit, len); + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + (posState << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + len -= 8; + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenMid + (posState << kLenNumMidBits); + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + len += kLenNumLowSymbols + kLenNumMidSymbols; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos + distance - posSlot - 1; + { + UInt32 mask = 1; + unsigned i = 1; + do + { + GET_BIT2(prob + i, i, ; , distance |= mask); + mask <<= 1; + } + while (--numDirectBits != 0); + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits != 0); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + GET_BIT2(prob + i, i, ; , distance |= 1); + GET_BIT2(prob + i, i, ; , distance |= 2); + GET_BIT2(prob + i, i, ; , distance |= 4); + GET_BIT2(prob + i, i, ; , distance |= 8); + } + if (distance == (UInt32)0xFFFFFFFF) + { + len += kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + if (checkDicSize == 0) + { + if (distance >= processedPos) + return SZ_ERROR_DATA; + } + else if (distance >= checkDicSize) + return SZ_ERROR_DATA; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + return SZ_ERROR_DATA; + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); + + processedPos += curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE; + + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = len; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = state; + + return SZ_OK; +} + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + { + Byte *dic = p->dic; + SizeT dicPos = p->dicPos; + SizeT dicBufSize = p->dicBufSize; + unsigned len = p->remainLen; + SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + SizeT rem = limit - dicPos; + if (rem < len) + len = (unsigned)(rem); + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += len; + p->remainLen -= len; + while (len != 0) + { + len--; + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + do + { + SizeT limit2 = limit; + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; + } + + RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); + + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + + LzmaDec_WriteRem(p, limit); + } + while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + if (p->remainLen > kMatchSpecLenStart) + p->remainLen = kMatchSpecLenStart; + + return 0; +} + +typedef enum +{ + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = buf + inSize; + const CLzmaProb *probs = p->probs; + unsigned state = p->state; + ELzmaDummy res; + + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumMidBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits != 0); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + do + { + GET_BIT_CHECK(prob + i, i); + } + while (--numDirectBits != 0); + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + + +void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) +{ + p->needFlush = 1; + p->remainLen = 0; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->needInitState = 1; + } + if (initState) + p->needInitState = 1; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + +static void LzmaDec_InitStateReal(CLzmaDec *p) +{ + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + p->needInitState = 0; +} + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + LzmaDec_WriteRem(p, dicLimit); + + *status = LZMA_STATUS_NOT_SPECIFIED; + + while (p->remainLen != kMatchSpecLenStart) + { + int checkEndMarkNow; + + if (p->needFlush) + { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + p->range = 0xFFFFFFFF; + p->needFlush = 0; + p->tempBufSize = 0; + } + + checkEndMarkNow = 0; + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + checkEndMarkNow = 1; + } + + if (p->needInitState) + LzmaDec_InitStateReal(p); + + if (p->tempBufSize == 0) + { + SizeT processed; + const Byte *bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) + { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + bufLimit = src; + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) + return SZ_ERROR_DATA; + processed = (SizeT)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } + else + { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); + if (dummyRes == DUMMY_ERROR) + { + (*srcLen) += lookAhead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + } + p->buf = p->tempBuf; + if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) + return SZ_ERROR_DATA; + + { + unsigned kkk = (unsigned)(p->buf - p->tempBuf); + if (rem < kkk) + return SZ_ERROR_FAIL; /* some internal error */ + rem -= kkk; + if (lookAhead < rem) + return SZ_ERROR_FAIL; /* some internal error */ + lookAhead -= rem; + } + (*srcLen) += lookAhead; + src += lookAhead; + inSize -= lookAhead; + p->tempBufSize = 0; + } + } + if (p->code == 0) + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; +} + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = d % 9; + d /= 9; + p->pb = d / 5; + p->lp = d % 5; + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); + p->numProbs = numProbs; + if (!p->probs) + return SZ_ERROR_MEM; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_Construct(&p); + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/src/lzma/LzmaDec.h b/src/lzma/LzmaDec.h new file mode 100644 index 000000000000..2633abeac9db --- /dev/null +++ b/src/lzma/LzmaDec.h @@ -0,0 +1,227 @@ +/* LzmaDec.h -- LZMA Decoder +2013-01-18 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_DEC_H +#define __LZMA_DEC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +#ifdef _LZMA_PROB32 +#define CLzmaProb UInt32 +#else +#define CLzmaProb UInt16 +#endif + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps +{ + unsigned lc, lp, pb; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + CLzmaProps prop; + CLzmaProb *probs; + Byte *dic; + const Byte *buf; + UInt32 range, code; + SizeT dicPos; + SizeT dicBufSize; + UInt32 processedPos; + UInt32 checkDicSize; + unsigned state; + UInt32 reps[4]; + unsigned remainLen; + int needFlush; + int needInitState; + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: + 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. + 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); + +SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); +void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Constr() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! + +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). +*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc); + +EXTERN_C_END + +#endif diff --git a/src/lzma/Precomp.h b/src/lzma/Precomp.h new file mode 100644 index 000000000000..edb581443982 --- /dev/null +++ b/src/lzma/Precomp.h @@ -0,0 +1,10 @@ +/* Precomp.h -- StdAfx +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_PRECOMP_H +#define __7Z_PRECOMP_H + +#include "Compiler.h" +/* #include "7zTypes.h" */ + +#endif diff --git a/src/lzma/__init__.py b/src/lzma/__init__.py new file mode 100644 index 000000000000..dcd564a80199 --- /dev/null +++ b/src/lzma/__init__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2015, Kovid Goyal ' + + + diff --git a/src/lzma/lzma_binding.c b/src/lzma/lzma_binding.c new file mode 100644 index 000000000000..e771ede742a3 --- /dev/null +++ b/src/lzma/lzma_binding.c @@ -0,0 +1,189 @@ +/* + * lzma_binding.c + * Copyright (C) 2015 Kovid Goyal + * + * Distributed under terms of the GPL3 license. + */ + +#define PY_SSIZE_T_CLEAN +#define UNICODE +#include "Python.h" +#include "Lzma2Dec.h" + +static void *Alloc(void *p, size_t size) { p = p; return PyMem_Malloc(size); } +static void Free(void *p, void *address) { p = p; PyMem_Free(address); } +static ISzAlloc allocator = { Alloc, Free }; +static const char* error_codes[18] = { + "OK", + "SZ_ERROR_DATA", + "SZ_ERROR_MEM", + "SZ_ERROR_CRC", + "SZ_ERROR_UNSUPPORTED", + "SZ_ERROR_PARAM", + "SZ_ERROR_INPUT_EOF", + "SZ_ERROR_OUTPUT_EOF", + "SZ_ERROR_READ", + "SZ_ERROR_WRITE", + "SZ_ERROR_PROGRESS", + "SZ_ERROR_FAIL", + "SZ_ERROR_THREAD", + "UNKNOWN", "UNKNOWN", "UNKNOWN", + "SZ_ERROR_ARCHIVE", + "SZ_ERROR_NO_ARCHIVE", +}; +#define SET_ERROR(x) PyErr_SetString(LZMAError, ((x) > 0 && (x) < 17) ? error_codes[(x)] : "UNKNOWN") + +static PyObject *LZMAError = NULL; +static UInt64 crc64_table[256]; + +static void init_crc_table() { + static const UInt64 poly64 = (UInt64)(0xC96C5795D7870F42); + for (size_t i = 0; i < 256; ++i) { + UInt64 crc64 = i; + for (size_t j = 0; j < 8; ++j) { + if (crc64 & 1) + crc64 = (crc64 >> 1) ^ poly64; + else + crc64 >>= 1; + } + crc64_table[i] = crc64; + } +} + +static PyObject * +crc64(PyObject *self, PyObject *args) { + unsigned char *data = NULL; + Py_ssize_t size = 0; + UInt64 crc = 0; + if (!PyArg_ParseTuple(args, "s#|K", &data, &size, &crc)) return NULL; + crc = ~crc; + for (size_t i = 0; i < size; ++i) + crc = crc64_table[data[i] ^ (crc & 0xFF)] ^ (crc >> 8); + + return Py_BuildValue("K", ~crc); +} + +static PyObject* +delta_decode(PyObject *self, PyObject *args) { + PyObject *array = NULL, *histarray = NULL; + unsigned char *data = NULL, pos = 0, *history = NULL; + unsigned int distance = 0; + Py_ssize_t datalen = 0; + if (!PyArg_ParseTuple(args, "O!O!BB", &PyByteArray_Type, &array, &PyByteArray_Type, &histarray, &pos, &distance)) return NULL; + if (PyByteArray_GET_SIZE(histarray) != 256) { + PyErr_SetString(PyExc_TypeError, "histarray must be 256 bytes long"); + return NULL; + } + data = (unsigned char*)PyByteArray_AS_STRING(array); history = (unsigned char*)PyByteArray_AS_STRING(histarray); + datalen = PyBytes_GET_SIZE(array); + + for (Py_ssize_t i = 0; i < datalen; i++) { + data[i] += history[(unsigned char)(pos + distance)]; + history[pos--] = data[i]; + } + return Py_BuildValue("B", pos); +} + +static PyObject * +decompress2(PyObject *self, PyObject *args) { + PyObject *read = NULL, *seek = NULL, *write = NULL, *rres = NULL; + unsigned long bufsize = 0, bytes_written = 0, bytes_read = 0, inbuf_pos = 0, inbuf_len = 0, leftover = 0; + unsigned char props = 0; + char *inbuf = NULL, *outbuf = NULL; + CLzma2Dec state; + SRes res = 0; + ELzmaStatus status = LZMA_STATUS_NOT_FINISHED; + + if (!PyArg_ParseTuple(args, "OOOBk", &read, &seek, &write, &props, &bufsize)) return NULL; + + Lzma2Dec_Construct(&state); + res = Lzma2Dec_Allocate(&state, (Byte)props, &allocator); + if (res == SZ_ERROR_MEM) { PyErr_NoMemory(); return NULL; } + if (res != SZ_OK) { PyErr_SetString(PyExc_TypeError, "Incorrect stream properties"); goto exit; } + inbuf = (char*)PyMem_Malloc(bufsize); + outbuf = (char*)PyMem_Malloc(bufsize); + if (!inbuf || !outbuf) {PyErr_NoMemory(); goto exit;} + + Lzma2Dec_Init(&state); + + while (status != LZMA_STATUS_FINISHED_WITH_MARK) { + bytes_written = bufsize; bytes_read = inbuf_len - inbuf_pos; + Py_BEGIN_ALLOW_THREADS; + res = Lzma2Dec_DecodeToBuf(&state, (Byte*)outbuf, &bytes_written, (Byte*)(inbuf) + inbuf_pos, &bytes_read, LZMA_FINISH_ANY, &status); + Py_END_ALLOW_THREADS; + if (res != SZ_OK) { SET_ERROR(res); goto exit; } + if (bytes_written > 0) { + if(!PyObject_CallFunction(write, "s#", outbuf, bytes_written)) goto exit; + } + if (bytes_read > 0) inbuf_pos += bytes_read; + if (status == LZMA_STATUS_NEEDS_MORE_INPUT) { + leftover = inbuf_len - inbuf_pos; + inbuf_pos = 0; + if (!PyObject_CallFunction(seek, "ii", -leftover, SEEK_CUR)) goto exit; + rres = PyObject_CallFunction(read, "n", bufsize); + if (rres == NULL) goto exit; + inbuf_len = PyBytes_GET_SIZE(rres); + if (inbuf_len == 0) { PyErr_SetString(PyExc_ValueError, "LZMA2 block was truncated"); goto exit; } + memcpy(inbuf, PyBytes_AS_STRING(rres), inbuf_len); + Py_DECREF(rres); rres = NULL; + } + } + leftover = inbuf_len - inbuf_pos; + if (leftover > 0) { + if (!PyObject_CallFunction(seek, "ii", -leftover, SEEK_CUR)) goto exit; + } + + +exit: + Lzma2Dec_Free(&state, &allocator); + PyMem_Free(inbuf); PyMem_Free(outbuf); + if (PyErr_Occurred()) return NULL; + Py_RETURN_NONE; +} + +static PyMethodDef lzma_binding_methods[] = { + {"decompress2", decompress2, METH_VARARGS, + "Decompress an LZMA2 encoded block, of unknown compressed size (reads till LZMA2 EOS marker)" + }, + + {"crc64", crc64, METH_VARARGS, + "crc64(bytes) -> CRC 64 for the provided python bytes object" + }, + + {"delta_decode", delta_decode, METH_VARARGS, + "delta_decode(rawarray, histarray, pos, distance) -> Apply the delta decode filter to the bytearray rawarray" + }, + + {NULL, NULL, 0, NULL} +}; + + +PyMODINIT_FUNC +initlzma_binding(void) { + PyObject *m = NULL; + init_crc_table(); + LZMAError = PyErr_NewException("lzma_binding.error", NULL, NULL); + if (!LZMAError) return; + m = Py_InitModule3("lzma_binding", lzma_binding_methods, + "Bindings to the LZMA (de)compression C code" + ); + Py_INCREF(LZMAError); + PyModule_AddObject(m, "error", LZMAError); + PyModule_AddIntMacro(m, SZ_OK); + PyModule_AddIntMacro(m, SZ_ERROR_DATA); + PyModule_AddIntMacro(m, SZ_ERROR_MEM); + PyModule_AddIntMacro(m, SZ_ERROR_CRC); + PyModule_AddIntMacro(m, SZ_ERROR_UNSUPPORTED); + PyModule_AddIntMacro(m, SZ_ERROR_PARAM); + PyModule_AddIntMacro(m, SZ_ERROR_INPUT_EOF); + PyModule_AddIntMacro(m, SZ_ERROR_OUTPUT_EOF); + PyModule_AddIntMacro(m, SZ_ERROR_READ); + PyModule_AddIntMacro(m, SZ_ERROR_WRITE); + PyModule_AddIntMacro(m, SZ_ERROR_PROGRESS); + PyModule_AddIntMacro(m, SZ_ERROR_FAIL); + PyModule_AddIntMacro(m, SZ_ERROR_THREAD); + PyModule_AddIntMacro(m, SZ_ERROR_ARCHIVE); + PyModule_AddIntMacro(m, SZ_ERROR_NO_ARCHIVE); + + if (m == NULL) return; +} diff --git a/src/lzma/xz.py b/src/lzma/xz.py new file mode 100644 index 000000000000..531b76ca06e1 --- /dev/null +++ b/src/lzma/xz.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2015, Kovid Goyal ' + +from collections import namedtuple +from io import BytesIO +from hashlib import sha256 +from struct import unpack, error as struct_error +from binascii import crc32 as _crc32 + +from calibre.constants import plugins +from calibre.ptempfile import SpooledTemporaryFile + +lzma = plugins['lzma_binding'][0] +if not lzma: + raise RuntimeError('Failed to load lzma_binding module with error: %s' % plugins['lzma_binding'][1]) + +HEADER_MAGIC = b'\xfd7zXZ\0' +DELTA_FILTER_ID = 0x03 +LZMA2_FILTER_ID = 0x21 + +def crc32(raw, start=0): + return 0xFFFFFFFF & _crc32(raw, start) + +class XZError(ValueError): + pass + +class NotXZ(XZError): + pass + +class InvalidXZ(XZError): + pass + +def decode_var_int(f): + ans, i, ch = 0, -1, 0x80 + while ch >= 0x80: + ch = ord(f.read(1)) + i += 1 + if ch == 0: + return 0 + ans |= (ch & 0x7f) << (i * 7) + return ans + +def decode_var_int2(raw, pos): + ans, ch, opos = 0, 0x80, pos + while ch >= 0x80: + ch = ord(raw[pos]) + if ch == 0: + return 0, pos + ans |= (ch & 0x7f) << ((pos - opos) * 7) + pos += 1 + return ans, pos + +def encode_var_int(num): + if num == 0: + return b'\0' + buf = bytearray() + a = buf.append + while num != 0: + a(0x80 | (num & 0x7F)) + num >>= 7 + buf[-1] &= 0x7F + return bytes(buf) + +def read_stream_header(f): + try: + magic, stream_flags1, stream_flags2, crc = unpack(b'<6s2BI', f.read(12)) + except struct_error as e: + raise NotXZ('Not an XZ file. Invalid stream header: ' % e) + if magic != HEADER_MAGIC: + raise NotXZ('Not an XZ file. Header Magic is: %r' % magic) + if stream_flags1 != 0: + raise InvalidXZ('Stream flags first byte is not null') + check_type, reserved = 0x0f & stream_flags2, 0xf0 & stream_flags2 + if reserved != 0: + raise InvalidXZ('Stream flags reserved bits not null') + if crc32(bytes(bytearray([stream_flags1, stream_flags2]))) != crc: + raise InvalidXZ('Stream flags header CRC incorrect') + return check_type + +class CRCChecker(object): + + def __init__(self, check_type): + self.code = 0 + if check_type == 0x1: + self.func = crc32 + self.size = 4 + self.fmt = b'= 0x4000000000000000: + raise InvalidXZ('Invalid filter id: %d' % filter_id) + if filter_id not in (LZMA2_FILTER_ID, DELTA_FILTER_ID): + raise InvalidXZ('Unsupported filter ID: 0x%x' % filter_id) + props = header[pos:pos+size_of_properties] + pos += size_of_properties + if len(props) != size_of_properties: + raise InvalidXZ('Incomplete filter properties') + if filter_id == LZMA2_FILTER_ID and number_of_filters: + raise InvalidXZ('LZMA2 filter must be the last filter') + elif filter_id == DELTA_FILTER_ID and not number_of_filters: + raise InvalidXZ('Delta filter cannot be the last filter') + filters.append((LZMA2Filter if filter_id == LZMA2_FILTER_ID else DeltaFilter)(props, check_type)) + padding = header[pos:] + if padding.lstrip(b'\0'): + raise InvalidXZ('Non-null block header padding: %r' % padding) + return filters, compressed_size, uncompressed_size + +def read_block(f, block_header_size_, check_type, outfile): + start_pos = f.tell() - 1 + filters, compressed_size, uncompressed_size = read_block_header(f, block_header_size_, check_type) + fpos, opos = f.tell(), outfile.tell() + filters.reverse() + filters[0](f, outfile, filters[1:]) + actual_compressed_size = f.tell() - fpos + uncompressed_actual_size = outfile.tell() - opos + if uncompressed_size is not None and uncompressed_size != uncompressed_actual_size: + raise InvalidXZ('Uncompressed size for block does not match') + if compressed_size is not None and compressed_size != actual_compressed_size: + raise InvalidXZ('Compressed size for block does not match') + padding_count = f.tell() % 4 + if padding_count: + padding_count = 4 - padding_count + padding = f.read(padding_count) + if len(padding) != padding_count: + raise InvalidXZ('Block is not aligned') + if padding.lstrip(b'\0'): + raise InvalidXZ('Block padding has non null bytes') + if check_type: + q = f.read(filters[0].crc.size) + if not filters[0].crc.check(q): + raise InvalidXZ('CRC for data does not match') + return Block(f.tell() - padding_count - start_pos, uncompressed_actual_size) + +def read_index(f): + pos = f.tell() - 1 + number_of_records = decode_var_int(f) + while number_of_records: + number_of_records -= 1 + unpadded_size = decode_var_int(f) + if unpadded_size < 1: + raise InvalidXZ('Invalid unpadded size in index: %d' % unpadded_size) + yield Block(unpadded_size, decode_var_int(f)) + if f.tell() % 4: + padding_count = 4 - f.tell() % 4 + padding = f.read(padding_count) + if len(padding) != padding_count or padding.lstrip(b'\0'): + raise InvalidXZ('Incorrect Index padding') + epos = f.tell() + f.seek(pos) + raw = f.read(epos - pos) + crc, = unpack(b' -1: + # Found another stream + raw.seek(pos) + if idx: + padding = raw.read(idx) + if padding.lstrip(b'\0') or len(padding) % 4: + raise InvalidXZ('Found trailing garbage between streams') + return outfile + +if __name__ == '__main__': + import sys + decompress(open(sys.argv[-1], 'rb'))