diff options
author | DLL125 <134442578+DLL125@users.noreply.github.com> | 2023-06-23 21:19:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-23 21:19:50 +0200 |
commit | 097cfa947e068bcfc439cd466e53361d7f6d1b46 (patch) | |
tree | 83b0399b71529b2847c78ae20d45b100651a9cbc | |
parent | 9e1c0e5b22e6b668440c7e738cbd6cd9f651ddef (diff) | |
download | VeraCrypt-097cfa947e068bcfc439cd466e53361d7f6d1b46.tar.gz VeraCrypt-097cfa947e068bcfc439cd466e53361d7f6d1b46.zip |
Dll125 lzma (#1120)
* Update LZMA to latest
* Add missing file
-rw-r--r-- | src/Common/lzma/7zTypes.h | 274 | ||||
-rw-r--r-- | src/Common/lzma/7zWindows.h | 101 | ||||
-rw-r--r-- | src/Common/lzma/Alloc.c | 192 | ||||
-rw-r--r-- | src/Common/lzma/Alloc.h | 19 | ||||
-rw-r--r-- | src/Common/lzma/Compiler.h | 162 | ||||
-rw-r--r-- | src/Common/lzma/CpuArch.c | 795 | ||||
-rw-r--r-- | src/Common/lzma/CpuArch.h | 233 | ||||
-rw-r--r-- | src/Common/lzma/LzFind.c | 519 | ||||
-rw-r--r-- | src/Common/lzma/LzFind.h | 53 | ||||
-rw-r--r-- | src/Common/lzma/LzFindMt.c | 70 | ||||
-rw-r--r-- | src/Common/lzma/LzFindMt.h | 10 | ||||
-rw-r--r-- | src/Common/lzma/LzFindOpt.c | 14 | ||||
-rw-r--r-- | src/Common/lzma/LzHash.h | 8 | ||||
-rw-r--r-- | src/Common/lzma/LzmaDec.c | 190 | ||||
-rw-r--r-- | src/Common/lzma/LzmaDec.h | 17 | ||||
-rw-r--r-- | src/Common/lzma/LzmaEnc.c | 389 | ||||
-rw-r--r-- | src/Common/lzma/LzmaEnc.h | 23 | ||||
-rw-r--r-- | src/Common/lzma/LzmaLib.c | 8 | ||||
-rw-r--r-- | src/Common/lzma/LzmaLib.h | 12 | ||||
-rw-r--r-- | src/Common/lzma/Precomp.h | 6 | ||||
-rw-r--r-- | src/Common/lzma/Threads.c | 86 | ||||
-rw-r--r-- | src/Common/lzma/Threads.h | 50 | ||||
-rw-r--r-- | src/Common/lzma/lzma-history.txt | 40 | ||||
-rw-r--r-- | src/Common/lzma/lzma-sdk.txt | 2 |
24 files changed, 2124 insertions, 1149 deletions
diff --git a/src/Common/lzma/7zTypes.h b/src/Common/lzma/7zTypes.h index fe4fde3f..1fcb2473 100644 --- a/src/Common/lzma/7zTypes.h +++ b/src/Common/lzma/7zTypes.h @@ -1,8 +1,8 @@ /* 7zTypes.h -- Basic types -2021-12-25 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H +#ifndef ZIP7_7Z_TYPES_H +#define ZIP7_7Z_TYPES_H #ifdef _WIN32 /* #include <windows.h> */ @@ -52,6 +52,11 @@ typedef int SRes; #define MY_ALIGN(n) #endif #else + /* + // C11/C++11: + #include <stdalign.h> + #define MY_ALIGN(n) alignas(n) + */ #define MY_ALIGN(n) __attribute__ ((aligned(n))) #endif @@ -62,7 +67,7 @@ typedef int SRes; typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) -// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) +// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) #else // _WIN32 @@ -70,13 +75,13 @@ typedef unsigned WRes; typedef int WRes; // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT -#define MY__FACILITY_ERRNO 0x800 -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_ERRNO +#define MY_FACILITY_ERRNO 0x800 +#define MY_FACILITY_WIN32 7 +#define MY_FACILITY_WRes MY_FACILITY_ERRNO #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ ( (HRESULT)(x) & 0x0000FFFF) \ - | (MY__FACILITY__WRes << 16) \ + | (MY_FACILITY_WRes << 16) \ | (HRESULT)0x80000000 )) #define MY_SRes_HRESULT_FROM_WRes(x) \ @@ -120,23 +125,19 @@ typedef int WRes; #define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) #define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) -// if (MY__FACILITY__WRes != FACILITY_WIN32), +// if (MY_FACILITY_WRes != FACILITY_WIN32), // we use FACILITY_WIN32 for COM errors: #define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_INVALIDARG ((HRESULT)0x80070057L) -#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) +#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) /* // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: #define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) #define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) -#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) */ -// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits -typedef long INT_PTR; -typedef unsigned long UINT_PTR; - #define TEXT(quote) quote #define FILE_ATTRIBUTE_READONLY 0x0001 @@ -160,18 +161,18 @@ typedef unsigned long UINT_PTR; #ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; } #endif #ifndef RINOK_WRes -#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; } #endif typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; -#ifdef _LZMA_UINT32_IS_ULONG +#ifdef Z7_DECL_Int32_AS_long typedef long Int32; typedef unsigned long UInt32; #else @@ -210,37 +211,51 @@ typedef size_t SIZE_T; #endif // _WIN32 -#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) - +#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL) -#ifdef _SZ_NO_INT_64 -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. - NOTES: Some code will work incorrectly in that case! */ +#ifdef Z7_DECL_Int64_AS_long typedef long Int64; typedef unsigned long UInt64; #else -#if defined(_MSC_VER) || defined(__BORLANDC__) +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__) typedef __int64 Int64; typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n +#else +#if defined(__clang__) || defined(__GNUC__) +#include <stdint.h> +typedef int64_t Int64; +typedef uint64_t UInt64; #else typedef long long int Int64; typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n ## ULL +// #define UINT64_CONST(n) n ## ULL +#endif #endif #endif -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; +#define UINT64_CONST(n) n + + +#ifdef Z7_DECL_SizeT_AS_unsigned_int +typedef unsigned int SizeT; #else typedef size_t SizeT; #endif +/* +#if (defined(_MSC_VER) && _MSC_VER <= 1200) +typedef size_t MY_uintptr_t; +#else +#include <stdint.h> +typedef uintptr_t MY_uintptr_t; +#endif +*/ + typedef int BoolInt; /* typedef BoolInt Bool; */ #define True 1 @@ -248,23 +263,23 @@ typedef int BoolInt; #ifdef _WIN32 -#define MY_STD_CALL __stdcall +#define Z7_STDCALL __stdcall #else -#define MY_STD_CALL +#define Z7_STDCALL #endif #ifdef _MSC_VER #if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) +#define Z7_NO_INLINE __declspec(noinline) #else -#define MY_NO_INLINE +#define Z7_NO_INLINE #endif -#define MY_FORCE_INLINE __forceinline +#define Z7_FORCE_INLINE __forceinline -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall +#define Z7_CDECL __cdecl +#define Z7_FASTCALL __fastcall #else // _MSC_VER @@ -272,27 +287,25 @@ typedef int BoolInt; || (defined(__clang__) && (__clang_major__ >= 4)) \ || defined(__INTEL_COMPILER) \ || defined(__xlC__) -#define MY_NO_INLINE __attribute__((noinline)) -// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#define Z7_NO_INLINE __attribute__((noinline)) +#define Z7_FORCE_INLINE __attribute__((always_inline)) inline #else -#define MY_NO_INLINE +#define Z7_NO_INLINE +#define Z7_FORCE_INLINE #endif -#define MY_FORCE_INLINE - - -#define MY_CDECL +#define Z7_CDECL #if defined(_M_IX86) \ || defined(__i386__) -// #define MY_FAST_CALL __attribute__((fastcall)) -// #define MY_FAST_CALL __attribute__((cdecl)) -#define MY_FAST_CALL +// #define Z7_FASTCALL __attribute__((fastcall)) +// #define Z7_FASTCALL __attribute__((cdecl)) +#define Z7_FASTCALL #elif defined(MY_CPU_AMD64) -// #define MY_FAST_CALL __attribute__((ms_abi)) -#define MY_FAST_CALL +// #define Z7_FASTCALL __attribute__((ms_abi)) +#define Z7_FASTCALL #else -#define MY_FAST_CALL +#define Z7_FASTCALL #endif #endif // _MSC_VER @@ -300,41 +313,49 @@ typedef int BoolInt; /* The following interfaces use first parameter as pointer to structure */ -typedef struct IByteIn IByteIn; -struct IByteIn +// #define Z7_C_IFACE_CONST_QUAL +#define Z7_C_IFACE_CONST_QUAL const + +#define Z7_C_IFACE_DECL(a) \ + struct a ## _; \ + typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \ + typedef struct a ## _ a; \ + struct a ## _ + + +Z7_C_IFACE_DECL (IByteIn) { - Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ + Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */ }; #define IByteIn_Read(p) (p)->Read(p) -typedef struct IByteOut IByteOut; -struct IByteOut +Z7_C_IFACE_DECL (IByteOut) { - void (*Write)(const IByteOut *p, Byte b); + void (*Write)(IByteOutPtr p, Byte b); }; #define IByteOut_Write(p, b) (p)->Write(p, b) -typedef struct ISeqInStream ISeqInStream; -struct ISeqInStream +Z7_C_IFACE_DECL (ISeqInStream) { - SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); + SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) < input(*size)) is allowed */ }; #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) +/* try to read as much as avail in stream and limited by (*processedSize) */ +SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize); /* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); +// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size); +// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf); -typedef struct ISeqOutStream ISeqOutStream; -struct ISeqOutStream +Z7_C_IFACE_DECL (ISeqOutStream) { - size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size); /* Returns: result - the number of actually written bytes. (result < size) means error */ }; @@ -348,29 +369,26 @@ typedef enum } ESzSeek; -typedef struct ISeekInStream ISeekInStream; -struct ISeekInStream +Z7_C_IFACE_DECL (ISeekInStream) { - SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); + SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin); }; #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -typedef struct ILookInStream ILookInStream; -struct ILookInStream +Z7_C_IFACE_DECL (ILookInStream) { - SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) > input(*size)) is not allowed (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(const ILookInStream *p, size_t offset); + SRes (*Skip)(ILookInStreamPtr p, size_t offset); /* offset must be <= output(*size) of Look */ - - SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size); /* reads directly (without buffer). It's same as ISeqInStream::Read */ - SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); + SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin); }; #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) @@ -379,19 +397,18 @@ struct ILookInStream #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); +SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset); /* reads via ILookInStream::Read */ -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); - +SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size); typedef struct { ILookInStream vt; - const ISeekInStream *realStream; + ISeekInStreamPtr realStream; size_t pos; size_t size; /* it's data size */ @@ -403,13 +420,13 @@ typedef struct void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); -#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } +#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; } typedef struct { ISeqInStream vt; - const ILookInStream *realStream; + ILookInStreamPtr realStream; } CSecToLook; void SecToLook_CreateVTable(CSecToLook *p); @@ -419,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p); typedef struct { ISeqInStream vt; - const ILookInStream *realStream; + ILookInStreamPtr realStream; } CSecToRead; void SecToRead_CreateVTable(CSecToRead *p); -typedef struct ICompressProgress ICompressProgress; - -struct ICompressProgress +Z7_C_IFACE_DECL (ICompressProgress) { - SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize); /* Returns: result. (result != SZ_OK) means break. Value (UInt64)(Int64)-1 for size means unknown value. */ }; + #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) @@ -470,13 +486,13 @@ struct ISzAlloc -#ifndef MY_container_of +#ifndef Z7_container_of /* -#define MY_container_of(ptr, type, m) container_of(ptr, type, m) -#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) -#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) -#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +#define Z7_container_of(ptr, type, m) container_of(ptr, type, m) +#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) */ /* @@ -485,24 +501,64 @@ struct ISzAlloc GCC 4.8.1 : classes with non-public variable members" */ -#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) +#define Z7_container_of(ptr, type, m) \ + ((type *)(void *)((char *)(void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) -#endif - -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) +#define Z7_container_of_CONST(ptr, type, m) \ + ((const type *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) /* -#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \ + ((type *)(void *)(const void *)((const char *)(const void *) \ + (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m))) */ -#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#endif + +#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) + +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m) +// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m) + +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) /* -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) +#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) */ +#if defined (__clang__) || defined(__GNUC__) +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL +#endif + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ + Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ + Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL + +#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) -#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) +// #define ZIP7_DECLARE_HANDLE(name) typedef void *name; +#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name; + + +#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifndef Z7_ARRAY_SIZE +#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + #ifdef _WIN32 @@ -520,6 +576,22 @@ struct ISzAlloc #endif +#define k_PropVar_TimePrec_0 0 +#define k_PropVar_TimePrec_Unix 1 +#define k_PropVar_TimePrec_DOS 2 +#define k_PropVar_TimePrec_HighPrec 3 +#define k_PropVar_TimePrec_Base 16 +#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7) +#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9) + EXTERN_C_END #endif + +/* +#ifndef Z7_ST +#ifdef _7ZIP_ST +#define Z7_ST +#endif +#endif +*/ diff --git a/src/Common/lzma/7zWindows.h b/src/Common/lzma/7zWindows.h new file mode 100644 index 00000000..0967a799 --- /dev/null +++ b/src/Common/lzma/7zWindows.h @@ -0,0 +1,101 @@ +/* 7zWindows.h -- StdAfx
+2023-04-02 : Igor Pavlov : Public domain */
+
+#ifndef ZIP7_INC_7Z_WINDOWS_H
+#define ZIP7_INC_7Z_WINDOWS_H
+
+#ifdef _WIN32
+
+#if defined(__clang__)
+# pragma clang diagnostic push
+#endif
+
+#if defined(_MSC_VER)
+
+#pragma warning(push)
+#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
+
+#if _MSC_VER == 1900
+// for old kit10 versions
+// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
+#endif
+// win10 Windows Kit:
+#endif // _MSC_VER
+
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
+// for msvc6 without sdk2003
+#define RPC_NO_WINDOWS_H
+#endif
+
+#if defined(__MINGW32__) || defined(__MINGW64__)
+// #if defined(__GNUC__) && !defined(__clang__)
+#include <windows.h>
+#else
+#include <Windows.h>
+#endif
+// #include <basetsd.h>
+// #include <wtypes.h>
+
+// but if precompiled with clang-cl then we need
+// #include <windows.h>
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+#if defined(__clang__)
+# pragma clang diagnostic pop
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
+#ifndef _W64
+
+typedef long LONG_PTR, *PLONG_PTR;
+typedef unsigned long ULONG_PTR, *PULONG_PTR;
+typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
+
+#define Z7_OLD_WIN_SDK
+#endif // _W64
+#endif // _MSC_VER == 1200
+
+#ifdef Z7_OLD_WIN_SDK
+
+#ifndef INVALID_FILE_ATTRIBUTES
+#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
+#endif
+#ifndef INVALID_SET_FILE_POINTER
+#define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+#ifndef FILE_SPECIAL_ACCESS
+#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
+#endif
+
+// ShlObj.h:
+// #define BIF_NEWDIALOGSTYLE 0x0040
+
+#pragma warning(disable : 4201)
+// #pragma warning(disable : 4115)
+
+#undef VARIANT_TRUE
+#define VARIANT_TRUE ((VARIANT_BOOL)-1)
+#endif
+
+#endif // Z7_OLD_WIN_SDK
+
+#ifdef UNDER_CE
+#undef VARIANT_TRUE
+#define VARIANT_TRUE ((VARIANT_BOOL)-1)
+#endif
+
+
+#if defined(_MSC_VER)
+#if _MSC_VER >= 1400 && _MSC_VER <= 1600
+ // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
+ // string.h
+ // #pragma warning(disable : 4514)
+#endif
+#endif
+
+
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/src/Common/lzma/Alloc.c b/src/Common/lzma/Alloc.c index d1af76c5..d841bf20 100644 --- a/src/Common/lzma/Alloc.c +++ b/src/Common/lzma/Alloc.c @@ -1,38 +1,54 @@ /* Alloc.c -- Memory allocation functions -2021-07-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include <stdio.h> - #ifdef _WIN32 -#include <Windows.h> +#include "7zWindows.h" #endif #include <stdlib.h> #include "Alloc.h" -/* #define _SZ_ALLOC_DEBUG */ +#ifdef _WIN32 +#ifdef Z7_LARGE_PAGES +#if defined(__clang__) || defined(__GNUC__) +typedef void (*Z7_voidFunction)(void); +#define MY_CAST_FUNC (Z7_voidFunction) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define MY_CAST_FUNC (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define MY_CAST_FUNC +#endif +#endif // Z7_LARGE_PAGES +#endif // _WIN32 + +// #define SZ_ALLOC_DEBUG +/* #define SZ_ALLOC_DEBUG */ -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG +/* use SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef SZ_ALLOC_DEBUG +#include <string.h> #include <stdio.h> -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; +static int g_allocCount = 0; +#ifdef _WIN32 +static int g_allocCountMid = 0; +static int g_allocCountBig = 0; +#endif #define CONVERT_INT_TO_STR(charType, tempSize) \ - unsigned char temp[tempSize]; unsigned i = 0; \ - while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ + char temp[tempSize]; unsigned i = 0; \ + while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \ *s++ = (charType)('0' + (unsigned)val); \ while (i != 0) { i--; *s++ = temp[i]; } \ *s = 0; static void ConvertUInt64ToString(UInt64 val, char *s) { - CONVERT_INT_TO_STR(char, 24); + CONVERT_INT_TO_STR(char, 24) } #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) @@ -77,7 +93,7 @@ static void PrintAligned(const char *s, size_t align) Print(s); } -static void PrintLn() +static void PrintLn(void) { Print("\n"); } @@ -89,10 +105,10 @@ static void PrintHex(UInt64 v, size_t align) PrintAligned(s, align); } -static void PrintDec(UInt64 v, size_t align) +static void PrintDec(int v, size_t align) { char s[32]; - ConvertUInt64ToString(v, s); + ConvertUInt64ToString((unsigned)v, s); PrintAligned(s, align); } @@ -102,12 +118,19 @@ static void PrintAddr(void *p) } -#define PRINT_ALLOC(name, cnt, size, ptr) \ +#define PRINT_REALLOC(name, cnt, size, ptr) { \ + Print(name " "); \ + if (!ptr) PrintDec(cnt++, 10); \ + PrintHex(size, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#define PRINT_ALLOC(name, cnt, size, ptr) { \ Print(name " "); \ PrintDec(cnt++, 10); \ PrintHex(size, 10); \ PrintAddr(ptr); \ - PrintLn(); + PrintLn(); } #define PRINT_FREE(name, cnt, ptr) if (ptr) { \ Print(name " "); \ @@ -117,7 +140,9 @@ static void PrintAddr(void *p) #else +#ifdef _WIN32 #define PRINT_ALLOC(name, cnt, size, ptr) +#endif #define PRINT_FREE(name, cnt, ptr) #define Print(s) #define PrintLn() @@ -127,16 +152,31 @@ static void PrintAddr(void *p) #endif +/* +by specification: + malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free() + realloc(NULL, size) : the call is equivalent to malloc(size) + realloc(non_NULL, 0) : the call is equivalent to free(ptr) + +in main compilers: + malloc(0) : returns non_NULL + realloc(NULL, 0) : returns non_NULL + realloc(non_NULL, 0) : returns NULL +*/ + void *MyAlloc(size_t size) { if (size == 0) return NULL; - PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); - #ifdef _SZ_ALLOC_DEBUG + // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL) + #ifdef SZ_ALLOC_DEBUG { void *p = malloc(size); - // PRINT_ALLOC("Alloc ", g_allocCount, size, p); + if (p) + { + PRINT_ALLOC("Alloc ", g_allocCount, size, p) + } return p; } #else @@ -146,33 +186,64 @@ void *MyAlloc(size_t size) void MyFree(void *address) { - PRINT_FREE("Free ", g_allocCount, address); + PRINT_FREE("Free ", g_allocCount, address) free(address); } +void *MyRealloc(void *address, size_t size) +{ + if (size == 0) + { + MyFree(address); + return NULL; + } + // PRINT_REALLOC("Realloc ", g_allocCount, size, address) + #ifdef SZ_ALLOC_DEBUG + { + void *p = realloc(address, size); + if (p) + { + PRINT_REALLOC("Realloc ", g_allocCount, size, address) + } + return p; + } + #else + return realloc(address, size); + #endif +} + + #ifdef _WIN32 void *MidAlloc(size_t size) { if (size == 0) return NULL; - - PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); - + #ifdef SZ_ALLOC_DEBUG + { + void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + if (p) + { + PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p) + } + return p; + } + #else return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + #endif } void MidFree(void *address) { - PRINT_FREE("Free-Mid", g_allocCountMid, address); + PRINT_FREE("Free-Mid", g_allocCountMid, address) if (!address) return; VirtualFree(address, 0, MEM_RELEASE); } -#ifdef _7ZIP_LARGE_PAGES +#ifdef Z7_LARGE_PAGES #ifdef MEM_LARGE_PAGES #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES @@ -183,34 +254,35 @@ void MidFree(void *address) extern SIZE_T g_LargePageSize; SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); +typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); -#endif // _7ZIP_LARGE_PAGES - -void SetLargePageSize() +void SetLargePageSize(void) { - #ifdef _7ZIP_LARGE_PAGES + #ifdef Z7_LARGE_PAGES SIZE_T size; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (!largePageMinimum) + const + Func_GetLargePageMinimum fn = + (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetLargePageMinimum"); + if (!fn) return; - size = largePageMinimum(); + size = fn(); if (size == 0 || (size & (size - 1)) != 0) return; g_LargePageSize = size; #endif } +#endif // Z7_LARGE_PAGES void *BigAlloc(size_t size) { if (size == 0) return NULL; - PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); - - #ifdef _7ZIP_LARGE_PAGES + PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL) + + #ifdef Z7_LARGE_PAGES { SIZE_T ps = g_LargePageSize; if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) @@ -220,38 +292,38 @@ void *BigAlloc(size_t size) size2 = (size + ps) & ~ps; if (size2 >= size) { - void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); - if (res) - return res; + void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); + if (p) + { + PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) + return p; + } } } } #endif - return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + return MidAlloc(size); } void BigFree(void *address) { - PRINT_FREE("Free-Big", g_allocCountBig, address); - - if (!address) - return; - VirtualFree(address, 0, MEM_RELEASE); + PRINT_FREE("Free-Big", g_allocCountBig, address) + MidFree(address); } -#endif +#endif // _WIN32 -static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } -static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); } const ISzAlloc g_Alloc = { SzAlloc, SzFree }; #ifdef _WIN32 -static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } -static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } -static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } -static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); } +static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); } +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); } const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; #endif @@ -334,7 +406,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) void *p; void *pAligned; size_t newSize; - UNUSED_VAR(pp); + UNUSED_VAR(pp) /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned block to prevent cache line sharing with another allocated blocks */ @@ -362,7 +434,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) #else void *p; - UNUSED_VAR(pp); + UNUSED_VAR(pp) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) return NULL; @@ -377,7 +449,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) static void SzAlignedFree(ISzAllocPtr pp, void *address) { - UNUSED_VAR(pp); + UNUSED_VAR(pp) #ifndef USE_posix_memalign if (address) MyFree(((void **)address)[-1]); @@ -401,7 +473,7 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) { - CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); void *adr; void *pAligned; size_t newSize; @@ -447,7 +519,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) { if (address) { - CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); + const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); PrintLn(); Print("- Aligned Free: "); PrintLn(); diff --git a/src/Common/lzma/Alloc.h b/src/Common/lzma/Alloc.h index 3be2041e..fac5b62f 100644 --- a/src/Common/lzma/Alloc.h +++ b/src/Common/lzma/Alloc.h @@ -1,19 +1,32 @@ /* Alloc.h -- Memory allocation functions -2021-07-13 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H +#ifndef ZIP7_INC_ALLOC_H +#define ZIP7_INC_ALLOC_H #include "7zTypes.h" EXTERN_C_BEGIN +/* + MyFree(NULL) : is allowed, as free(NULL) + MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL + MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL +MyRealloc() is similar to realloc() for the following cases: + MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr) + MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed + MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed +*/ + void *MyAlloc(size_t size); void MyFree(void *address); +void *MyRealloc(void *address, size_t size); #ifdef _WIN32 +#ifdef Z7_LARGE_PAGES void SetLargePageSize(void); +#endif void *MidAlloc(size_t size); void MidFree(void *address); diff --git a/src/Common/lzma/Compiler.h b/src/Common/lzma/Compiler.h index a9816fa5..185a52de 100644 --- a/src/Common/lzma/Compiler.h +++ b/src/Common/lzma/Compiler.h @@ -1,12 +1,37 @@ -/* Compiler.h -2021-01-05 : Igor Pavlov : Public domain */ +/* Compiler.h : Compiler specific defines and pragmas +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_COMPILER_H -#define __7Z_COMPILER_H +#ifndef ZIP7_INC_COMPILER_H +#define ZIP7_INC_COMPILER_H + +#if defined(__clang__) +# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif +#if defined(__clang__) && defined(__apple_build_version__) +# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__clang__) +# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION +#elif defined(__GNUC__) +# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#ifdef _MSC_VER +#if !defined(__clang__) && !defined(__GNUC__) +#define Z7_MSC_VER_ORIGINAL _MSC_VER +#endif +#endif + +#if defined(__MINGW32__) || defined(__MINGW64__) +#define Z7_MINGW +#endif + +// #pragma GCC diagnostic ignored "-Wunknown-pragmas" + +#ifdef __clang__ +// padding size of '' with 4 bytes to alignment boundary +#pragma GCC diagnostic ignored "-Wpadded" +#endif - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wunused-private-field" - #endif #ifdef _MSC_VER @@ -17,24 +42,115 @@ #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int #endif - #if _MSC_VER >= 1300 - #pragma warning(disable : 4996) // This function or variable may be unsafe - #else - #pragma warning(disable : 4511) // copy constructor could not be generated - #pragma warning(disable : 4512) // assignment operator could not be generated - #pragma warning(disable : 4514) // unreferenced inline function has been removed - #pragma warning(disable : 4702) // unreachable code - #pragma warning(disable : 4710) // not inlined - #pragma warning(disable : 4714) // function marked as __forceinline not inlined - #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information - #endif +#if defined(_MSC_VER) && _MSC_VER >= 1800 +#pragma warning(disable : 4464) // relative include path contains '..' +#endif + +// == 1200 : -O1 : for __forceinline +// >= 1900 : -O1 : for printf +#pragma warning(disable : 4710) // function not inlined + +#if _MSC_VER < 1900 +// winnt.h: 'Int64ShllMod32' +#pragma warning(disable : 4514) // unreferenced inline function has been removed +#endif + +#if _MSC_VER < 1300 +// #pragma warning(disable : 4702) // unreachable code +// Bra.c : -O1: +#pragma warning(disable : 4714) // function marked as __forceinline not inlined +#endif + +/* +#if _MSC_VER > 1400 && _MSC_VER <= 1900 +// strcat: This function or variable may be unsafe +// sysinfoapi.h: kit10: GetVersion was declared deprecated +#pragma warning(disable : 4996) +#endif +*/ + +#if _MSC_VER > 1200 +// -Wall warnings + +#pragma warning(disable : 4711) // function selected for automatic inline expansion +#pragma warning(disable : 4820) // '2' bytes padding added after data member + +#if _MSC_VER >= 1400 && _MSC_VER < 1920 +// 1400: string.h: _DBG_MEMCPY_INLINE_ +// 1600 - 191x : smmintrin.h __cplusplus' +// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif' +#pragma warning(disable : 4668) + +// 1400 - 1600 : WinDef.h : 'FARPROC' : +// 1900 - 191x : immintrin.h: _readfsbase_u32 +// no function prototype given : converting '()' to '(void)' +#pragma warning(disable : 4255) +#endif + +#if _MSC_VER >= 1914 +// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#pragma warning(disable : 5045) +#endif + +#endif // _MSC_VER > 1200 +#endif // _MSC_VER + + +#if defined(__clang__) && (__clang_major__ >= 4) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("clang loop unroll(disable)") \ + _Pragma("clang loop vectorize(disable)") + #define Z7_ATTRIB_NO_VECTORIZE +#elif defined(__GNUC__) && (__GNUC__ >= 5) + #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) + // __attribute__((optimize("no-unroll-loops"))); + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE +#elif defined(_MSC_VER) && (_MSC_VER >= 1920) + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ + _Pragma("loop( no_vector )") + #define Z7_ATTRIB_NO_VECTORIZE +#else + #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + #define Z7_ATTRIB_NO_VECTORIZE +#endif + +#if defined(MY_CPU_X86_OR_AMD64) && ( \ + defined(__clang__) && (__clang_major__ >= 4) \ + || defined(__GNUC__) && (__GNUC__ >= 5)) + #define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse"))) +#else + #define Z7_ATTRIB_NO_SSE +#endif + +#define Z7_ATTRIB_NO_VECTOR \ + Z7_ATTRIB_NO_VECTORIZE \ + Z7_ATTRIB_NO_SSE + + +#if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 1000) \ + /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ + // GCC is not good for __builtin_expect() + #define Z7_LIKELY(x) (__builtin_expect((x), 1)) + #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) + // #define Z7_unlikely [[unlikely]] + // #define Z7_likely [[likely]] +#else + #define Z7_LIKELY(x) (x) + #define Z7_UNLIKELY(x) (x) + // #define Z7_likely +#endif - #ifdef __clang__ - #pragma clang diagnostic ignored "-Wdeprecated-declarations" - #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" - // #pragma clang diagnostic ignored "-Wreserved-id-macro" - #endif +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") +#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic pop") +#else +#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif #define UNUSED_VAR(x) (void)x; diff --git a/src/Common/lzma/CpuArch.c b/src/Common/lzma/CpuArch.c index fa9afe39..33f8a3ab 100644 --- a/src/Common/lzma/CpuArch.c +++ b/src/Common/lzma/CpuArch.c @@ -1,187 +1,318 @@ /* CpuArch.c -- CPU specific code -2021-07-13 : Igor Pavlov : Public domain */ +2023-05-18 : Igor Pavlov : Public domain */ #include "Precomp.h" +// #include <stdio.h> + #include "CpuArch.h" #ifdef MY_CPU_X86_OR_AMD64 -#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) -#define USE_ASM +#undef NEED_CHECK_FOR_CPUID +#if !defined(MY_CPU_AMD64) +#define NEED_CHECK_FOR_CPUID #endif -#if !defined(USE_ASM) && _MSC_VER >= 1500 -#include <intrin.h> +/* + cpuid instruction supports (subFunction) parameter in ECX, + that is used only with some specific (function) parameter values. + But we always use only (subFunction==0). +*/ +/* + __cpuid(): MSVC and GCC/CLANG use same function/macro name + but parameters are different. + We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. +*/ + +#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \ + || defined(__clang__) /* && (__clang_major__ >= 10) */ + +/* there was some CLANG/GCC compilers that have issues with + rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined). + compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code. + The history of __cpuid() changes in CLANG/GCC: + GCC: + 2007: it preserved ebx for (__PIC__ && __i386__) + 2013: it preserved rbx and ebx for __PIC__ + 2014: it doesn't preserves rbx and ebx anymore + we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem. + CLANG: + 2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check. + Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)? + Do we need __PIC__ test for CLANG or we must care about rbx even if + __PIC__ is not defined? +*/ + +#define ASM_LN "\n" + +#if defined(MY_CPU_AMD64) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%rbx, %q1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%rbx, %q1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(0) means (subFunction = 0), + 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + +#elif defined(MY_CPU_X86) && defined(__PIC__) \ + && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "mov %%ebx, %k1" \ + ASM_LN "cpuid" \ + ASM_LN "xchg %%ebx, %k1" \ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + +#else + +#define x86_cpuid_MACRO(p, func) { \ + __asm__ __volatile__ ( \ + ASM_LN "cpuid" \ + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + #endif -#if defined(USE_ASM) && !defined(MY_CPU_AMD64) -static UInt32 CheckFlag(UInt32 flag) -{ - #ifdef _MSC_VER - __asm pushfd; - __asm pop EAX; - __asm mov EDX, EAX; - __asm xor EAX, flag; - __asm push EAX; - __asm popfd; - __asm pushfd; - __asm pop EAX; - __asm xor EAX, EDX; - __asm push EDX; - __asm popfd; - __asm and flag, EAX; - #else + +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + x86_cpuid_MACRO(p, func) +} + + +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + UInt32 a; __asm__ __volatile__ ( - "pushf\n\t" - "pop %%EAX\n\t" - "movl %%EAX,%%EDX\n\t" - "xorl %0,%%EAX\n\t" - "push %%EAX\n\t" - "popf\n\t" - "pushf\n\t" - "pop %%EAX\n\t" - "xorl %%EDX,%%EAX\n\t" - "push %%EDX\n\t" - "popf\n\t" - "andl %%EAX, %0\n\t": - "=c" (flag) : "c" (flag) : - "%eax", "%edx"); + ASM_LN "pushf" + ASM_LN "pushf" + ASM_LN "pop %0" + // ASM_LN "movl %0, %1" + // ASM_LN "xorl $0x200000, %0" + ASM_LN "btc %1, %0" + ASM_LN "push %0" + ASM_LN "popf" + ASM_LN "pushf" + ASM_LN "pop %0" + ASM_LN "xorl (%%esp), %0" + + ASM_LN "popf" + ASM_LN + : "=&r" (a) // "=a" + : "i" (EFALGS_CPUID_BIT) + ); + if ((a & (1 << EFALGS_CPUID_BIT)) == 0) + return 0; + #endif + { + UInt32 p[4]; + x86_cpuid_MACRO(p, 0) + return p[0]; + } +} + +#undef ASM_LN + +#elif !defined(_MSC_VER) + +/* +// for gcc/clang and other: we can try to use __cpuid macro: +#include <cpuid.h> +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + __cpuid(func, p[0], p[1], p[2], p[3]); +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return (UInt32)__get_cpuid_max(0, NULL); +} +*/ +// for unsupported cpuid: +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(func) + p[0] = p[1] = p[2] = p[3] = 0; +} +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + return 0; +} + +#else // _MSC_VER + +#if !defined(MY_CPU_AMD64) + +UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + #if defined(NEED_CHECK_FOR_CPUID) + #define EFALGS_CPUID_BIT 21 + __asm pushfd + __asm pushfd + /* + __asm pop eax + // __asm mov edx, eax + __asm btc eax, EFALGS_CPUID_BIT + __asm push eax + */ + __asm btc dword ptr [esp], EFALGS_CPUID_BIT + __asm popfd + __asm pushfd + __asm pop eax + // __asm xor eax, edx + __asm xor eax, [esp] + // __asm push edx + __asm popfd + __asm and eax, (1 shl EFALGS_CPUID_BIT) + __asm jz end_func + #endif + __asm push ebx + __asm xor eax, eax // func + __asm xor ecx, ecx // subFunction (optional) for (func == 0) + __asm cpuid + __asm pop ebx + #if defined(NEED_CHECK_FOR_CPUID) + end_func: #endif - return flag; + __asm ret 0 } -#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; -#else -#define CHECK_CPUID_IS_SUPPORTED -#endif -#ifndef USE_ASM - #ifdef _MSC_VER +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm xor ecx, ecx // subfunction (optional) for (func == 0) + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 0 +} + +#else // MY_CPU_AMD64 + #if _MSC_VER >= 1600 - #define MY__cpuidex __cpuidex + #include <intrin.h> + #define MY_cpuidex __cpuidex #else - /* - __cpuid (function == 4) requires subfunction number in ECX. + __cpuid (func == (0 or 7)) requires subfunction number in ECX. MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. __cpuid() in new MSVC clears ECX. - __cpuid() in old MSVC (14.00) doesn't clear ECX - We still can use __cpuid for low (function) values that don't require ECX, - but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). + __cpuid() in old MSVC (14.00) x64 doesn't clear ECX + We still can use __cpuid for low (func) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, - where ECX value is first parameter for FAST_CALL / NO_INLINE function, - So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and + where ECX value is first parameter for FASTCALL / NO_INLINE func, + So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. - DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! +DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! */ - static -MY_NO_INLINE -void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) { - UNUSED_VAR(subFunction); - __cpuid(CPUInfo, function); + UNUSED_VAR(subFunction) + __cpuid(CPUInfo, func); } - - #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) - #pragma message("======== MY__cpuidex_HACK WAS USED ========") - #endif - #else - #define MY__cpuidex(info, func, func2) __cpuid(info, func) - #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") - #endif + #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) + #pragma message("======== MY_cpuidex_HACK WAS USED ========") + #endif // _MSC_VER >= 1600 + +#if !defined(MY_CPU_AMD64) +/* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, + so we disable inlining here */ +Z7_NO_INLINE #endif - - - - -void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) +void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { - #ifdef USE_ASM - - #ifdef _MSC_VER - - UInt32 a2, b2, c2, d2; - __asm xor EBX, EBX; - __asm xor ECX, ECX; - __asm xor EDX, EDX; - __asm mov EAX, function; - __asm cpuid; - __asm mov a2, EAX; - __asm mov b2, EBX; - __asm mov c2, ECX; - __asm mov d2, EDX; - - *a = a2; - *b = b2; - *c = c2; - *d = d2; - - #else - - __asm__ __volatile__ ( - #if defined(MY_CPU_AMD64) && defined(__PIC__) - "mov %%rbx, %%rdi;" - "cpuid;" - "xchg %%rbx, %%rdi;" - : "=a" (*a) , - "=D" (*b) , - #elif defined(MY_CPU_X86) && defined(__PIC__) - "mov %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "=a" (*a) , - "=D" (*b) , - #else - "cpuid" - : "=a" (*a) , - "=b" (*b) , - #endif - "=c" (*c) , - "=d" (*d) - : "0" (function), "c"(0) ) ; - - #endif - - #else + MY_cpuidex((int *)p, (int)func, 0); +} - int CPUInfo[4]; +Z7_NO_INLINE +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) +{ + int a[4]; + MY_cpuidex(a, 0, 0); + return a[0]; +} - MY__cpuidex(CPUInfo, (int)function, 0); +#endif // MY_CPU_AMD64 +#endif // _MSC_VER - *a = (UInt32)CPUInfo[0]; - *b = (UInt32)CPUInfo[1]; - *c = (UInt32)CPUInfo[2]; - *d = (UInt32)CPUInfo[3]; +#if defined(NEED_CHECK_FOR_CPUID) +#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; } +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif +#undef NEED_CHECK_FOR_CPUID - #endif -} -BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +static +BoolInt x86cpuid_Func_1(UInt32 *p) { CHECK_CPUID_IS_SUPPORTED - MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); - MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + z7_x86_cpuid(p, 1); return True; } -static const UInt32 kVendors[][3] = +/* +static const UInt32 kVendors[][1] = { - { 0x756E6547, 0x49656E69, 0x6C65746E}, - { 0x68747541, 0x69746E65, 0x444D4163}, - { 0x746E6543, 0x48727561, 0x736C7561} + { 0x756E6547 }, // , 0x49656E69, 0x6C65746E }, + { 0x68747541 }, // , 0x69746E65, 0x444D4163 }, + { 0x746E6543 } // , 0x48727561, 0x736C7561 } }; +*/ + +/* +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; +int x86cpuid_GetFirm(const Cx86cpuid *p); +#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf)) +#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf)) +#define x86cpuid_ver_GetStepping(ver) (ver & 0xf) int x86cpuid_GetFirm(const Cx86cpuid *p) { unsigned i; - for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++) { const UInt32 *v = kVendors[i]; - if (v[0] == p->vendor[0] && - v[1] == p->vendor[1] && - v[2] == p->vendor[2]) + if (v[0] == p->vendor[0] + // && v[1] == p->vendor[1] + // && v[2] == p->vendor[2] + ) return (int)i; } return -1; @@ -190,41 +321,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p) BoolInt CPU_Is_InOrder() { Cx86cpuid p; - int firm; UInt32 family, model; if (!x86cpuid_CheckAndRead(&p)) return True; - family = x86cpuid_GetFamily(p.ver); - model = x86cpuid_GetModel(p.ver); - - firm = x86cpuid_GetFirm(&p); + family = x86cpuid_ver_GetFamily(p.ver); + model = x86cpuid_ver_GetModel(p.ver); - switch (firm) + switch (x86cpuid_GetFirm(&p)) { case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( - /* In-Order Atom CPU */ - model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ - || model == 0x26 /* 45 nm, Z6xx */ - || model == 0x27 /* 32 nm, Z2460 */ - || model == 0x35 /* 32 nm, Z2760 */ - || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + // In-Order Atom CPU + model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 + || model == 0x26 // 45 nm, Z6xx + || model == 0x27 // 32 nm, Z2460 + || model == 0x35 // 32 nm, Z2760 + || model == 0x36 // 32 nm, N2xxx, D2xxx ))); case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); } - return True; + return False; // v23 : unknown processors are not In-Order } +*/ + +#ifdef _WIN32 +#include "7zWindows.h" +#endif #if !defined(MY_CPU_AMD64) && defined(_WIN32) -#include <Windows.h> -static BoolInt CPU_Sys_Is_SSE_Supported() + +/* for legacy SSE ia32: there is no user-space cpu instruction to check + that OS supports SSE register storing/restoring on context switches. + So we need some OS-specific function to check that it's safe to use SSE registers. +*/ + +Z7_FORCE_INLINE +static BoolInt CPU_Sys_Is_SSE_Supported(void) { - OSVERSIONINFO vi; - vi.dwOSVersionInfoSize = sizeof(vi); - if (!GetVersionEx(&vi)) - return False; - return (vi.dwMajorVersion >= 5); +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4996) // `GetVersion': was declared deprecated +#endif + /* low byte is major version of Windows + We suppose that any Windows version since + Windows2000 (major == 5) supports SSE registers */ + return (Byte)GetVersion() >= 5; +#if defined(_MSC_VER) + #pragma warning(pop) +#endif } #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; #else @@ -232,94 +377,300 @@ static BoolInt CPU_Sys_Is_SSE_Supported() #endif -static UInt32 X86_CPUID_ECX_Get_Flags() +#if !defined(MY_CPU_AMD64) + +BoolInt CPU_IsSupported_CMOV(void) { - Cx86cpuid p; + UInt32 a[4]; + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 15) & 1; +} + +BoolInt CPU_IsSupported_SSE(void) +{ + UInt32 a[4]; CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSE2(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) + return 0; + return (a[3] >> 26) & 1; +} + +#endif + + +static UInt32 x86cpuid_Func_1_ECX(void) +{ + UInt32 a[4]; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_Func_1(&a[0])) return 0; - return p.c; + return a[2]; } -BoolInt CPU_IsSupported_AES() +BoolInt CPU_IsSupported_AES(void) { - return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; + return (x86cpuid_Func_1_ECX() >> 25) & 1; } -BoolInt CPU_IsSupported_SSSE3() +BoolInt CPU_IsSupported_SSSE3(void) { - return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; + return (x86cpuid_Func_1_ECX() >> 9) & 1; } -BoolInt CPU_IsSupported_SSE41() +BoolInt CPU_IsSupported_SSE41(void) { - return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; + return (x86cpuid_Func_1_ECX() >> 19) & 1; } -BoolInt CPU_IsSupported_SHA() +BoolInt CPU_IsSupported_SHA(void) { - Cx86cpuid p; CHECK_SYS_SSE_SUPPORT - if (!x86cpuid_CheckAndRead(&p)) - return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 7); return (d[1] >> 29) & 1; } } -// #include <stdio.h> +/* +MSVC: _xgetbv() intrinsic is available since VS2010SP1. + MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in + <immintrin.h> that we can use or check. + For any 32-bit x86 we can use asm code in MSVC, + but MSVC asm code is huge after compilation. + So _xgetbv() is better + +ICC: _xgetbv() intrinsic is available (in what version of ICC?) + ICC defines (__GNUC___) and it supports gnu assembler + also ICC supports MASM style code with -use-msasm switch. + but ICC doesn't support __attribute__((__target__)) + +GCC/CLANG 9: + _xgetbv() is macro that works via __builtin_ia32_xgetbv() + and we need __attribute__((__target__("xsave")). + But with __target__("xsave") the function will be not + inlined to function that has no __target__("xsave") attribute. + If we want _xgetbv() call inlining, then we should use asm version + instead of calling _xgetbv(). + Note:intrinsic is broke before GCC 8.2: + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684 +*/ -#ifdef _WIN32 -#include <Windows.h> +#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \ + || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \ + || defined(__GNUC__) && (__GNUC__ >= 9) \ + || defined(__clang__) && (__clang_major__ >= 9) +// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler +#if defined(__INTEL_COMPILER) +#define ATTRIB_XGETBV +#elif defined(__GNUC__) || defined(__clang__) +// we don't define ATTRIB_XGETBV here, because asm version is better for inlining. +// #define ATTRIB_XGETBV __attribute__((__target__("xsave"))) +#else +#define ATTRIB_XGETBV +#endif +#endif + +#if defined(ATTRIB_XGETBV) +#include <immintrin.h> #endif -BoolInt CPU_IsSupported_AVX2() + +// XFEATURE_ENABLED_MASK/XCR0 +#define MY_XCR_XFEATURE_ENABLED_MASK 0 + +#if defined(ATTRIB_XGETBV) +ATTRIB_XGETBV +#endif +static UInt64 x86_xgetbv_0(UInt32 num) { - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT +#if defined(ATTRIB_XGETBV) + { + return + #if (defined(_MSC_VER)) + _xgetbv(num); + #else + __builtin_ia32_xgetbv( + #if !defined(__clang__) + (int) + #endif + num); + #endif + } + +#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC) + + UInt32 a, d; + #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) + __asm__ + ( + "xgetbv" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #else // is old gcc + __asm__ + ( + ".byte 0x0f, 0x01, 0xd0" "\n\t" + : "=a"(a), "=d"(d) : "c"(num) : "cc" + ); + #endif + return ((UInt64)d << 32) | a; + // return a; + +#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64) + + UInt32 a, d; + __asm { + push eax + push edx + push ecx + mov ecx, num; + // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK + _emit 0x0f + _emit 0x01 + _emit 0xd0 + mov a, eax + mov d, edx + pop ecx + pop edx + pop eax + } + return ((UInt64)d << 32) | a; + // return a; + +#else // it's unknown compiler + // #error "Need xgetbv function" + UNUSED_VAR(num) + // for MSVC-X64 we could call external function from external file. + /* Actually we had checked OSXSAVE/AVX in cpuid before. + So it's expected that OS supports at least AVX and below. */ + // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0 + return + // (1 << 0) | // x87 + (1 << 1) // SSE + | (1 << 2); // AVX + +#endif +} +#ifdef _WIN32 +/* + Windows versions do not know about new ISA extensions that + can be introduced. But we still can use new extensions, + even if Windows doesn't report about supporting them, + But we can use new extensions, only if Windows knows about new ISA extension + that changes the number or size of registers: SSE, AVX/XSAVE, AVX512 + So it's enough to check + MY_PF_AVX_INSTRUCTIONS_AVAILABLE + instead of + MY_PF_AVX2_INSTRUCTIONS_AVAILABLE +*/ +#define MY_PF_XSAVE_ENABLED 17 +// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36 +// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37 +// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38 +// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39 +// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40 +// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41 +#endif + +BoolInt CPU_IsSupported_AVX(void) +{ #ifdef _WIN32 - #define MY__PF_XSAVE_ENABLED 17 - if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED)) return False; + /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from + some latest Win10 revisions. But we need AVX in older Windows also. + So we don't use the following check: */ + /* + if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE)) + return False; + */ #endif - if (!x86cpuid_CheckAndRead(&p)) + /* + OS must use new special XSAVE/XRSTOR instructions to save + AVX registers when it required for context switching. + At OS statring: + OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions. + Also OS sets bitmask in XCR0 register that defines what + registers will be processed by XSAVE instruction: + XCR0.SSE[bit 0] - x87 registers and state + XCR0.SSE[bit 1] - SSE registers and state + XCR0.AVX[bit 2] - AVX registers and state + CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27]. + So we can read that bit in user-space. + XCR0 is available for reading in user-space by new XGETBV instruction. + */ + { + const UInt32 c = x86cpuid_Func_1_ECX(); + if (0 == (1 + & (c >> 28) // AVX instructions are supported by hardware + & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS. + return False; + } + + /* also we can check + CPUID.1:ECX.XSAVE [bit 26] : that shows that + XSAVE, XRESTOR, XSETBV, XGETBV instructions are supported by hardware. + But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */ + + /* If OS have enabled XSAVE extension instructions (OSXSAVE == 1), + in most cases we expect that OS also will support storing/restoring + for AVX and SSE states at least. + But to be ensure for that we call user-space instruction + XGETBV(0) to get XCR0 value that contains bitmask that defines + what exact states(registers) OS have enabled for storing/restoring. + */ + + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=%d\n", bm); + return 1 + & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring + } + // since Win7SP1: we can use GetEnabledXStateFeatures(); +} + + +BoolInt CPU_IsSupported_AVX2(void) +{ + if (!CPU_IsSupported_AVX()) return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 & (d[1] >> 5); // avx2 } } -BoolInt CPU_IsSupported_VAES_AVX2() +BoolInt CPU_IsSupported_VAES_AVX2(void) { - Cx86cpuid p; - CHECK_SYS_SSE_SUPPORT - - #ifdef _WIN32 - #define MY__PF_XSAVE_ENABLED 17 - if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + if (!CPU_IsSupported_AVX()) return False; - #endif - - if (!x86cpuid_CheckAndRead(&p)) - return False; - if (p.maxFunc < 7) + if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { - UInt32 d[4] = { 0 }; - MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 & (d[1] >> 5) // avx2 @@ -328,20 +679,15 @@ BoolInt CPU_IsSupported_VAES_AVX2() } } -BoolInt CPU_IsSupported_PageGB() +BoolInt CPU_IsSupported_PageGB(void) { - Cx86cpuid cpuid; - if (!x86cpuid_CheckAndRead(&cpuid)) - return False; + CHECK_CPUID_IS_SUPPORTED { - UInt32 d[4] = { 0 }; - MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); + UInt32 d[4]; + z7_x86_cpuid(d, 0x80000000); if (d[0] < 0x80000001) return False; - } - { - UInt32 d[4] = { 0 }; - MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); + z7_x86_cpuid(d, 0x80000001); return (d[3] >> 26) & 1; } } @@ -351,11 +697,11 @@ BoolInt CPU_IsSupported_PageGB() #ifdef _WIN32 -#include <Windows.h> +#include "7zWindows.h" -BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } #else @@ -378,28 +724,27 @@ static void Print_sysctlbyname(const char *name) } } */ +/* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); +*/ -static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name) { UInt32 val = 0; - if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) return 1; return 0; } - /* - Print_sysctlbyname("hw.pagesize"); - Print_sysctlbyname("machdep.cpu.brand_string"); - */ - BoolInt CPU_IsSupported_CRC32(void) { - return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); } BoolInt CPU_IsSupported_NEON(void) { - return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); + return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); } #ifdef MY_CPU_ARM64 @@ -461,15 +806,15 @@ MY_HWCAP_CHECK_FUNC (AES) #include <sys/sysctl.h> -int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) { return sysctlbyname(name, buf, bufSize, NULL, 0); } -int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) { size_t bufSize = sizeof(*val); - int res = My_sysctlbyname_Get(name, val, &bufSize); + const int res = z7_sysctlbyname_Get(name, val, &bufSize); if (res == 0 && bufSize != sizeof(*val)) return EFAULT; return res; diff --git a/src/Common/lzma/CpuArch.h b/src/Common/lzma/CpuArch.h index 529d3a50..8e5d8a54 100644 --- a/src/Common/lzma/CpuArch.h +++ b/src/Common/lzma/CpuArch.h @@ -1,8 +1,8 @@ /* CpuArch.h -- CPU specific code -2021-07-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __CPU_ARCH_H -#define __CPU_ARCH_H +#ifndef ZIP7_INC_CPU_ARCH_H +#define ZIP7_INC_CPU_ARCH_H #include "7zTypes.h" @@ -51,7 +51,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(__AARCH64EB__) \ || defined(__aarch64__) #define MY_CPU_ARM64 - #define MY_CPU_NAME "arm64" + #ifdef __ILP32__ + #define MY_CPU_NAME "arm64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "arm64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif #define MY_CPU_64BIT #endif @@ -68,8 +74,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_ARM #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_ARMT #define MY_CPU_NAME "armt" #else + #define MY_CPU_ARM32 #define MY_CPU_NAME "arm" #endif /* #define MY_CPU_32BIT */ @@ -103,6 +111,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. || defined(__PPC__) \ || defined(_POWER) +#define MY_CPU_PPC_OR_PPC64 + #if defined(__ppc64__) \ || defined(__powerpc64__) \ || defined(_LP64) \ @@ -123,12 +133,15 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif -#if defined(__sparc64__) - #define MY_CPU_NAME "sparc64" - #define MY_CPU_64BIT -#elif defined(__sparc__) - #define MY_CPU_NAME "sparc" - /* #define MY_CPU_32BIT */ +#if defined(__riscv) \ + || defined(__riscv__) + #if __riscv_xlen == 32 + #define MY_CPU_NAME "riscv32" + #elif __riscv_xlen == 64 + #define MY_CPU_NAME "riscv64" + #else + #define MY_CPU_NAME "riscv" + #endif #endif @@ -194,6 +207,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #error Stop_Compiling_Bad_Endian #endif +#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE) + #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time +#endif #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) #error Stop_Compiling_Bad_32_64_BIT @@ -250,6 +266,67 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. +#ifdef __has_builtin + #define Z7_has_builtin(x) __has_builtin(x) +#else + #define Z7_has_builtin(x) 0 +#endif + + +#define Z7_BSWAP32_CONST(v) \ + ( (((UInt32)(v) << 24) ) \ + | (((UInt32)(v) << 8) & (UInt32)0xff0000) \ + | (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \ + | (((UInt32)(v) >> 24) )) + + +#if defined(_MSC_VER) && (_MSC_VER >= 1300) + +#include <stdlib.h> + +/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */ + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) + +#define Z7_BSWAP16(v) _byteswap_ushort(v) +#define Z7_BSWAP32(v) _byteswap_ulong (v) +#define Z7_BSWAP64(v) _byteswap_uint64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) + +#define Z7_BSWAP16(v) __builtin_bswap16(v) +#define Z7_BSWAP32(v) __builtin_bswap32(v) +#define Z7_BSWAP64(v) __builtin_bswap64(v) +#define Z7_CPU_FAST_BSWAP_SUPPORTED + +#else + +#define Z7_BSWAP16(v) ((UInt16) \ + ( ((UInt32)(v) << 8) \ + | ((UInt32)(v) >> 8) \ + )) + +#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v) + +#define Z7_BSWAP64(v) \ + ( ( ( (UInt64)(v) ) << 8 * 7 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \ + | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \ + | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \ + | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \ + | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \ + | ( ( (UInt64)(v) >> 8 * 7 ) ) \ + ) + +#endif + + + #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM64) @@ -269,13 +346,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #ifdef MY_CPU_LE_UNALIGN_64 #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } #endif #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } -#ifdef MY_CPU_LE_UNALIGN_64 -#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } -#endif #else @@ -302,51 +377,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif -#ifndef MY_CPU_LE_UNALIGN_64 - +#ifndef GetUi64 #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) +#endif +#ifndef SetUi64 #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ - SetUi32(_ppp2_ , (UInt32)_vvv2_); \ - SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } - + SetUi32(_ppp2_ , (UInt32)_vvv2_) \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) } #endif +#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) +#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } -#ifdef __has_builtin - #define MY__has_builtin(x) __has_builtin(x) -#else - #define MY__has_builtin(x) 0 +#if defined(MY_CPU_LE_UNALIGN_64) +#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) #endif -#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) - -/* Note: we use bswap instruction, that is unsupported in 386 cpu */ - -#include <stdlib.h> - -#pragma intrinsic(_byteswap_ushort) -#pragma intrinsic(_byteswap_ulong) -#pragma intrinsic(_byteswap_uint64) - -/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) - -#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) - -#elif defined(MY_CPU_LE_UNALIGN) && ( \ - (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ - || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) - -/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ -#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) -#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) - -#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) - #else #define GetBe32(p) ( \ @@ -355,8 +405,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. ((UInt32)((const Byte *)(p))[2] << 8) | \ ((const Byte *)(p))[3] ) -#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) - #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ _ppp_[0] = (Byte)(_vvv_ >> 24); \ _ppp_[1] = (Byte)(_vvv_ >> 16); \ @@ -365,50 +413,83 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif +#ifndef GetBe64 +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) +#endif #ifndef GetBe16 - #define GetBe16(p) ( (UInt16) ( \ ((UInt16)((const Byte *)(p))[0] << 8) | \ ((const Byte *)(p))[1] )) +#endif + +#if defined(MY_CPU_BE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_NATIVE_TO_BE_32(v) (v) +#elif defined(MY_CPU_LE) +#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) +#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) +#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +#else +#error Stop_Compiling_Unknown_Endian_CONV #endif +#if defined(MY_CPU_BE) -#ifdef MY_CPU_X86_OR_AMD64 +#define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } -typedef struct -{ - UInt32 maxFunc; - UInt32 vendor[3]; - UInt32 ver; - UInt32 b; - UInt32 c; - UInt32 d; -} Cx86cpuid; +#define GetUi32a(p) GetUi32(p) +#define GetUi16a(p) GetUi16(p) +#define SetUi32a(p, v) SetUi32(p, v) +#define SetUi16a(p, v) SetUi16(p, v) -enum -{ - CPU_FIRM_INTEL, - CPU_FIRM_AMD, - CPU_FIRM_VIA -}; +#elif defined(MY_CPU_LE) -void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d); +#define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) +#define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) +#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } +#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } -BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p); -int x86cpuid_GetFirm(const Cx86cpuid *p); +#define GetBe32a(p) GetBe32(p) +#define GetBe16a(p) GetBe16(p) +#define SetBe32a(p, v) SetBe32(p, v) +#define SetBe16a(p, v) SetBe16(p, v) -#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF)) -#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) -#define x86cpuid_GetStepping(ver) (ver & 0xF) +#else +#error Stop_Compiling_Unknown_Endian_CPU_a +#endif -BoolInt CPU_Is_InOrder(void); + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_OR_ARM64) \ + || defined(MY_CPU_PPC_OR_PPC64) + #define Z7_CPU_FAST_ROTATE_SUPPORTED +#endif + + +#ifdef MY_CPU_X86_OR_AMD64 + +void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function); +UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); +#if defined(MY_CPU_AMD64) +#define Z7_IF_X86_CPUID_SUPPORTED +#else +#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc()) +#endif BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_CMOV(void); +BoolInt CPU_IsSupported_SSE(void); +BoolInt CPU_IsSupported_SSE2(void); BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SHA(void); @@ -433,8 +514,8 @@ BoolInt CPU_IsSupported_AES(void); #endif #if defined(__APPLE__) -int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); -int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/src/Common/lzma/LzFind.c b/src/Common/lzma/LzFind.c index 1b73c284..0fbd5aae 100644 --- a/src/Common/lzma/LzFind.c +++ b/src/Common/lzma/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2021-11-29 : Igor Pavlov : Public domain */ +2023-03-14 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -17,7 +17,7 @@ #define kEmptyHashValue 0 #define kMaxValForNormalize ((UInt32)0) -// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses @@ -67,10 +67,10 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { - if (!p->directInput) + // if (!p->directInput) { - ISzAlloc_Free(alloc, p->bufferBase); - p->bufferBase = NULL; + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; } } @@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all { if (blockSize == 0) return 0; - if (!p->bufferBase || p->blockSize != blockSize) + if (!p->bufBase || p->blockSize != blockSize) { // size_t blockSizeT; LzInWindow_Free(p, alloc); @@ -101,11 +101,11 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all #endif */ - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); - // printf("\nbufferBase = %p\n", p->bufferBase); + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufBase); // return 0; // for debug } - return (p->bufferBase != NULL); + return (p->bufBase != NULL); } static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } @@ -113,7 +113,7 @@ static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) @@ -127,8 +127,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); if (curSize > p->directInputRem) curSize = (UInt32)p->directInputRem; - p->directInputRem -= curSize; p->streamPos += curSize; + p->directInputRem -= curSize; if (p->directInputRem == 0) p->streamEndWasReached = 1; return; @@ -136,8 +136,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { - Byte *dest = p->buffer + GET_AVAIL_BYTES(p); - size_t size = (size_t)(p->bufferBase + p->blockSize - dest); + const Byte *dest = p->buffer + GET_AVAIL_BYTES(p); + size_t size = (size_t)(p->bufBase + p->blockSize - dest); if (size == 0) { /* we call ReadBlock() after NeedMove() and MoveBlock(). @@ -153,7 +153,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) // #define kRead 3 // if (size > kRead) size = kRead; // for debug - p->result = ISeqInStream_Read(p->stream, dest, &size); + /* + // we need cast (Byte *)dest. + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wcast-qual" + #endif + */ + p->result = ISeqInStream_Read(p->stream, + p->bufBase + (dest - p->bufBase), &size); if (p->result != SZ_OK) return; if (size == 0) @@ -173,14 +180,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) -MY_NO_INLINE +Z7_NO_INLINE void MatchFinder_MoveBlock(CMatchFinder *p) { - const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; + const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore; const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; - p->buffer = p->bufferBase + keepBefore; - memmove(p->bufferBase, - p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + p->buffer = p->bufBase + keepBefore; + memmove(p->bufBase, + p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)), keepBefore + (size_t)GET_AVAIL_BYTES(p)); } @@ -198,7 +205,7 @@ int MatchFinder_NeedMove(CMatchFinder *p) return 0; if (p->streamEndWasReached || p->result != SZ_OK) return 0; - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); + return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) @@ -214,6 +221,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) p->cutValue = 32; p->btMode = 1; p->numHashBytes = 4; + p->numHashBytes_Min = 2; + p->numHashOutBits = 0; p->bigHash = 0; } @@ -222,8 +231,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p) void MatchFinder_Construct(CMatchFinder *p) { unsigned i; - p->bufferBase = NULL; + p->buffer = NULL; + p->bufBase = NULL; p->directInput = 0; + p->stream = NULL; p->hash = NULL; p->expectedDataSize = (UInt64)(Int64)-1; MatchFinder_SetDefaultSettings(p); @@ -238,6 +249,8 @@ void MatchFinder_Construct(CMatchFinder *p) } } +#undef kCrcPoly + static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->hash); @@ -252,7 +265,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) { - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + const size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / sizeof(CLzRef) != num) return NULL; return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); @@ -298,6 +311,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) } +// input is historySize +static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + +// input is historySize +static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs) +{ + if (p->numHashBytes == 2) + return (1 << 16) - 1; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + hs >>= 1; + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + return hs; +} + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) @@ -318,78 +387,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, p->blockSize = 0; if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) { - const UInt32 newCyclicBufferSize = historySize + 1; // do not change it - UInt32 hs; - p->matchMaxLen = matchMaxLen; + size_t hashSizeSum; { - // UInt32 hs4; - p->fixedHashSize = 0; - hs = (1 << 16) - 1; - if (p->numHashBytes != 2) + UInt32 hs; + UInt32 hsCur; + + if (p->numHashOutBits != 0) { - hs = historySize; - if (hs > p->expectedDataSize) - hs = (UInt32)p->expectedDataSize; - if (hs != 0) - hs--; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - // we propagated 16 bits in (hs). Low 16 bits must be set later - hs >>= 1; - if (hs >= (1 << 24)) - { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ - } - - // hs = ((UInt32)1 << 25) - 1; // for test - + unsigned numBits = p->numHashOutBits; + const unsigned nbMax = + (p->numHashBytes == 2 ? 16 : + (p->numHashBytes == 3 ? 24 : 32)); + if (numBits > nbMax) + numBits = nbMax; + if (numBits >= 32) + hs = (UInt32)0 - 1; + else + hs = ((UInt32)1 << numBits) - 1; // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) hs |= (1 << 16) - 1; /* don't change it! */ - - // bt5: we adjust the size with recommended minimum size if (p->numHashBytes >= 5) hs |= (256 << kLzHash_CrcShift_2) - 1; + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); + if (hs > hs2) + hs = hs2; + } + hsCur = hs; + if (p->expectedDataSize < historySize) + { + const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); + if (hsCur > hs2) + hsCur = hs2; + } } - p->hashMask = hs; - hs++; - - /* - hs4 = (1 << 20); - if (hs4 > hs) - hs4 = hs; - // hs4 = (1 << 16); // for test - p->hash4Mask = hs4 - 1; - */ + else + { + hs = MatchFinder_GetHashMask(p, historySize); + hsCur = hs; + if (p->expectedDataSize < historySize) + { + hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); + if (hsCur > hs) // is it possible? + hsCur = hs; + } + } + + p->hashMask = hsCur; - if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; - hs += p->fixedHashSize; + hashSizeSum = hs; + hashSizeSum++; + if (hashSizeSum < hs) + return 0; + { + UInt32 fixedHashSize = 0; + if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size; + if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; + hashSizeSum += fixedHashSize; + p->fixedHashSize = fixedHashSize; + } } + p->matchMaxLen = matchMaxLen; + { size_t newSize; size_t numSons; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it p->historySize = historySize; - p->hashSizeSum = hs; p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) numSons = newCyclicBufferSize; if (p->btMode) numSons <<= 1; - newSize = hs + numSons; + newSize = hashSizeSum + numSons; + + if (numSons < newCyclicBufferSize || newSize < numSons) + return 0; // aligned size is not required here, but it can be better for some loops #define NUM_REFS_ALIGN_MASK 0xF newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; - if (p->hash && p->numRefs == newSize) + // 22.02: we don't reallocate buffer, if old size is enough + if (p->hash && p->numRefs >= newSize) return 1; MatchFinder_FreeThisClassMemory(p, alloc); @@ -398,7 +480,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, if (p->hash) { - p->son = p->hash + p->hashSizeSum; + p->son = p->hash + hashSizeSum; return 1; } } @@ -470,7 +552,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p) void MatchFinder_Init_4(CMatchFinder *p) { - p->buffer = p->bufferBase; + if (!p->directInput) + p->buffer = p->bufBase; { /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. the code in CMatchFinderMt expects (pos = 1) */ @@ -507,20 +590,20 @@ void MatchFinder_Init(CMatchFinder *p) #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) \ - || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) - #define USE_SATUR_SUB_128 - #define USE_AVX2 - #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) - #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #if defined(__clang__) && (__clang_major__ >= 4) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + + #define USE_LZFIND_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_256 + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2"))) #elif defined(_MSC_VER) #if (_MSC_VER >= 1600) - #define USE_SATUR_SUB_128 - #if (_MSC_VER >= 1900) - #define USE_AVX2 - #include <immintrin.h> // avx - #endif + #define USE_LZFIND_SATUR_SUB_128 + #endif + #if (_MSC_VER >= 1900) + #define USE_LZFIND_SATUR_SUB_256 #endif #endif @@ -529,16 +612,16 @@ void MatchFinder_Init(CMatchFinder *p) #if defined(__clang__) && (__clang_major__ >= 8) \ || defined(__GNUC__) && (__GNUC__ >= 8) - #define USE_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_128 #ifdef MY_CPU_ARM64 - // #define ATTRIB_SSE41 __attribute__((__target__(""))) + // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) #else - // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) #endif #elif defined(_MSC_VER) #if (_MSC_VER >= 1910) - #define USE_SATUR_SUB_128 + #define USE_LZFIND_SATUR_SUB_128 #endif #endif @@ -550,121 +633,130 @@ void MatchFinder_Init(CMatchFinder *p) #endif -/* -#ifndef ATTRIB_SSE41 - #define ATTRIB_SSE41 -#endif -#ifndef ATTRIB_AVX2 - #define ATTRIB_AVX2 -#endif -*/ -#ifdef USE_SATUR_SUB_128 +#ifdef USE_LZFIND_SATUR_SUB_128 -// #define _SHOW_HW_STATUS +// #define Z7_SHOW_HW_STATUS -#ifdef _SHOW_HW_STATUS +#ifdef Z7_SHOW_HW_STATUS #include <stdio.h> -#define _PRF(x) x -_PRF(;) +#define PRF(x) x +PRF(;) #else -#define _PRF(x) +#define PRF(x) #endif + #ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM64 -// #define FORCE_SATUR_SUB_128 +// #define FORCE_LZFIND_SATUR_SUB_128 #endif +typedef uint32x4_t LzFind_v128; +#define SASUB_128_V(v, s) \ + vsubq_u32(vmaxq_u32(v, s), s) -typedef uint32x4_t v128; -#define SASUB_128(i) \ - *(v128 *)(void *)(items + (i) * 4) = \ - vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); - -#else +#else // MY_CPU_ARM_OR_ARM64 #include <smmintrin.h> // sse4.1 -typedef __m128i v128; -#define SASUB_128(i) \ - *(v128 *)(void *)(items + (i) * 4) = \ - _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 +typedef __m128i LzFind_v128; +// SSE 4.1 +#define SASUB_128_V(v, s) \ + _mm_sub_epi32(_mm_max_epu32(v, s), s) -#endif +#endif // MY_CPU_ARM_OR_ARM64 +#define SASUB_128(i) \ + *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \ + *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2); + -MY_NO_INLINE +Z7_NO_INLINE static -#ifdef ATTRIB_SSE41 -ATTRIB_SSE41 +#ifdef LZFIND_ATTRIB_SSE41 +LZFIND_ATTRIB_SSE41 #endif void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - v128 sub2 = + const LzFind_v128 sub2 = #ifdef MY_CPU_ARM_OR_ARM64 vdupq_n_u32(subValue); #else _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); #endif + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - SASUB_128(0) - SASUB_128(1) - SASUB_128(2) - SASUB_128(3) - items += 4 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; + SASUB_128(0) SASUB_128(1) items += 2 * 4; } while (items != lim); } -#ifdef USE_AVX2 +#ifdef USE_LZFIND_SATUR_SUB_256 #include <immintrin.h> // avx +/* +clang :immintrin.h uses +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVX2__) +#include <avx2intrin.h> +#endif +so we need <avxintrin.h> for clang-cl */ -#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 +#if defined(__clang__) +#include <avxintrin.h> +#include <avx2intrin.h> +#endif -MY_NO_INLINE +// AVX2: +#define SASUB_256(i) \ + *( __m256i *)( void *)(items + (i) * 8) = \ + _mm256_sub_epi32(_mm256_max_epu32( \ + *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); + +Z7_NO_INLINE static -#ifdef ATTRIB_AVX2 -ATTRIB_AVX2 +#ifdef LZFIND_ATTRIB_AVX2 +LZFIND_ATTRIB_AVX2 #endif void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - __m256i sub2 = _mm256_set_epi32( + const __m256i sub2 = _mm256_set_epi32( (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - SASUB_256(0) - SASUB_256(1) - items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; + SASUB_256(0) SASUB_256(1) items += 2 * 8; } while (items != lim); } -#endif // USE_AVX2 +#endif // USE_LZFIND_SATUR_SUB_256 -#ifndef FORCE_SATUR_SUB_128 -typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( +#ifndef FORCE_LZFIND_SATUR_SUB_128 +typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)( UInt32 subValue, CLzRef *items, const CLzRef *lim); static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; -#endif // FORCE_SATUR_SUB_128 +#endif // FORCE_LZFIND_SATUR_SUB_128 -#endif // USE_SATUR_SUB_128 +#endif // USE_LZFIND_SATUR_SUB_128 // kEmptyHashValue must be zero -// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; -#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; +// #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; } +#define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; } -#ifdef FORCE_SATUR_SUB_128 +#ifdef FORCE_LZFIND_SATUR_SUB_128 #define DEFAULT_SaturSub LzFind_SaturSub_128 @@ -672,24 +764,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; #define DEFAULT_SaturSub LzFind_SaturSub_32 -MY_NO_INLINE +Z7_NO_INLINE static void -MY_FAST_CALL +Z7_FASTCALL LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) { + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE do { - UInt32 v; - SASUB_32(0) - SASUB_32(1) - SASUB_32(2) - SASUB_32(3) - SASUB_32(4) - SASUB_32(5) - SASUB_32(6) - SASUB_32(7) - items += 8; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; + SASUB_32(0) SASUB_32(1) items += 2; } while (items != lim); } @@ -697,27 +784,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) #endif -MY_NO_INLINE +Z7_NO_INLINE void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) - - CLzRef *lim; - - for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) + #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7) + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) { - UInt32 v; - SASUB_32(0); + SASUB_32(0) items++; } - { - #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) - lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); - numItems &= K_NORM_ALIGN_MASK; + const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1); + CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask); + numItems &= k_Align_Mask; if (items != lim) { - #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) + #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128) if (g_LzFind_SaturSub) g_LzFind_SaturSub(subValue, items, lim); else @@ -726,12 +809,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) } items = lim; } - - + Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE for (; numItems != 0; numItems--) { - UInt32 v; - SASUB_32(0); + SASUB_32(0) items++; } } @@ -740,7 +821,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) // call MatchFinder_CheckLimits() only after (p->pos++) update -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_CheckLimits(CMatchFinder *p) { if (// !p->streamEndWasReached && p->result == SZ_OK && @@ -768,11 +849,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; // const UInt32 subValue = (1 << 15); // for debug // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); - size_t numSonRefs = p->cyclicBufferSize; - if (p->btMode) - numSonRefs <<= 1; - Inline_MatchFinder_ReduceOffsets(p, subValue); - MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + MatchFinder_REDUCE_OFFSETS(p, subValue) + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize); + { + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + MatchFinder_Normalize3(subValue, p->son, numSonRefs); + } } if (p->cyclicBufferPos == p->cyclicBufferSize) @@ -785,7 +869,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) /* (lenLimit > maxLen) */ -MY_FORCE_INLINE +Z7_FORCE_INLINE static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *d, unsigned maxLen) @@ -867,7 +951,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, } -MY_FORCE_INLINE +Z7_FORCE_INLINE UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32 *d, UInt32 maxLen) @@ -1004,7 +1088,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const #define MOVE_POS_RET MOVE_POS return distances; -MY_NO_INLINE +Z7_NO_INLINE static void MatchFinder_MovePos(CMatchFinder *p) { /* we go here at the end of stream data, when (avail < num_hash_bytes) @@ -1015,11 +1099,11 @@ static void MatchFinder_MovePos(CMatchFinder *p) if (p->btMode) p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue */ - MOVE_POS; + MOVE_POS } #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ + unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ cur = p->buffer; @@ -1028,11 +1112,11 @@ static void MatchFinder_MovePos(CMatchFinder *p) #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); +#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ distances = func(MF_PARAMS(p), \ - distances, (UInt32)_maxLen_); MOVE_POS_RET; + distances, (UInt32)_maxLen_); MOVE_POS_RET #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) @@ -1052,7 +1136,7 @@ static void MatchFinder_MovePos(CMatchFinder *p) static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(2) - HASH2_CALC; + HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(1) @@ -1061,7 +1145,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(2) @@ -1082,7 +1166,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(3) - HASH3_CALC; + HASH3_CALC hash = p->hash; pos = p->pos; @@ -1107,7 +1191,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { SkipMatchesSpec(MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } } @@ -1123,7 +1207,7 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(4) - HASH4_CALC; + HASH4_CALC hash = p->hash; pos = p->pos; @@ -1190,7 +1274,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(5) - HASH5_CALC; + HASH5_CALC hash = p->hash; pos = p->pos; @@ -1246,7 +1330,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { SkipMatchesSpec(MF_PARAMS(p)); - MOVE_POS_RET; + MOVE_POS_RET } break; } @@ -1263,7 +1347,7 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(4) - HASH4_CALC; + HASH4_CALC hash = p->hash; pos = p->pos; @@ -1314,12 +1398,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; + MOVE_POS_RET } break; } - GET_MATCHES_FOOTER_HC(maxLen); + GET_MATCHES_FOOTER_HC(maxLen) } @@ -1330,7 +1414,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32 *hash; GET_MATCHES_HEADER(5) - HASH5_CALC; + HASH5_CALC hash = p->hash; pos = p->pos; @@ -1386,19 +1470,19 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; + MOVE_POS_RET } break; } - GET_MATCHES_FOOTER_HC(maxLen); + GET_MATCHES_FOOTER_HC(maxLen) } UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_HC(2) @@ -1409,7 +1493,7 @@ static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { SKIP_HEADER(2) { - HASH2_CALC; + HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; } @@ -1420,7 +1504,7 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { SKIP_HEADER(3) { - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; } @@ -1433,7 +1517,7 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { UInt32 h2; UInt32 *hash; - HASH3_CALC; + HASH3_CALC hash = p->hash; curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = @@ -1448,7 +1532,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { UInt32 h2, h3; UInt32 *hash; - HASH4_CALC; + HASH4_CALC hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = @@ -1464,7 +1548,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { UInt32 h2, h3; UInt32 *hash; - HASH5_CALC; + HASH5_CALC hash = p->hash; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = @@ -1478,7 +1562,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) #define HC_SKIP_HEADER(minLen) \ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ - Byte *cur; \ + const Byte *cur; \ UInt32 *hash; \ UInt32 *son; \ UInt32 pos = p->pos; \ @@ -1510,7 +1594,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) HC_SKIP_HEADER(4) UInt32 h2, h3; - HASH4_CALC; + HASH4_CALC curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = @@ -1540,7 +1624,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { HC_SKIP_HEADER(3) - HASH_ZIP_CALC; + HASH_ZIP_CALC curMatch = hash[hv]; hash[hv] = pos; @@ -1590,17 +1674,17 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) -void LzFindPrepare() +void LzFindPrepare(void) { - #ifndef FORCE_SATUR_SUB_128 - #ifdef USE_SATUR_SUB_128 + #ifndef FORCE_LZFIND_SATUR_SUB_128 + #ifdef USE_LZFIND_SATUR_SUB_128 LZFIND_SATUR_SUB_CODE_FUNC f = NULL; #ifdef MY_CPU_ARM_OR_ARM64 { if (CPU_IsSupported_NEON()) { // #pragma message ("=== LzFind NEON") - _PRF(printf("\n=== LzFind NEON\n")); + PRF(printf("\n=== LzFind NEON\n")); f = LzFind_SaturSub_128; } // f = 0; // for debug @@ -1609,20 +1693,25 @@ void LzFindPrepare() if (CPU_IsSupported_SSE41()) { // #pragma message ("=== LzFind SSE41") - _PRF(printf("\n=== LzFind SSE41\n")); + PRF(printf("\n=== LzFind SSE41\n")); f = LzFind_SaturSub_128; - #ifdef USE_AVX2 + #ifdef USE_LZFIND_SATUR_SUB_256 if (CPU_IsSupported_AVX2()) { // #pragma message ("=== LzFind AVX2") - _PRF(printf("\n=== LzFind AVX2\n")); + PRF(printf("\n=== LzFind AVX2\n")); f = LzFind_SaturSub_256; } #endif } #endif // MY_CPU_ARM_OR_ARM64 g_LzFind_SaturSub = f; - #endif // USE_SATUR_SUB_128 - #endif // FORCE_SATUR_SUB_128 + #endif // USE_LZFIND_SATUR_SUB_128 + #endif // FORCE_LZFIND_SATUR_SUB_128 } + + +#undef MOVE_POS +#undef MOVE_POS_RET +#undef PRF diff --git a/src/Common/lzma/LzFind.h b/src/Common/lzma/LzFind.h index eea873ff..a3f72c98 100644 --- a/src/Common/lzma/LzFind.h +++ b/src/Common/lzma/LzFind.h @@ -1,8 +1,8 @@ /* LzFind.h -- Match finder for LZ algorithms -2021-07-13 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ -#ifndef __LZ_FIND_H -#define __LZ_FIND_H +#ifndef ZIP7_INC_LZ_FIND_H +#define ZIP7_INC_LZ_FIND_H #include "7zTypes.h" @@ -10,9 +10,9 @@ EXTERN_C_BEGIN typedef UInt32 CLzRef; -typedef struct _CMatchFinder +typedef struct { - Byte *buffer; + const Byte *buffer; UInt32 pos; UInt32 posLimit; UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ @@ -32,8 +32,8 @@ typedef struct _CMatchFinder UInt32 hashMask; UInt32 cutValue; - Byte *bufferBase; - ISeqInStream *stream; + Byte *bufBase; + ISeqInStreamPtr stream; UInt32 blockSize; UInt32 keepSizeBefore; @@ -43,7 +43,9 @@ typedef struct _CMatchFinder size_t directInputRem; UInt32 historySize; UInt32 fixedHashSize; - UInt32 hashSizeSum; + Byte numHashBytes_Min; + Byte numHashOutBits; + Byte _pad2_[2]; SRes result; UInt32 crc[256]; size_t numRefs; @@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p); -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +/* (directInput = 0) is default value. + It's required to provide correct (directInput) value + before calling MatchFinder_Create(). + You can set (directInput) by any of the following calls: + - MatchFinder_SET_DIRECT_INPUT_BUF() + - MatchFinder_SET_STREAM() + - MatchFinder_SET_STREAM_MODE() */ + +#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \ + (p)->stream = NULL; \ + (p)->directInput = 1; \ + (p)->buffer = (_src_); \ + (p)->directInputRem = (_srcLen_); } + +/* +#define MatchFinder_SET_STREAM_MODE(p) { \ + (p)->directInput = 0; } +*/ + +#define MatchFinder_SET_STREAM(p, _stream_) { \ + (p)->stream = _stream_; \ + (p)->directInput = 0; } + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); -// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); /* -#define Inline_MatchFinder_InitPos(p, val) \ +#define MatchFinder_INIT_POS(p, val) \ (p)->pos = (val); \ (p)->streamPos = (val); */ -#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +#define MatchFinder_REDUCE_OFFSETS(p, subValue) \ (p)->pos -= (subValue); \ (p)->streamPos -= (subValue); @@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef void (*Mf_Skip_Func)(void *object, UInt32); -typedef struct _IMatchFinder +typedef struct { Mf_Init_Func Init; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; diff --git a/src/Common/lzma/LzFindMt.c b/src/Common/lzma/LzFindMt.c index 4e67fc3f..5253e6eb 100644 --- a/src/Common/lzma/LzFindMt.c +++ b/src/Common/lzma/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2021-12-21 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -69,7 +69,7 @@ extern UInt64 g_NumIters_Bytes; UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } -#define __MT_HASH4_CALC { \ +#define MT_HASH4_CALC { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ h2 = temp & (kHash2Size - 1); \ temp ^= ((UInt32)cur[2] << 8); \ @@ -79,14 +79,14 @@ extern UInt64 g_NumIters_Bytes; */ -MY_NO_INLINE +Z7_NO_INLINE static void MtSync_Construct(CMtSync *p) { p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; p->csWasEntered = False; - Thread_Construct(&p->thread); + Thread_CONSTRUCT(&p->thread) Event_Construct(&p->canStart); Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); @@ -116,7 +116,7 @@ static void MtSync_Construct(CMtSync *p) (p)->csWasEntered = False; } -MY_NO_INLINE +Z7_NO_INLINE static UInt32 MtSync_GetNextBlock(CMtSync *p) { UInt32 numBlocks = 0; @@ -140,14 +140,14 @@ static UInt32 MtSync_GetNextBlock(CMtSync *p) // buffer is UNLOCKED here Semaphore_Wait(&p->filledSemaphore); - LOCK_BUFFER(p); + LOCK_BUFFER(p) return numBlocks; } /* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ -MY_NO_INLINE +Z7_NO_INLINE static void MtSync_StopWriting(CMtSync *p) { if (!Thread_WasCreated(&p->thread) || p->needStart) @@ -185,7 +185,7 @@ static void MtSync_StopWriting(CMtSync *p) } -MY_NO_INLINE +Z7_NO_INLINE static void MtSync_Destruct(CMtSync *p) { PRF(printf("\nMtSync_Destruct %p\n", p)); @@ -220,11 +220,11 @@ static void MtSync_Destruct(CMtSync *p) // #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } // we want to get real system error codes here instead of SZ_ERROR_THREAD -#define RINOK_THREAD(x) RINOK(x) +#define RINOK_THREAD(x) RINOK_WRes(x) // call it before each new file (when new starting is required): -MY_NO_INLINE +Z7_NO_INLINE static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) { WRes wres; @@ -245,12 +245,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * if (p->wasCreated) return SZ_OK; - RINOK_THREAD(CriticalSection_Init(&p->cs)); + RINOK_THREAD(CriticalSection_Init(&p->cs)) p->csWasInitialized = True; p->csWasEntered = False; - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)) + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)) p->needStart = True; p->exit = True; /* p->exit is unused before (canStart) Event. @@ -264,13 +264,13 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * else wres = Thread_Create(&p->thread, startAddress, obj); - RINOK_THREAD(wres); + RINOK_THREAD(wres) p->wasCreated = True; return SZ_OK; } -MY_NO_INLINE +Z7_NO_INLINE static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) { const WRes wres = MtSync_Create_WRes(p, startAddress, obj); @@ -519,7 +519,7 @@ static void HashThreadFunc(CMatchFinderMt *mt) if (mf->pos > (UInt32)kMtMaxValForNormalize - num) { const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); - Inline_MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_REDUCE_OFFSETS(mf, subValue) MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); } @@ -560,7 +560,7 @@ static void HashThreadFunc(CMatchFinderMt *mt) */ -UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes); @@ -749,7 +749,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) } -MY_NO_INLINE +Z7_NO_INLINE static void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; @@ -864,15 +864,15 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) return SZ_ERROR_MEM; - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)) + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)) return SZ_OK; } SRes MatchFinderMt_InitMt(CMatchFinderMt *p) { - RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); + RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)) return MtSync_Init(&p->btSync, kMtBtNumBlocks); } @@ -941,7 +941,7 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) } -MY_NO_INLINE +Z7_NO_INLINE static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { if (p->failure_LZ_BT) @@ -1163,7 +1163,7 @@ UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) */ -static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; UInt32 *hash = p->hash; @@ -1179,9 +1179,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) (hash + kFix3HashSize)[h3] = m; // (hash + kFix4HashSize)[h4] = m; - #define _USE_H2 - - #ifdef _USE_H2 + // #define BT5_USE_H2 + // #ifdef BT5_USE_H2 if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { d[1] = m - c2 - 1; @@ -1197,8 +1196,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) } d[0] = 3; d += 2; - - #ifdef _USE_H4 + + #ifdef BT5_USE_H4 if (c4 >= matchMinPos) if ( cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && @@ -1214,7 +1213,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) d[0] = 2; d += 2; } - #endif + // #endif if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) { @@ -1228,7 +1227,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) d += 2; } - #ifdef _USE_H4 + #ifdef BT5_USE_H4 if (c4 >= matchMinPos) if ( cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && @@ -1244,7 +1243,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) } -static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) { const UInt32 *bt = p->btBufPos; const UInt32 len = *bt++; @@ -1268,7 +1267,7 @@ static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) -static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) { const UInt32 *bt = p->btBufPos; UInt32 len = *bt++; @@ -1398,3 +1397,10 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) break; } } + +#undef RINOK_THREAD +#undef PRF +#undef MF +#undef GetUi24hi_from32 +#undef LOCK_BUFFER +#undef UNLOCK_BUFFER diff --git a/src/Common/lzma/LzFindMt.h b/src/Common/lzma/LzFindMt.h index 660b7244..db5923ea 100644 --- a/src/Common/lzma/LzFindMt.h +++ b/src/Common/lzma/LzFindMt.h @@ -1,15 +1,15 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2021-07-12 : Igor Pavlov : Public domain */ +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __LZ_FIND_MT_H -#define __LZ_FIND_MT_H +#ifndef ZIP7_INC_LZ_FIND_MT_H +#define ZIP7_INC_LZ_FIND_MT_H #include "LzFind.h" #include "Threads.h" EXTERN_C_BEGIN -typedef struct _CMtSync +typedef struct { UInt32 numProcessedBlocks; CThread thread; @@ -39,7 +39,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); -typedef struct _CMatchFinderMt +typedef struct { /* LZ */ const Byte *pointerToCurPos; diff --git a/src/Common/lzma/LzFindOpt.c b/src/Common/lzma/LzFindOpt.c index 8ff006e0..85bdc136 100644 --- a/src/Common/lzma/LzFindOpt.c +++ b/src/Common/lzma/LzFindOpt.c @@ -1,5 +1,5 @@ /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms -2021-07-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -41,8 +41,8 @@ UInt64 g_NumIters_Bytes; // #define CYC_TO_POS_OFFSET 1 // for debug /* -MY_NO_INLINE -UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +Z7_NO_INLINE +UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) { do @@ -214,13 +214,13 @@ else to eliminate "movsx" BUG in old MSVC x64 compiler. */ -UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes); -MY_NO_INLINE -UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, +Z7_NO_INLINE +UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes) @@ -404,7 +404,7 @@ else /* typedef UInt32 uint32plus; // size_t -UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, +UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes) diff --git a/src/Common/lzma/LzHash.h b/src/Common/lzma/LzHash.h index 77b898cf..2b6290b6 100644 --- a/src/Common/lzma/LzHash.h +++ b/src/Common/lzma/LzHash.h @@ -1,8 +1,8 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2019-10-30 : Igor Pavlov : Public domain */ +/* LzHash.h -- HASH constants for LZ algorithms +2023-03-05 : Igor Pavlov : Public domain */ -#ifndef __LZ_HASH_H -#define __LZ_HASH_H +#ifndef ZIP7_INC_LZ_HASH_H +#define ZIP7_INC_LZ_HASH_H /* (kHash2Size >= (1 << 8)) : Required diff --git a/src/Common/lzma/LzmaDec.c b/src/Common/lzma/LzmaDec.c index d6742e5a..69bb8bba 100644 --- a/src/Common/lzma/LzmaDec.c +++ b/src/Common/lzma/LzmaDec.c @@ -1,5 +1,5 @@ /* LzmaDec.c -- LZMA Decoder -2021-04-01 : Igor Pavlov : Public domain */ +2023-04-07 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -8,15 +8,15 @@ /* #include "CpuArch.h" */ #include "LzmaDec.h" -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define RC_INIT_SIZE 5 -#ifndef _LZMA_DEC_OPT +#ifndef Z7_LZMA_DEC_OPT #define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } @@ -25,14 +25,14 @@ #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ - { UPDATE_0(p); i = (i + i); A0; } else \ - { UPDATE_1(p); i = (i + i) + 1; A1; } + { UPDATE_0(p) i = (i + i); A0; } else \ + { UPDATE_1(p) i = (i + i) + 1; A1; } #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ - { UPDATE_0(p + i); A0; } else \ - { UPDATE_1(p + i); A1; } + { UPDATE_0(p + i) A0; } else \ + { UPDATE_1(p + i) A1; } #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) @@ -40,19 +40,19 @@ #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } -/* #define _LZMA_SIZE_OPT */ +/* #define Z7_LZMA_SIZE_OPT */ -#ifdef _LZMA_SIZE_OPT +#ifdef Z7_LZMA_SIZE_OPT #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #else #define TREE_6_DECODE(probs, i) \ { i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ + TREE_GET_BIT(probs, i) \ i -= 0x40; } #endif @@ -64,25 +64,25 @@ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) -#endif // _LZMA_DEC_OPT +#endif // Z7_LZMA_DEC_OPT #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } -#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; #define UPDATE_1_CHECK range -= bound; code -= bound; #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ - { UPDATE_0_CHECK; i = (i + i); A0; } else \ - { UPDATE_1_CHECK; i = (i + i) + 1; A1; } + { UPDATE_0_CHECK i = (i + i); A0; } else \ + { UPDATE_1_CHECK i = (i + i) + 1; A1; } #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define TREE_DECODE_CHECK(probs, limit, i) \ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ - { UPDATE_0_CHECK; i += m; m += m; } else \ - { UPDATE_1_CHECK; m += m; i += m; } + { UPDATE_0_CHECK i += m; m += m; } else \ + { UPDATE_1_CHECK m += m; i += m; } #define kNumPosBitsMax 4 @@ -224,14 +224,14 @@ Out: */ -#ifdef _LZMA_DEC_OPT +#ifdef Z7_LZMA_DEC_OPT -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); #else static -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; @@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit IF_BIT_0(prob) { unsigned symbol; - UPDATE_0(prob); + UPDATE_0(prob) prob = probs + Literal; if (processedPos != 0 || checkDicSize != 0) prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); @@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit { state -= (state < 4) ? state : 3; symbol = 1; - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT do { NORMAL_LITER_DEC } while (symbol < 0x100); #else NORMAL_LITER_DEC @@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit unsigned offs = 0x100; state -= (state < 10) ? 3 : 6; symbol = 1; - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT do { unsigned bit; @@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit } { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRep + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) state += kNumStates; prob = probs + LenCoder; } else { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG0 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) // that case was checked before with kBadRepCode // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } @@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit state = state < kNumLitStates ? 9 : 11; continue; } - UPDATE_1(prob); + UPDATE_1(prob) } else { UInt32 distance; - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG1 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) distance = rep1; } else { - UPDATE_1(prob); + UPDATE_1(prob) prob = probs + IsRepG2 + state; IF_BIT_0(prob) { - UPDATE_0(prob); + UPDATE_0(prob) distance = rep2; } else { - UPDATE_1(prob); + UPDATE_1(prob) distance = rep3; rep3 = rep2; } @@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit prob = probs + RepLenCoder; } - #ifdef _LZMA_SIZE_OPT + #ifdef Z7_LZMA_SIZE_OPT { unsigned lim, offset; CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE; offset = 0; lim = (1 << kLenNumLowBits); } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenChoice2; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; lim = (1 << kLenNumLowBits); } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; lim = (1 << kLenNumHighBits); } } - TREE_DECODE(probLen, lim, len); + TREE_DECODE(probLen, lim, len) len += offset; } #else @@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit CLzmaProb *probLen = prob + LenChoice; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE; len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) len -= 8; } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenChoice2; IF_BIT_0(probLen) { - UPDATE_0(probLen); + UPDATE_0(probLen) probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) + TREE_GET_BIT(probLen, len) } else { - UPDATE_1(probLen); + UPDATE_1(probLen) probLen = prob + LenHigh; - TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + TREE_DECODE(probLen, (1 << kLenNumHighBits), len) len += kLenNumLowSymbols * 2; } } @@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit UInt32 distance; prob = probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); + TREE_6_DECODE(prob, distance) if (distance >= kStartPosModelIndex) { unsigned posSlot = (unsigned)distance; @@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit distance++; do { - REV_BIT_VAR(prob, distance, m); + REV_BIT_VAR(prob, distance, m) } while (--numDirectBits); distance -= m; @@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit distance <<= kNumAlignBits; { unsigned i = 1; - REV_BIT_CONST(prob, i, 1); - REV_BIT_CONST(prob, i, 2); - REV_BIT_CONST(prob, i, 4); - REV_BIT_LAST (prob, i, 8); + REV_BIT_CONST(prob, i, 1) + REV_BIT_CONST(prob, i, 2) + REV_BIT_CONST(prob, i, 4) + REV_BIT_LAST (prob, i, 8) distance |= i; } if (distance == (UInt32)0xFFFFFFFF) @@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit } while (dicPos < limit && buf < bufLimit); - NORMALIZE; + NORMALIZE p->buf = buf; p->range = range; @@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { unsigned len = (unsigned)p->remainLen; if (len == 0 /* || len >= kMatchSpecLenStart */) @@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize): (p->checkDicSize == p->prop.dicSize) */ -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { if (p->checkDicSize == 0) { @@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt else { unsigned len; - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRep + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK state = 0; prob = probs + LenCoder; res = DUMMY_MATCH; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK res = DUMMY_REP; prob = probs + IsRepG0 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK break; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK } } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRepG1 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK prob = probs + IsRepG2 + state; IF_BIT_0_CHECK(prob) { - UPDATE_0_CHECK; + UPDATE_0_CHECK } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK } } } @@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt const CLzmaProb *probLen = prob + LenChoice; IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; + UPDATE_0_CHECK probLen = prob + LenLow + GET_LEN_STATE; offset = 0; limit = 1 << kLenNumLowBits; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK probLen = prob + LenChoice2; IF_BIT_0_CHECK(probLen) { - UPDATE_0_CHECK; + UPDATE_0_CHECK probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; limit = 1 << kLenNumLowBits; } else { - UPDATE_1_CHECK; + UPDATE_1_CHECK probLen = prob + LenHigh; offset = kLenNumLowSymbols * 2; limit = 1 << kLenNumHighBits; } } - TREE_DECODE_CHECK(probLen, limit, len); + TREE_DECODE_CHECK(probLen, limit, len) len += offset; } @@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt prob = probs + PosSlot + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot) if (posSlot >= kStartPosModelIndex) { unsigned numDirectBits = ((posSlot >> 1) - 1); @@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt unsigned m = 1; do { - REV_BIT_CHECK(prob, i, m); + REV_BIT_CHECK(prob, i, m) } while (--numDirectBits); } @@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt } break; } - NORMALIZE_CHECK; + NORMALIZE_CHECK *bufOut = buf; return res; @@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have */ -#define RETURN__NOT_FINISHED__FOR_FINISH \ +#define RETURN_NOT_FINISHED_FOR_FINISH \ *status = LZMA_STATUS_NOT_FINISHED; \ return SZ_ERROR_DATA; // for strict mode // return SZ_OK; // for relaxed mode @@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } checkEndMarkNow = 1; } @@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr for (i = 0; i < (unsigned)dummyProcessed; i++) p->tempBuf[i] = src[i]; // p->remainLen = kMatchSpecLen_Error_Data; - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } bufLimit = src; @@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; p->tempBufSize = (unsigned)dummyProcessed; // p->remainLen = kMatchSpecLen_Error_Data; - RETURN__NOT_FINISHED__FOR_FINISH; + RETURN_NOT_FINISHED_FOR_FINISH } } @@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) { CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) p->prop = propNew; return SZ_OK; } @@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll { CLzmaProps propNew; SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + RINOK(LzmaProps_Decode(&propNew, props, propsSize)) + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)) { UInt32 dictSize = propNew.dicSize; SizeT mask = ((UInt32)1 << 12) - 1; if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; - else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1; dicBufSize = ((SizeT)dictSize + mask) & ~mask; if (dicBufSize < dictSize) dicBufSize = dictSize; @@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, *status = LZMA_STATUS_NOT_SPECIFIED; if (inSize < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + LzmaDec_CONSTRUCT(&p) + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)) p.dic = dest; p.dicBufSize = outSize; LzmaDec_Init(&p); diff --git a/src/Common/lzma/LzmaDec.h b/src/Common/lzma/LzmaDec.h index 6f129625..b0ce28fa 100644 --- a/src/Common/lzma/LzmaDec.h +++ b/src/Common/lzma/LzmaDec.h @@ -1,19 +1,19 @@ /* LzmaDec.h -- LZMA Decoder -2020-03-19 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H +#ifndef ZIP7_INC_LZMA_DEC_H +#define ZIP7_INC_LZMA_DEC_H #include "7zTypes.h" EXTERN_C_BEGIN -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, +/* #define Z7_LZMA_PROB32 */ +/* Z7_LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ typedef -#ifdef _LZMA_PROB32 +#ifdef Z7_LZMA_PROB32 UInt32 #else UInt16 @@ -25,7 +25,7 @@ typedef #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaProps +typedef struct { Byte lc; Byte lp; @@ -73,7 +73,8 @@ typedef struct Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; } +#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p) void LzmaDec_Init(CLzmaDec *p); diff --git a/src/Common/lzma/LzmaEnc.c b/src/Common/lzma/LzmaEnc.c index b04a7b7b..6d13cac8 100644 --- a/src/Common/lzma/LzmaEnc.c +++ b/src/Common/lzma/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2021-11-18: Igor Pavlov : Public domain */ +2023-04-13: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -16,22 +16,22 @@ #include "LzmaEnc.h" #include "LzFind.h" -#ifndef _7ZIP_ST +#ifndef Z7_ST #include "LzFindMt.h" #endif /* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); -void LzmaEnc_Finish(CLzmaEncHandle pp); -void LzmaEnc_SaveState(CLzmaEncHandle pp); -void LzmaEnc_RestoreState(CLzmaEncHandle pp); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p); +void LzmaEnc_Finish(CLzmaEncHandle p); +void LzmaEnc_SaveState(CLzmaEncHandle p); +void LzmaEnc_RestoreState(CLzmaEncHandle p); #ifdef SHOW_STAT static unsigned g_STAT_OFFSET = 0; @@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0; /* for good normalization speed we still reserve 256 MB before 4 GB range */ #define kLzmaMaxHistorySize ((UInt32)15 << 28) -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) +// #define kNumTopBits 24 +#define kTopValue ((UInt32)1 << 24) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) @@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->dictSize = p->mc = 0; p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->numHashOutBits = 0; p->writeEndMark = 0; p->affinity = 0; } @@ -99,7 +100,7 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->numThreads < 0) p->numThreads = - #ifndef _7ZIP_ST + #ifndef Z7_ST ((p->btMode && p->algo) ? 2 : 1); #else 1; @@ -293,7 +294,7 @@ typedef struct #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) typedef -#ifdef _LZMA_PROB32 +#ifdef Z7_LZMA_PROB32 UInt32 #else UInt16 @@ -350,7 +351,7 @@ typedef struct Byte *buf; Byte *bufLim; Byte *bufBase; - ISeqOutStream *outStream; + ISeqOutStreamPtr outStream; UInt64 processed; SRes res; } CRangeEnc; @@ -383,7 +384,7 @@ typedef struct typedef UInt32 CProbPrice; -typedef struct +struct CLzmaEnc { void *matchFinderObj; IMatchFinder2 matchFinder; @@ -426,7 +427,7 @@ typedef struct UInt32 dictSize; SRes result; - #ifndef _7ZIP_ST + #ifndef Z7_ST BoolInt mtMode; // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; @@ -439,7 +440,7 @@ typedef struct // we suppose that we have 8-bytes alignment after CMatchFinder - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte pad[128]; #endif @@ -479,77 +480,59 @@ typedef struct CSaveState saveState; // BoolInt mf_Failure; - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte pad2[128]; #endif -} CLzmaEnc; +}; #define MFB (p->matchFinderBase) /* -#ifndef _7ZIP_ST +#ifndef Z7_ST #define MFB (p->matchFinderMt.MatchFinder) #endif */ -#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); - -void LzmaEnc_SaveState(CLzmaEncHandle pp) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - CSaveState *dest = &p->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; - - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); +// #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p; +// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p; + +#define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr)); + +#define COPY_LZMA_ENC_STATE(d, s, p) \ + (d)->state = (s)->state; \ + COPY_ARR(d, s, reps) \ + COPY_ARR(d, s, posAlignEncoder) \ + COPY_ARR(d, s, isRep) \ + COPY_ARR(d, s, isRepG0) \ + COPY_ARR(d, s, isRepG1) \ + COPY_ARR(d, s, isRepG2) \ + COPY_ARR(d, s, isMatch) \ + COPY_ARR(d, s, isRep0Long) \ + COPY_ARR(d, s, posSlotEncoder) \ + COPY_ARR(d, s, posEncoders) \ + (d)->lenProbs = (s)->lenProbs; \ + (d)->repLenProbs = (s)->repLenProbs; \ + memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb)); + +void LzmaEnc_SaveState(CLzmaEncHandle p) +{ + // GET_CLzmaEnc_p + CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(v, p, p) } - -void LzmaEnc_RestoreState(CLzmaEncHandle pp) +void LzmaEnc_RestoreState(CLzmaEncHandle p) { - CLzmaEnc *dest = (CLzmaEnc *)pp; - const CSaveState *p = &dest->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; - - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); + // GET_CLzmaEnc_p + const CSaveState *v = &p->saveState; + COPY_LZMA_ENC_STATE(p, v, p) } - -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) +Z7_NO_INLINE +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); @@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->fastMode = (props.algo == 0); // p->_maxMode = True; MFB.btMode = (Byte)(props.btMode ? 1 : 0); + // MFB.btMode = (Byte)(props.btMode); { unsigned numHashBytes = 4; if (props.btMode) @@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) if (props.numHashBytes >= 5) numHashBytes = 5; MFB.numHashBytes = numHashBytes; + // MFB.numHashBytes_Min = 2; + MFB.numHashOutBits = (Byte)props.numHashOutBits; } MFB.cutValue = props.mc; p->writeEndMark = (BoolInt)props.writeEndMark; - #ifndef _7ZIP_ST + #ifndef Z7_ST /* if (newMultiThread != _multiThread) { @@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } -void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p MFB.expectedDataSize = expectedDataSiize; } @@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p) p->res = SZ_OK; } -MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) +Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { const size_t num = (size_t)(p->buf - p->bufBase); if (p->res == SZ_OK) @@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) p->buf = p->bufBase; } -MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p) { UInt32 low = (UInt32)p->low; unsigned high = (unsigned)(p->low >> 32); @@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p) ttt = *(prob); \ newBound = (range >> kNumBitModelTotalBits) * ttt; -// #define _LZMA_ENC_USE_BRANCH +// #define Z7_LZMA_ENC_USE_BRANCH -#ifdef _LZMA_ENC_USE_BRANCH +#ifdef Z7_LZMA_ENC_USE_BRANCH #define RC_BIT(p, prob, bit) { \ RC_BIT_PRE(p, prob) \ @@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) CLzmaProb *prob = probs + (sym >> 8); UInt32 bit = (sym >> 7) & 1; sym <<= 1; - RC_BIT(p, prob, bit); + RC_BIT(p, prob, bit) } while (sym < 0x10000); p->range = range; @@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn bit = (sym >> 7) & 1; sym <<= 1; offs &= ~(matchByte ^ sym); - RC_BIT(p, prob, bit); + RC_BIT(p, prob, bit) } while (sym < 0x10000); p->range = range; @@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) #define GET_PRICE(prob, bit) \ - p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] #define GET_PRICEa(prob, bit) \ - ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits] #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] @@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi unsigned bit = sym & 1; // RangeEnc_EncodeBit(rc, probs + m, bit); sym >>= 1; - RC_BIT(rc, probs + m, bit); + RC_BIT(rc, probs + m, bit) m = (m << 1) | bit; } while (--numBits); @@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS UInt32 range, ttt, newBound; CLzmaProb *probs = p->low; range = rc->range; - RC_BIT_PRE(rc, probs); + RC_BIT_PRE(rc, probs) if (sym >= kLenNumLowSymbols) { - RC_BIT_1(rc, probs); + RC_BIT_1(rc, probs) probs += kLenNumLowSymbols; - RC_BIT_PRE(rc, probs); + RC_BIT_PRE(rc, probs) if (sym >= kLenNumLowSymbols * 2) { - RC_BIT_1(rc, probs); + RC_BIT_1(rc, probs) rc->range = range; // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); @@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS { unsigned m; unsigned bit; - RC_BIT_0(rc, probs); + RC_BIT_0(rc, probs) probs += (posState << (1 + kLenNumLowBits)); - bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; - bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; - bit = sym & 1; RC_BIT(rc, probs + m, bit); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit) rc->range = range; } } @@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price } -MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( +Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( CLenPriceEnc *p, unsigned numPosStates, const CLenEnc *enc, @@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) + GET_PRICE_1(p->isRep[state]) \ + GET_PRICE_0(p->isRepG0[state]) -MY_FORCE_INLINE +Z7_FORCE_INLINE static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) { UInt32 price; @@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) LitEnc_GetPrice(probs, curByte, p->ProbPrices)); } - MakeAs_Lit(&p->opt[1]); + MakeAs_Lit(&p->opt[1]) matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); @@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (shortRepPrice < p->opt[1].price) { p->opt[1].price = shortRepPrice; - MakeAs_ShortRep(&p->opt[1]); + MakeAs_ShortRep(&p->opt[1]) } if (last < 2) { @@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) else { unsigned slot; - GetPosSlot2(dist, slot); + GetPosSlot2(dist, slot) price += p->alignPrices[dist & kAlignMask]; price += p->posSlotPrices[lenToPosState][slot]; } @@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) unsigned delta = best - cur; if (delta != 0) { - MOVE_POS(p, delta); + MOVE_POS(p, delta) } } cur = best; @@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { nextOpt->price = litPrice; nextOpt->len = 1; - MakeAs_Lit(nextOpt); + MakeAs_Lit(nextOpt) nextIsLit = True; } } @@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { nextOpt->price = shortRepPrice; nextOpt->len = 1; - MakeAs_ShortRep(nextOpt); + MakeAs_ShortRep(nextOpt) nextIsLit = False; } } @@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); + GetPosSlot2(dist, posSlot) for (len = /*2*/ startLen; ; len++) { @@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) break; dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); + GetPosSlot2(dist, posSlot) } } } @@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { UInt32 ttt, newBound; RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); + RC_BIT_1(&p->rc, probs + m) m = (m << 1) + 1; } while (m < (1 << kNumPosSlotBits)); @@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { UInt32 ttt, newBound; RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); + RC_BIT_1(&p->rc, probs + m) m = (m << 1) + 1; } while (m < kAlignTableSize); @@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p) if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; - #ifndef _7ZIP_ST + #ifndef Z7_ST if ( // p->mf_Failure || (p->mtMode && @@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p) p->matchFinderMt.failure_LZ_BT)) ) { - p->result = MY_HRES_ERROR__INTERNAL_ERROR; + p->result = MY_HRES_ERROR_INTERNAL_ERROR; // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); } #endif @@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p) } -MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) +Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; @@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) } -MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) +Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) { unsigned i; const CProbPrice *ProbPrices = p->ProbPrices; @@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) } -MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) +Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) { // int y; for (y = 0; y < 100; y++) { @@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p) RangeEnc_Construct(&p->rc); MatchFinder_Construct(&MFB); - #ifndef _7ZIP_ST + #ifndef Z7_ST p->matchFinderMt.MatchFinder = &MFB; MatchFinderMt_Construct(&p->matchFinderMt); #endif @@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p) { CLzmaEncProps props; LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); + LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props); } #ifndef LZMA_LOG_BSR @@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - #ifndef _7ZIP_ST + #ifndef Z7_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); #endif @@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + // GET_CLzmaEnc_p + LzmaEnc_Destruct(p, alloc, allocBig); ISzAlloc_Free(alloc, p); } -MY_NO_INLINE +Z7_NO_INLINE static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->needInit) { - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtMode) { - RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)) } #endif p->matchFinder.Init(p->matchFinderObj); @@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa if (p->finished) return p->result; - RINOK(CheckErrors(p)); + RINOK(CheckErrors(p)) nowPos32 = (UInt32)p->nowPos64; startPos32 = nowPos32; @@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa const Byte *data; unsigned state; - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) p->rc.range = range; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; probs = LIT_PROBS(nowPos32, *(data - 1)); @@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRep[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist < LZMA_NUM_REPS) { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG0[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 0) { - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) probs = &p->isRep0Long[p->state][posState]; RC_BIT_PRE(&p->rc, probs) if (len != 1) { - RC_BIT_1_BASE(&p->rc, probs); + RC_BIT_1_BASE(&p->rc, probs) } else { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) p->state = kShortRepNextStates[p->state]; } } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG1[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 1) { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) dist = p->reps[1]; } else { - RC_BIT_1(&p->rc, probs); + RC_BIT_1(&p->rc, probs) probs = &p->isRepG2[p->state]; RC_BIT_PRE(&p->rc, probs) if (dist == 2) { - RC_BIT_0_BASE(&p->rc, probs); + RC_BIT_0_BASE(&p->rc, probs) dist = p->reps[2]; } else { - RC_BIT_1_BASE(&p->rc, probs); + RC_BIT_1_BASE(&p->rc, probs) dist = p->reps[3]; p->reps[3] = p->reps[2]; } @@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa else { unsigned posSlot; - RC_BIT_0(&p->rc, probs); + RC_BIT_0(&p->rc, probs) p->rc.range = range; p->state = kMatchNextStates[p->state]; @@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa p->reps[0] = dist + 1; p->matchPriceCount++; - GetPosSlot(dist, posSlot); + GetPosSlot(dist, posSlot) // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); { UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); @@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; sym <<= 1; - RC_BIT(&p->rc, prob, bit); + RC_BIT(&p->rc, prob, bit) } while (sym < (1 << kNumPosSlotBits * 2)); p->rc.range = range; @@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa { unsigned m = 1; unsigned bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) p->rc.range = range; // p->alignPriceCount++; } @@ -2704,7 +2691,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; - #ifndef _7ZIP_ST + #ifndef Z7_ST p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif @@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, (numFastBytes + LZMA_MATCH_LEN_MAX + 1) */ - #ifndef _7ZIP_ST + #ifndef Z7_ST if (p->mtMode) { RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ - , allocBig)); + , allocBig)) p->matchFinderObj = &p->matchFinderMt; - MFB.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0); + MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else @@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr p->finished = False; p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + p->nowPos64 = 0; + p->needInit = 1; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); - p->nowPos64 = 0; return SZ_OK; } -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, +static SRes LzmaEnc_Prepare(CLzmaEncHandle p, + ISeqOutStreamPtr outStream, + ISeqInStreamPtr inStream, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - MFB.stream = inStream; - p->needInit = 1; + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); } -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream *inStream, UInt32 keepWindowSize, +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, + ISeqInStreamPtr inStream, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - MFB.stream = inStream; - p->needInit = 1; + // GET_CLzmaEnc_p + MatchFinder_SET_STREAM(&MFB, inStream) return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } -static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) -{ - MFB.directInput = 1; - MFB.bufferBase = (Byte *)src; - MFB.directInputRem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, + const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->needInit = 1; - - LzmaEnc_SetDataSize(pp, srcLen); + // GET_CLzmaEnc_p + MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen) + LzmaEnc_SetDataSize(p, srcLen); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } -void LzmaEnc_Finish(CLzmaEncHandle pp) +void LzmaEnc_Finish(CLzmaEncHandle p) { - #ifndef _7ZIP_ST - CLzmaEnc *p = (CLzmaEnc *)pp; + #ifndef Z7_ST + // GET_CLzmaEnc_p if (p->mtMode) MatchFinderMt_ReleaseStream(&p->matchFinderMt); #else - UNUSED_VAR(pp); + UNUSED_VAR(p) #endif } @@ -2933,13 +2913,13 @@ typedef struct { ISeqOutStream vt; Byte *data; - SizeT rem; + size_t rem; BoolInt overflow; } CLzmaEnc_SeqOutStreamBuf; -static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) +static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size) { - CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); + Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf) if (p->rem < size) { size = p->rem; @@ -2956,24 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s /* -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + GET_const_CLzmaEnc_p return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } */ -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_const_CLzmaEnc_p return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; } -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, +// (desiredPackSize == 0) is not allowed +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit, Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) { - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p UInt64 nowPos64; SRes res; CLzmaEnc_SeqOutStreamBuf outStream; @@ -2990,14 +2971,10 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, if (reInit) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); - - nowPos64 = p->nowPos64; RangeEnc_Init(&p->rc); p->rc.outStream = &outStream.vt; - - if (desiredPackSize == 0) - return SZ_ERROR_OUTPUT_EOF; - + nowPos64 = p->nowPos64; + res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); *unpackSize = (UInt32)(p->nowPos64 - nowPos64); @@ -3009,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, } -MY_NO_INLINE -static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) +Z7_NO_INLINE +static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress) { SRes res = SZ_OK; - #ifndef _7ZIP_ST + #ifndef Z7_ST Byte allocaDummy[0x300]; allocaDummy[0] = 0; allocaDummy[1] = allocaDummy[0]; @@ -3036,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } } - LzmaEnc_Finish(p); + LzmaEnc_Finish((CLzmaEncHandle)(void *)p); /* if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) @@ -3048,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) } -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); - return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); + // GET_CLzmaEnc_p + RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig)) + return LzmaEnc_Encode2(p, progress); } -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size) { if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; { - const CLzmaEnc *p = (const CLzmaEnc *)pp; + // GET_CLzmaEnc_p const UInt32 dictSize = p->dictSize; UInt32 v; props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); @@ -3086,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) while (v < dictSize); } - SetUi32(props + 1, v); + SetUi32(props + 1, v) return SZ_OK; } } -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p) { - return (unsigned)((CLzmaEnc *)pp)->writeEndMark; + // GET_CLzmaEnc_p + return (unsigned)p->writeEndMark; } -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { SRes res; - CLzmaEnc *p = (CLzmaEnc *)pp; + // GET_CLzmaEnc_p CLzmaEnc_SeqOutStreamBuf outStream; @@ -3114,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte p->writeEndMark = writeEndMark; p->rc.outStream = &outStream.vt; - res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); + res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig); if (res == SZ_OK) { @@ -3123,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte res = SZ_ERROR_FAIL; } - *destLen -= outStream.rem; + *destLen -= (SizeT)outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; @@ -3132,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); + CLzmaEncHandle p = LzmaEnc_Create(alloc); SRes res; if (!p) return SZ_ERROR_MEM; @@ -3154,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, /* -#ifndef _7ZIP_ST -void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +#ifndef Z7_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2]) { - const CLzmaEnc *p = (CLzmaEnc *)pp; + GET_const_CLzmaEnc_p lz_threads[0] = p->matchFinderMt.hashSync.thread; lz_threads[1] = p->matchFinderMt.btSync.thread; } diff --git a/src/Common/lzma/LzmaEnc.h b/src/Common/lzma/LzmaEnc.h index bc2ed504..9f8039a1 100644 --- a/src/Common/lzma/LzmaEnc.h +++ b/src/Common/lzma/LzmaEnc.h @@ -1,8 +1,8 @@ /* LzmaEnc.h -- LZMA Encoder -2019-10-30 : Igor Pavlov : Public domain */ +2023-04-13 : Igor Pavlov : Public domain */ -#ifndef __LZMA_ENC_H -#define __LZMA_ENC_H +#ifndef ZIP7_INC_LZMA_ENC_H +#define ZIP7_INC_LZMA_ENC_H #include "7zTypes.h" @@ -10,7 +10,7 @@ EXTERN_C_BEGIN #define LZMA_PROPS_SIZE 5 -typedef struct _CLzmaEncProps +typedef struct { int level; /* 0 <= level <= 9 */ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version @@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps int fb; /* 5 <= fb <= 273, default = 32 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int numHashBytes; /* 2, 3 or 4, default = 4 */ + unsigned numHashOutBits; /* default = ? */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ int numThreads; /* 1 or 2, default = 2 */ + // int _pad; + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ @@ -51,7 +54,9 @@ SRes: SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) */ -typedef void * CLzmaEncHandle; +typedef struct CLzmaEnc CLzmaEnc; +typedef CLzmaEnc * CLzmaEncHandle; +// Z7_DECLARE_HANDLE(CLzmaEncHandle) CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); @@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); /* ---------- One Call Interface ---------- */ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); EXTERN_C_END diff --git a/src/Common/lzma/LzmaLib.c b/src/Common/lzma/LzmaLib.c index 706e9e58..785e8848 100644 --- a/src/Common/lzma/LzmaLib.c +++ b/src/Common/lzma/LzmaLib.c @@ -1,12 +1,14 @@ /* LzmaLib.c -- LZMA library wrapper -2015-06-13 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ + +#include "Precomp.h" #include "Alloc.h" #include "LzmaDec.h" #include "LzmaEnc.h" #include "LzmaLib.h" -MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, +Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, unsigned char *outProps, size_t *outPropsSize, int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ @@ -32,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char } -MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, +Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, const unsigned char *props, size_t propsSize) { ELzmaStatus status; diff --git a/src/Common/lzma/LzmaLib.h b/src/Common/lzma/LzmaLib.h index c343a859..d7c0724d 100644 --- a/src/Common/lzma/LzmaLib.h +++ b/src/Common/lzma/LzmaLib.h @@ -1,14 +1,14 @@ /* LzmaLib.h -- LZMA library interface -2021-04-03 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __LZMA_LIB_H -#define __LZMA_LIB_H +#ifndef ZIP7_INC_LZMA_LIB_H +#define ZIP7_INC_LZMA_LIB_H #include "7zTypes.h" EXTERN_C_BEGIN -#define MY_STDAPI int MY_STD_CALL +#define Z7_STDAPI int Z7_STDCALL #define LZMA_PROPS_SIZE 5 @@ -100,7 +100,7 @@ Returns: SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ -MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, +Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* default = (1 << 24) */ @@ -130,7 +130,7 @@ Returns: SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) */ -MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, +Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, const unsigned char *props, size_t propsSize); EXTERN_C_END diff --git a/src/Common/lzma/Precomp.h b/src/Common/lzma/Precomp.h index e8ff8b40..69afb2ff 100644 --- a/src/Common/lzma/Precomp.h +++ b/src/Common/lzma/Precomp.h @@ -1,8 +1,8 @@ /* Precomp.h -- StdAfx -2013-11-12 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_PRECOMP_H -#define __7Z_PRECOMP_H +#ifndef ZIP7_INC_PRECOMP_H +#define ZIP7_INC_PRECOMP_H #include "Compiler.h" /* #include "7zTypes.h" */ diff --git a/src/Common/lzma/Threads.c b/src/Common/lzma/Threads.c index 58eb90ff..cf52bd30 100644 --- a/src/Common/lzma/Threads.c +++ b/src/Common/lzma/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2021-12-21 : Igor Pavlov : Public domain */ +2023-03-04 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -11,9 +11,9 @@ #include "Threads.h" -static WRes GetError() +static WRes GetError(void) { - DWORD res = GetLastError(); + const DWORD res = GetLastError(); return res ? (WRes)res : 1; } @@ -173,6 +173,9 @@ WRes CriticalSection_Init(CCriticalSection *p) Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception Windows Vista+ : no exceptions */ #ifdef _MSC_VER + #ifdef __clang__ + #pragma GCC diagnostic ignored "-Wlanguage-extension-token" + #endif __try #endif { @@ -193,18 +196,26 @@ WRes CriticalSection_Init(CCriticalSection *p) // ---------- POSIX ---------- #ifndef __APPLE__ -#ifndef _7ZIP_AFFINITY_DISABLE +#ifndef Z7_AFFINITY_DISABLE // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET +// clang < 3.6 : unknown warning group '-Wreserved-id-macro' +// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" +// clang >= 13 : do not give warning +#if !defined(_GNU_SOURCE) + #if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12) + #pragma GCC diagnostic ignored "-Wreserved-id-macro" + #endif #define _GNU_SOURCE -#endif -#endif +#endif // !defined(_GNU_SOURCE) +#endif // Z7_AFFINITY_DISABLE +#endif // __APPLE__ #include "Threads.h" #include <errno.h> #include <stdlib.h> #include <string.h> -#ifdef _7ZIP_AFFINITY_SUPPORTED +#ifdef Z7_AFFINITY_SUPPORTED // #include <sched.h> #endif @@ -212,15 +223,12 @@ WRes CriticalSection_Init(CCriticalSection *p) // #include <stdio.h> // #define PRF(p) p #define PRF(p) - -#define Print(s) PRF(printf("\n%s\n", s)) - -// #include <stdio.h> +#define Print(s) PRF(printf("\n%s\n", s);) WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) { // new thread in Posix probably inherits affinity from parrent thread - Print("Thread_Create_With_CpuSet"); + Print("Thread_Create_With_CpuSet") pthread_attr_t attr; int ret; @@ -228,7 +236,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, p->_created = 0; - RINOK(pthread_attr_init(&attr)); + RINOK(pthread_attr_init(&attr)) ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); @@ -236,7 +244,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, { if (cpuSet) { - #ifdef _7ZIP_AFFINITY_SUPPORTED + #ifdef Z7_AFFINITY_SUPPORTED /* printf("\n affinity :"); @@ -292,7 +300,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) { - Print("Thread_Create_WithAffinity"); + Print("Thread_Create_WithAffinity") CCpuSet cs; unsigned i; CpuSet_Zero(&cs); @@ -312,7 +320,7 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param WRes Thread_Close(CThread *p) { - // Print("Thread_Close"); + // Print("Thread_Close") int ret; if (!p->_created) return 0; @@ -326,7 +334,7 @@ WRes Thread_Close(CThread *p) WRes Thread_Wait_Close(CThread *p) { - // Print("Thread_Wait_Close"); + // Print("Thread_Wait_Close") void *thread_return; int ret; if (!p->_created) @@ -343,8 +351,8 @@ WRes Thread_Wait_Close(CThread *p) static WRes Event_Create(CEvent *p, int manualReset, int signaled) { - RINOK(pthread_mutex_init(&p->_mutex, NULL)); - RINOK(pthread_cond_init(&p->_cond, NULL)); + RINOK(pthread_mutex_init(&p->_mutex, NULL)) + RINOK(pthread_cond_init(&p->_cond, NULL)) p->_manual_reset = manualReset; p->_state = (signaled ? True : False); p->_created = 1; @@ -363,7 +371,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) WRes Event_Set(CEvent *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = True; int res1 = pthread_cond_broadcast(&p->_cond); int res2 = pthread_mutex_unlock(&p->_mutex); @@ -372,14 +380,14 @@ WRes Event_Set(CEvent *p) WRes Event_Reset(CEvent *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = False; return pthread_mutex_unlock(&p->_mutex); } WRes Event_Wait(CEvent *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) while (p->_state == False) { // ETIMEDOUT @@ -411,8 +419,8 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) { if (initCount > maxCount || maxCount < 1) return EINVAL; - RINOK(pthread_mutex_init(&p->_mutex, NULL)); - RINOK(pthread_cond_init(&p->_cond, NULL)); + RINOK(pthread_mutex_init(&p->_mutex, NULL)) + RINOK(pthread_cond_init(&p->_cond, NULL)) p->_count = initCount; p->_maxCount = maxCount; p->_created = 1; @@ -448,7 +456,7 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) if (releaseCount < 1) return EINVAL; - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) newCount = p->_count + releaseCount; if (newCount > p->_maxCount) @@ -458,13 +466,13 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) p->_count = newCount; ret = pthread_cond_broadcast(&p->_cond); } - RINOK(pthread_mutex_unlock(&p->_mutex)); + RINOK(pthread_mutex_unlock(&p->_mutex)) return ret; } WRes Semaphore_Wait(CSemaphore *p) { - RINOK(pthread_mutex_lock(&p->_mutex)); + RINOK(pthread_mutex_lock(&p->_mutex)) while (p->_count < 1) { pthread_cond_wait(&p->_cond, &p->_mutex); @@ -489,7 +497,7 @@ WRes Semaphore_Close(CSemaphore *p) WRes CriticalSection_Init(CCriticalSection *p) { - // Print("CriticalSection_Init"); + // Print("CriticalSection_Init") if (!p) return EINTR; return pthread_mutex_init(&p->_mutex, NULL); @@ -497,7 +505,7 @@ WRes CriticalSection_Init(CCriticalSection *p) void CriticalSection_Enter(CCriticalSection *p) { - // Print("CriticalSection_Enter"); + // Print("CriticalSection_Enter") if (p) { // int ret = @@ -507,7 +515,7 @@ void CriticalSection_Enter(CCriticalSection *p) void CriticalSection_Leave(CCriticalSection *p) { - // Print("CriticalSection_Leave"); + // Print("CriticalSection_Leave") if (p) { // int ret = @@ -517,7 +525,7 @@ void CriticalSection_Leave(CCriticalSection *p) void CriticalSection_Delete(CCriticalSection *p) { - // Print("CriticalSection_Delete"); + // Print("CriticalSection_Delete") if (p) { // int ret = @@ -527,14 +535,28 @@ void CriticalSection_Delete(CCriticalSection *p) LONG InterlockedIncrement(LONG volatile *addend) { - // Print("InterlockedIncrement"); + // Print("InterlockedIncrement") #ifdef USE_HACK_UNSAFE_ATOMIC LONG val = *addend + 1; *addend = val; return val; #else + + #if defined(__clang__) && (__clang_major__ >= 8) + #pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst" + #endif return __sync_add_and_fetch(addend, 1); #endif } #endif // _WIN32 + +WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) +{ + if (Event_IsCreated(p)) + return Event_Reset(p); + return AutoResetEvent_CreateNotSignaled(p); +} + +#undef PRF +#undef Print diff --git a/src/Common/lzma/Threads.h b/src/Common/lzma/Threads.h index 89ecb92b..4028464a 100644 --- a/src/Common/lzma/Threads.h +++ b/src/Common/lzma/Threads.h @@ -1,18 +1,19 @@ /* Threads.h -- multithreading library -2021-12-21 : Igor Pavlov : Public domain */ +2023-04-02 : Igor Pavlov : Public domain */ -#ifndef __7Z_THREADS_H -#define __7Z_THREADS_H +#ifndef ZIP7_INC_THREADS_H +#define ZIP7_INC_THREADS_H #ifdef _WIN32 -#include <Windows.h> +#include "7zWindows.h" + #else #if defined(__linux__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) -#ifndef _7ZIP_AFFINITY_DISABLE -#define _7ZIP_AFFINITY_SUPPORTED -// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED") +#ifndef Z7_AFFINITY_DISABLE +#define Z7_AFFINITY_SUPPORTED +// #pragma message(" ==== Z7_AFFINITY_SUPPORTED") // #define _GNU_SOURCE #endif #endif @@ -33,7 +34,7 @@ WRes Handle_WaitObject(HANDLE h); typedef HANDLE CThread; -#define Thread_Construct(p) { *(p) = NULL; } +#define Thread_CONSTRUCT(p) { *(p) = NULL; } #define Thread_WasCreated(p) (*(p) != NULL) #define Thread_Close(p) HandlePtr_Close(p) // #define Thread_Wait(p) Handle_WaitObject(*(p)) @@ -52,42 +53,46 @@ typedef #endif THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_RET_ZERO 0 + typedef DWORD_PTR CAffinityMask; typedef DWORD_PTR CCpuSet; -#define CpuSet_Zero(p) { *(p) = 0; } -#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } +#define CpuSet_Zero(p) *(p) = (0) +#define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu)) #else // _WIN32 -typedef struct _CThread +typedef struct { pthread_t _tid; int _created; } CThread; -#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } -#define Thread_WasCreated(p) ((p)->_created != 0) +#define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; } +#define Thread_WasCreated(p) ((p)->_created != 0) WRes Thread_Close(CThread *p); // #define Thread_Wait Thread_Wait_Close typedef void * THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_RET_ZERO NULL + typedef UInt64 CAffinityMask; -#ifdef _7ZIP_AFFINITY_SUPPORTED +#ifdef Z7_AFFINITY_SUPPORTED typedef cpu_set_t CCpuSet; -#define CpuSet_Zero(p) CPU_ZERO(p) -#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) -#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) +#define CpuSet_Zero(p) CPU_ZERO(p) +#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) +#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) #else typedef UInt64 CCpuSet; -#define CpuSet_Zero(p) { *(p) = 0; } -#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } -#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) +#define CpuSet_Zero(p) *(p) = (0) +#define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu)) +#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) #endif @@ -95,7 +100,7 @@ typedef UInt64 CCpuSet; #endif // _WIN32 -#define THREAD_FUNC_CALL_TYPE MY_STD_CALL +#define THREAD_FUNC_CALL_TYPE Z7_STDCALL #if defined(_WIN32) && defined(__GNUC__) /* GCC compiler for x86 32-bit uses the rule: @@ -187,6 +192,7 @@ WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); + WRes Event_Set(CEvent *p); WRes Event_Reset(CEvent *p); WRes Event_Wait(CEvent *p); @@ -227,6 +233,8 @@ LONG InterlockedIncrement(LONG volatile *addend); #endif // _WIN32 +WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p); + EXTERN_C_END #endif diff --git a/src/Common/lzma/lzma-history.txt b/src/Common/lzma/lzma-history.txt index 0963c7bf..a151c4b9 100644 --- a/src/Common/lzma/lzma-history.txt +++ b/src/Common/lzma/lzma-history.txt @@ -1,6 +1,46 @@ HISTORY of the LZMA SDK ----------------------- +23.01 2023-06-20 +------------------------- +- 7-Zip now can use new ARM64 filter for compression to 7z and xz archives. + ARM64 filter can increase compression ratio for data containing executable + files compiled for ARM64 (AArch64) architecture. + Also 7-Zip now parses executable files (that have exe and dll filename extensions) + before compressing, and it selects appropriate filter for each parsed file: + - BCJ or BCJ2 filter for x86 executable files, + - ARM64 filter for ARM64 executable files. + Previous versions by default used x86 filter BCJ or BCJ2 for all exe/dll files. +- Default section size for BCJ2 filter was changed from 64 MiB to 240 MiB. + It can increase compression ratio for executable files larger than 64 MiB. +- Some optimizations in filters code: BCJ, BCJ2, Swap* and opthers. +- If 7-Zip uses BCJ2 filter for big datasets compressing, it can use additional temp + files in system's TEMP folder. 7-Zip uses temp file for additional compressed + data stream, if size of such compressed stream is larger than predefined limit: + 16 MiB in 32-bit version, 4 GiB in 64-bit version. +- When new 7-Zip creates multivolume archive, 7-Zip keeps in open state + only volumes that still can be changed. Previous versions kept all volumes + in open state until the end of the archive creation. +- 7-Zip for Linux and macOS now can reduce the number of simultaneously open files, + when 7-Zip opens, extracts or creates multivolume archive. It allows to avoid + the failures for cases with big number of volumes, bacause there is a limitation + for number of open files allowed for a single program in Linux and macOS. +- Some bugs were fixed. +- Source code changes: +- All external macros for compiling C/C++ code of 7-Zip now have Z7_ prefix. +- 7-Zip COM interfaces now use new macros that allow to declare and implement COM interface. +- The code has been modified to compile with the maximum diagnostic warning level: + -Wall in MSVC and -Weverything in CLANG. + And some warning types are disabled in 2 files: + - C/Compiler.h for C/C++ code warnings. + - CPP/Common/Common.h for C++ code warnings. +- Linux/macOS versions of 7-Zip: IUnknown interface in new code doesn't use + virtual destructor that was used in previous 7-Zip and p7zip: + // virtual ~IUnknown() {} + So 7-Zip's dynamically linked shared libraries (codecs) are not compatible + between new 7-Zip for Linux/macOS and old 7-Zip (and p7zip). + + 21.07 2021-12-26 ------------------------- - New switches: -spm and -im!{file_path} to exclude directories from processing diff --git a/src/Common/lzma/lzma-sdk.txt b/src/Common/lzma/lzma-sdk.txt index 9621c8d5..141b0fd4 100644 --- a/src/Common/lzma/lzma-sdk.txt +++ b/src/Common/lzma/lzma-sdk.txt @@ -1,4 +1,4 @@ -LZMA SDK 21.07 +LZMA SDK 23.01 -------------- LZMA SDK provides the documentation, samples, header files, |