diff options
Diffstat (limited to 'src/Common/lzma')
-rw-r--r-- | src/Common/lzma/7zTypes.h | 14 | ||||
-rw-r--r-- | src/Common/lzma/Alloc.c | 174 | ||||
-rw-r--r-- | src/Common/lzma/Alloc.h | 15 | ||||
-rw-r--r-- | src/Common/lzma/Compiler.h | 91 | ||||
-rw-r--r-- | src/Common/lzma/CpuArch.c | 217 | ||||
-rw-r--r-- | src/Common/lzma/CpuArch.h | 177 | ||||
-rw-r--r-- | src/Common/lzma/LzFind.c | 127 | ||||
-rw-r--r-- | src/Common/lzma/LzFind.h | 5 | ||||
-rw-r--r-- | src/Common/lzma/LzFindMt.c | 58 | ||||
-rw-r--r-- | src/Common/lzma/LzFindMt.h | 9 | ||||
-rw-r--r-- | src/Common/lzma/LzmaEnc.c | 40 | ||||
-rw-r--r-- | src/Common/lzma/Precomp.h | 123 | ||||
-rw-r--r-- | src/Common/lzma/Threads.c | 53 | ||||
-rw-r--r-- | src/Common/lzma/Threads.h | 20 | ||||
-rw-r--r-- | src/Common/lzma/lzma-history.txt | 74 | ||||
-rw-r--r-- | src/Common/lzma/lzma-sdk.txt | 17 |
16 files changed, 967 insertions, 247 deletions
diff --git a/src/Common/lzma/7zTypes.h b/src/Common/lzma/7zTypes.h index 1fcb2473..5b77420a 100644 --- a/src/Common/lzma/7zTypes.h +++ b/src/Common/lzma/7zTypes.h @@ -1,7 +1,7 @@ /* 7zTypes.h -- Basic types -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-24 : Igor Pavlov : Public domain */ #ifndef ZIP7_7Z_TYPES_H #define ZIP7_7Z_TYPES_H #ifdef _WIN32 @@ -528,24 +528,24 @@ struct ISzAlloc #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) /* #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) */ #if defined (__clang__) || defined(__GNUC__) -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \ _Pragma("GCC diagnostic pop") #else -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL -#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL #endif #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ - Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ - Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL + Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) diff --git a/src/Common/lzma/Alloc.c b/src/Common/lzma/Alloc.c index d841bf20..63e1a121 100644 --- a/src/Common/lzma/Alloc.c +++ b/src/Common/lzma/Alloc.c @@ -1,30 +1,29 @@ /* Alloc.c -- Memory allocation functions -2023-04-02 : Igor Pavlov : Public domain */ +2024-02-18 : Igor Pavlov : Public domain */ #include "Precomp.h" #ifdef _WIN32 #include "7zWindows.h" #endif #include <stdlib.h> #include "Alloc.h" -#ifdef _WIN32 -#ifdef Z7_LARGE_PAGES -#if defined(__clang__) || defined(__GNUC__) -typedef void (*Z7_voidFunction)(void); -#define MY_CAST_FUNC (Z7_voidFunction) -#elif defined(_MSC_VER) && _MSC_VER > 1920 -#define MY_CAST_FUNC (void *) -// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' -#else -#define MY_CAST_FUNC +#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \ + (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64) + #define Z7_USE_DYN_GetLargePageMinimum +#endif + +// for debug: +#if 0 +#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ") +#define Z7_ALLOC_NO_OFFSET_ALLOCATOR +#endif #endif -#endif // Z7_LARGE_PAGES -#endif // _WIN32 // #define SZ_ALLOC_DEBUG /* #define SZ_ALLOC_DEBUG */ /* use SZ_ALLOC_DEBUG to debug alloc/free operations */ @@ -144,11 +143,13 @@ static void PrintAddr(void *p) #define PRINT_ALLOC(name, cnt, size, ptr) #endif #define PRINT_FREE(name, cnt, ptr) #define Print(s) #define PrintLn() +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR #define PrintHex(v, align) +#endif #define PrintAddr(p) #endif @@ -244,35 +245,39 @@ void MidFree(void *address) } #ifdef Z7_LARGE_PAGES #ifdef MEM_LARGE_PAGES - #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES + #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES #else - #define MY__MEM_LARGE_PAGES 0x20000000 + #define MY_MEM_LARGE_PAGES 0x20000000 #endif extern SIZE_T g_LargePageSize; SIZE_T g_LargePageSize = 0; typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); void SetLargePageSize(void) { - #ifdef Z7_LARGE_PAGES SIZE_T size; +#ifdef Z7_USE_DYN_GetLargePageMinimum +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + const Func_GetLargePageMinimum fn = - (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); if (!fn) return; size = fn(); +#else + size = GetLargePageMinimum(); +#endif if (size == 0 || (size & (size - 1)) != 0) return; g_LargePageSize = size; - #endif } #endif // Z7_LARGE_PAGES void *BigAlloc(size_t size) @@ -290,11 +295,11 @@ void *BigAlloc(size_t size) size_t size2; ps--; size2 = (size + ps) & ~ps; if (size2 >= size) { - void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); + void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE); if (p) { PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) return p; } @@ -326,42 +331,51 @@ static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigA static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); } const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; #endif -/* - uintptr_t : <stdint.h> C99 (optional) - : unsupported in VS6 -*/ - -#ifdef _WIN32 - typedef UINT_PTR UIntPtr; -#else - /* - typedef uintptr_t UIntPtr; - */ - typedef ptrdiff_t UIntPtr; -#endif - +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR #define ADJUST_ALLOC_SIZE 0 /* #define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) */ /* Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if MyAlloc() can return address that is NOT multiple of sizeof(void *). */ - /* -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) + uintptr_t : <stdint.h> C99 (optional) + : unsupported in VS6 */ -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) +typedef + #ifdef _WIN32 + UINT_PTR + #elif 1 + uintptr_t + #else + ptrdiff_t + #endif + MY_uintptr_t; + +#if 0 \ + || (defined(__CHERI__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8)) +// for 128-bit pointers (cheri): +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1)))) +#else +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1)))) +#endif +#endif -#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) +#if !defined(_WIN32) \ + && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \ + || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) #define USE_posix_memalign #endif #ifndef USE_posix_memalign #define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) @@ -397,18 +411,17 @@ static int posix_memalign(void **ptr, size_t align, size_t size) ALLOC_ALIGN_SIZE >= cache_line_size */ #define ALLOC_ALIGN_SIZE ((size_t)1 << 7) -static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +void *z7_AlignedAlloc(size_t size) { - #ifndef USE_posix_memalign +#ifndef USE_posix_memalign void *p; void *pAligned; size_t newSize; - UNUSED_VAR(pp) /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned block to prevent cache line sharing with another allocated blocks */ newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE; @@ -429,52 +442,97 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) ((void **)pAligned)[-1] = p; return pAligned; - #else +#else void *p; - UNUSED_VAR(pp) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) return NULL; Print(" posix_memalign="); PrintAddr(p); PrintLn(); return p; - #endif +#endif +} + + +void z7_AlignedFree(void *address) +{ +#ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); +#else + free(address); +#endif +} + + +static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +{ + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); } static void SzAlignedFree(ISzAllocPtr pp, void *address) { UNUSED_VAR(pp) - #ifndef USE_posix_memalign +#ifndef USE_posix_memalign if (address) MyFree(((void **)address)[-1]); - #else +#else free(address); - #endif +#endif } const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; -#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) - /* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ -#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] -/* -#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1] -*/ +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#if 1 + #define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] +#else + // we can use this simplified code, + // if (CAlignOffsetAlloc::offset == (k * sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1]) +#endif +#endif + + +#if 0 +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#include <stdio.h> +static void PrintPtr(const char *s, const void *p) +{ + const Byte *p2 = (const Byte *)&p; + unsigned i; + printf("%s %p ", s, p); + for (i = sizeof(p); i != 0;) + { + i--; + printf("%02x", p2[i]); + } + printf("\n"); +} +#endif +#endif + static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) { +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); +#else const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); void *adr; void *pAligned; size_t newSize; size_t extra; @@ -499,10 +557,16 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) return NULL; pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; +#if 0 + printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size); + PrintPtr("base", adr); + PrintPtr("alig", pAligned); +#endif + PrintLn(); Print("- Aligned: "); Print(" size="); PrintHex(size, 8); Print(" a_size="); PrintHex(newSize, 8); Print(" ptr="); PrintAddr(adr); @@ -510,23 +574,29 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) PrintLn(); REAL_BLOCK_PTR_VAR(pAligned) = adr; return pAligned; +#endif } static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) { +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + z7_AlignedFree(address); +#else if (address) { const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); PrintLn(); Print("- Aligned Free: "); PrintLn(); ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); } +#endif } void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p) { diff --git a/src/Common/lzma/Alloc.h b/src/Common/lzma/Alloc.h index fac5b62f..01bf6b7d 100644 --- a/src/Common/lzma/Alloc.h +++ b/src/Common/lzma/Alloc.h @@ -1,7 +1,7 @@ /* Alloc.h -- Memory allocation functions -2023-03-04 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_ALLOC_H #define ZIP7_INC_ALLOC_H #include "7zTypes.h" @@ -20,10 +20,13 @@ MyRealloc() is similar to realloc() for the following cases: void *MyAlloc(size_t size); void MyFree(void *address); void *MyRealloc(void *address, size_t size); +void *z7_AlignedAlloc(size_t size); +void z7_AlignedFree(void *p); + #ifdef _WIN32 #ifdef Z7_LARGE_PAGES void SetLargePageSize(void); #endif @@ -31,16 +34,18 @@ void SetLargePageSize(void); void *MidAlloc(size_t size); void MidFree(void *address); void *BigAlloc(size_t size); void BigFree(void *address); +/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */ + #else -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) +#define MidAlloc(size) z7_AlignedAlloc(size) +#define MidFree(address) z7_AlignedFree(address) +#define BigAlloc(size) z7_AlignedAlloc(size) +#define BigFree(address) z7_AlignedFree(address) #endif extern const ISzAlloc g_Alloc; diff --git a/src/Common/lzma/Compiler.h b/src/Common/lzma/Compiler.h index 185a52de..2a9c2b7a 100644 --- a/src/Common/lzma/Compiler.h +++ b/src/Common/lzma/Compiler.h @@ -1,7 +1,7 @@ /* Compiler.h : Compiler specific defines and pragmas -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H #if defined(__clang__) @@ -23,15 +23,83 @@ #if defined(__MINGW32__) || defined(__MINGW64__) #define Z7_MINGW #endif +#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__)) +#define Z7_MCST_LCC +#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__) +#endif + +/* +#if defined(__AVX2__) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \ + || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \ + || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) + #define Z7_COMPILER_AVX2_SUPPORTED + #endif +#endif +*/ + // #pragma GCC diagnostic ignored "-Wunknown-pragmas" #ifdef __clang__ // padding size of '' with 4 bytes to alignment boundary #pragma GCC diagnostic ignored "-Wpadded" + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \ + && defined(__FreeBSD__) +// freebsd: +#pragma GCC diagnostic ignored "-Wexcess-padding" +#endif + +#if __clang_major__ >= 16 +#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" +#endif + +#if __clang_major__ == 13 +#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// cheri +#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast" +#endif +#endif + +#if __clang_major__ == 13 + // for <arm_neon.h> + #pragma GCC diagnostic ignored "-Wreserved-identifier" +#endif + +#endif // __clang__ + +#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +// #pragma GCC diagnostic ignored "-Wcast-function-type-strict" +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +#endif + +typedef void (*Z7_void_Function)(void); +#if defined(__clang__) || defined(__GNUC__) +#define Z7_CAST_FUNC_C (Z7_void_Function) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define Z7_CAST_FUNC_C (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define Z7_CAST_FUNC_C +#endif +/* +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif +*/ +#ifdef __GNUC__ +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif #endif #ifdef _MSC_VER @@ -99,11 +167,12 @@ #if defined(__clang__) && (__clang_major__ >= 4) #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ _Pragma("clang loop unroll(disable)") \ _Pragma("clang loop vectorize(disable)") #define Z7_ATTRIB_NO_VECTORIZE -#elif defined(__GNUC__) && (__GNUC__ >= 5) +#elif defined(__GNUC__) && (__GNUC__ >= 5) \ + && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610)) #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) // __attribute__((optimize("no-unroll-loops"))); #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE #elif defined(_MSC_VER) && (_MSC_VER >= 1920) #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ @@ -140,19 +209,27 @@ #define Z7_UNLIKELY(x) (x) // #define Z7_likely #endif -#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600)) + +#if (Z7_CLANG_VERSION < 130000) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") -#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ +#endif + +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ _Pragma("GCC diagnostic pop") #else -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER -#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif #define UNUSED_VAR(x) (void)x; /* #define UNUSED_VAR(x) x=x; */ diff --git a/src/Common/lzma/CpuArch.c b/src/Common/lzma/CpuArch.c index 33f8a3ab..6e02551e 100644 --- a/src/Common/lzma/CpuArch.c +++ b/src/Common/lzma/CpuArch.c @@ -1,7 +1,7 @@ /* CpuArch.c -- CPU specific code -2023-05-18 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" // #include <stdio.h> @@ -15,11 +15,11 @@ #endif /* cpuid instruction supports (subFunction) parameter in ECX, that is used only with some specific (function) parameter values. - But we always use only (subFunction==0). + most functions use only (subFunction==0). */ /* __cpuid(): MSVC and GCC/CLANG use same function/macro name but parameters are different. We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function. @@ -47,47 +47,53 @@ #define ASM_LN "\n" #if defined(MY_CPU_AMD64) && defined(__PIC__) \ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) -#define x86_cpuid_MACRO(p, func) { \ + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ __asm__ __volatile__ ( \ ASM_LN "mov %%rbx, %q1" \ ASM_LN "cpuid" \ ASM_LN "xchg %%rbx, %q1" \ - : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } - - /* "=&r" selects free register. It can select even rbx, if that register is free. - "=&D" for (RDI) also works, but the code can be larger with "=&D" - "2"(0) means (subFunction = 0), - 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } #elif defined(MY_CPU_X86) && defined(__PIC__) \ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) -#define x86_cpuid_MACRO(p, func) { \ +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ __asm__ __volatile__ ( \ ASM_LN "mov %%ebx, %k1" \ ASM_LN "cpuid" \ ASM_LN "xchg %%ebx, %k1" \ - : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } #else -#define x86_cpuid_MACRO(p, func) { \ +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ __asm__ __volatile__ ( \ ASM_LN "cpuid" \ - : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } #endif +#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0) void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { x86_cpuid_MACRO(p, func) } +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + x86_cpuid_MACRO_2(p, func, subFunc) +} + Z7_NO_INLINE UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) { #if defined(NEED_CHECK_FOR_CPUID) @@ -203,54 +209,87 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) __asm pop edi __asm pop ebx __asm ret 0 } +static +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + UNUSED_VAR(subFunc) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm mov ecx, [esp + 12] // subFunc + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 4 +} + #else // MY_CPU_AMD64 #if _MSC_VER >= 1600 #include <intrin.h> #define MY_cpuidex __cpuidex + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + __cpuidex((int *)p, func, subFunc); +} + #else /* __cpuid (func == (0 or 7)) requires subfunction number in ECX. MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. __cpuid() in new MSVC clears ECX. __cpuid() in old MSVC (14.00) x64 doesn't clear ECX We still can use __cpuid for low (func) values that don't require ECX, but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, - where ECX value is first parameter for FASTCALL / NO_INLINE func, + where ECX value is first parameter for FASTCALL / NO_INLINE func. So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! */ static -Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo) { UNUSED_VAR(subFunction) __cpuid(CPUInfo, func); } #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) #pragma message("======== MY_cpuidex_HACK WAS USED ========") +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + MY_cpuidex_HACK(subFunc, func, (Int32 *)p); +} #endif // _MSC_VER >= 1600 #if !defined(MY_CPU_AMD64) /* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, so we disable inlining here */ Z7_NO_INLINE #endif void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { - MY_cpuidex((int *)p, (int)func, 0); + MY_cpuidex((Int32 *)p, (Int32)func, 0); } Z7_NO_INLINE UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) { - int a[4]; + Int32 a[4]; MY_cpuidex(a, 0, 0); return a[0]; } #endif // MY_CPU_AMD64 @@ -382,29 +421,29 @@ static BoolInt CPU_Sys_Is_SSE_Supported(void) BoolInt CPU_IsSupported_CMOV(void) { UInt32 a[4]; if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 15) & 1; + return (BoolInt)(a[3] >> 15) & 1; } BoolInt CPU_IsSupported_SSE(void) { UInt32 a[4]; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 25) & 1; + return (BoolInt)(a[3] >> 25) & 1; } BoolInt CPU_IsSupported_SSE2(void) { UInt32 a[4]; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 26) & 1; + return (BoolInt)(a[3] >> 26) & 1; } #endif @@ -417,21 +456,21 @@ static UInt32 x86cpuid_Func_1_ECX(void) return a[2]; } BoolInt CPU_IsSupported_AES(void) { - return (x86cpuid_Func_1_ECX() >> 25) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1; } BoolInt CPU_IsSupported_SSSE3(void) { - return (x86cpuid_Func_1_ECX() >> 9) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1; } BoolInt CPU_IsSupported_SSE41(void) { - return (x86cpuid_Func_1_ECX() >> 19) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1; } BoolInt CPU_IsSupported_SHA(void) { CHECK_SYS_SSE_SUPPORT @@ -439,11 +478,28 @@ BoolInt CPU_IsSupported_SHA(void) if (z7_x86_cpuid_GetMaxFunc() < 7) return False; { UInt32 d[4]; z7_x86_cpuid(d, 7); - return (d[1] >> 29) & 1; + return (BoolInt)(d[1] >> 29) & 1; + } +} + + +BoolInt CPU_IsSupported_SHA512(void) +{ + if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid_subFunc(d, 7, 0); + if (d[0] < 1) // d[0] - is max supported subleaf value + return False; + z7_x86_cpuid_subFunc(d, 7, 1); + return (BoolInt)(d[0]) & 1; } } /* MSVC: _xgetbv() intrinsic is available since VS2010SP1. @@ -636,14 +692,14 @@ BoolInt CPU_IsSupported_AVX(void) what exact states(registers) OS have enabled for storing/restoring. */ { const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); - // printf("\n=== XGetBV=%d\n", bm); + // printf("\n=== XGetBV=0x%x\n", bm); return 1 - & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring - & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring } // since Win7SP1: we can use GetEnabledXStateFeatures(); } @@ -656,13 +712,42 @@ BoolInt CPU_IsSupported_AVX2(void) { UInt32 d[4]; z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 - & (d[1] >> 5); // avx2 + & (BoolInt)(d[1] >> 5); // avx2 + } +} + +#if 0 +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + BoolInt v; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) + return False; + } + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 } } +#endif BoolInt CPU_IsSupported_VAES_AVX2(void) { if (!CPU_IsSupported_AVX()) return False; @@ -671,13 +756,13 @@ BoolInt CPU_IsSupported_VAES_AVX2(void) { UInt32 d[4]; z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 - & (d[1] >> 5) // avx2 + & (BoolInt)(d[1] >> 5) // avx2 // & (d[1] >> 31) // avx512vl - & (d[2] >> 9); // vaes // VEX-256/EVEX + & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX } } BoolInt CPU_IsSupported_PageGB(void) { @@ -686,11 +771,11 @@ BoolInt CPU_IsSupported_PageGB(void) UInt32 d[4]; z7_x86_cpuid(d, 0x80000000); if (d[0] < 0x80000001) return False; z7_x86_cpuid(d, 0x80000001); - return (d[3] >> 26) & 1; + return (BoolInt)(d[3] >> 26) & 1; } } #elif defined(MY_CPU_ARM_OR_ARM64) @@ -745,10 +830,22 @@ BoolInt CPU_IsSupported_CRC32(void) BoolInt CPU_IsSupported_NEON(void) { return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); } +BoolInt CPU_IsSupported_SHA512(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512"); +} + +/* +BoolInt CPU_IsSupported_SHA3(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3"); +} +*/ + #ifdef MY_CPU_ARM64 #define APPLE_CRYPTO_SUPPORT_VAL 1 #else #define APPLE_CRYPTO_SUPPORT_VAL 0 #endif @@ -758,44 +855,94 @@ BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #else // __APPLE__ -#include <sys/auxv.h> +#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) + #define Z7_GETAUXV_AVAILABLE +#else +// #pragma message("=== is not NEW GLIBC === ") + #if defined __has_include + #if __has_include (<sys/auxv.h>) +// #pragma message("=== sys/auxv.h is avail=== ") + #define Z7_GETAUXV_AVAILABLE + #endif + #endif +#endif +#ifdef Z7_GETAUXV_AVAILABLE +// #pragma message("=== Z7_GETAUXV_AVAILABLE === ") +#include <sys/auxv.h> #define USE_HWCAP +#endif #ifdef USE_HWCAP +#if defined(__FreeBSD__) +static unsigned long MY_getauxval(int aux) +{ + unsigned long val; + if (elf_aux_info(aux, &val, sizeof(val))) + return 0; + return val; +} +#else +#define MY_getauxval getauxval + #if defined __has_include + #if __has_include (<asm/hwcap.h>) #include <asm/hwcap.h> + #endif + #endif +#endif #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ - BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); } #ifdef MY_CPU_ARM64 #define MY_HWCAP_CHECK_FUNC(name) \ MY_HWCAP_CHECK_FUNC_2(name, name) +#if 1 || defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +#endif // MY_HWCAP_CHECK_FUNC (ASIMD) #elif defined(MY_CPU_ARM) #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } + BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); } MY_HWCAP_CHECK_FUNC_2(NEON, NEON) #endif #else // USE_HWCAP #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return 0; } + BoolInt CPU_IsSupported_ ## name(void) { return 0; } +#if defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC(NEON) +#endif #endif // USE_HWCAP MY_HWCAP_CHECK_FUNC (CRC32) MY_HWCAP_CHECK_FUNC (SHA1) MY_HWCAP_CHECK_FUNC (SHA2) MY_HWCAP_CHECK_FUNC (AES) +#ifdef MY_CPU_ARM64 +// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017. +// we define them here, if they are not defined +#ifndef HWCAP_SHA3 +// #define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SHA512 +// #pragma message("=== HWCAP_SHA512 define === ") +#define HWCAP_SHA512 (1 << 21) +#endif +MY_HWCAP_CHECK_FUNC (SHA512) +// MY_HWCAP_CHECK_FUNC (SHA3) +#endif #endif // __APPLE__ #endif // _WIN32 #endif // MY_CPU_ARM_OR_ARM64 diff --git a/src/Common/lzma/CpuArch.h b/src/Common/lzma/CpuArch.h index 8e5d8a54..a6297ea4 100644 --- a/src/Common/lzma/CpuArch.h +++ b/src/Common/lzma/CpuArch.h @@ -1,7 +1,7 @@ /* CpuArch.h -- CPU specific code -2023-04-02 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H #include "7zTypes.h" @@ -18,10 +18,11 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem MY_CPU_64BIT means that processor can work with 64-bit registers. MY_CPU_64BIT can be used to select fast code branch MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ +#if !defined(_M_ARM64EC) #if defined(_M_X64) \ || defined(_M_AMD64) \ || defined(__x86_64__) \ || defined(__AMD64__) \ || defined(__amd64__) @@ -33,10 +34,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_NAME "x64" #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT #endif +#endif #if defined(_M_IX86) \ || defined(__i386__) #define MY_CPU_X86 @@ -45,21 +47,30 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_SIZEOF_POINTER 4 #endif #if defined(_M_ARM64) \ + || defined(_M_ARM64EC) \ || defined(__AARCH64EL__) \ || defined(__AARCH64EB__) \ || defined(__aarch64__) #define MY_CPU_ARM64 - #ifdef __ILP32__ +#if defined(__ILP32__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) #define MY_CPU_NAME "arm64-32" #define MY_CPU_SIZEOF_POINTER 4 - #else +#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) + #define MY_CPU_NAME "arm64-128" + #define MY_CPU_SIZEOF_POINTER 16 +#else +#if defined(_M_ARM64EC) + #define MY_CPU_NAME "arm64ec" +#else #define MY_CPU_NAME "arm64" +#endif #define MY_CPU_SIZEOF_POINTER 8 - #endif +#endif #define MY_CPU_64BIT #endif #if defined(_M_ARM) \ @@ -131,22 +142,83 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. /* #define MY_CPU_32BIT */ #endif #endif +#if defined(__sparc__) \ + || defined(__sparc) + #define MY_CPU_SPARC + #if defined(__LP64__) \ + || defined(_LP64) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_NAME "sparcv9" + #define MY_CPU_SIZEOF_POINTER 8 + #define MY_CPU_64BIT + #elif defined(__sparc_v9__) \ + || defined(__sparcv9) + #define MY_CPU_64BIT + #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "sparcv9-32" + #else + #define MY_CPU_NAME "sparcv9m" + #endif + #elif defined(__sparc_v8__) \ + || defined(__sparcv8) + #define MY_CPU_NAME "sparcv8" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "sparc" + #endif +#endif + + #if defined(__riscv) \ || defined(__riscv__) + #define MY_CPU_RISCV #if __riscv_xlen == 32 #define MY_CPU_NAME "riscv32" #elif __riscv_xlen == 64 #define MY_CPU_NAME "riscv64" #else #define MY_CPU_NAME "riscv" #endif #endif +#if defined(__loongarch__) + #define MY_CPU_LOONGARCH + #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64) + #define MY_CPU_64BIT + #endif + #if defined(__loongarch64) + #define MY_CPU_NAME "loongarch64" + #define MY_CPU_LOONGARCH64 + #else + #define MY_CPU_NAME "loongarch" + #endif +#endif + + +// #undef MY_CPU_NAME +// #undef MY_CPU_SIZEOF_POINTER +// #define __e2k__ +// #define __SIZEOF_POINTER__ 4 +#if defined(__e2k__) + #define MY_CPU_E2K + #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "e2k-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "e2k" + #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #endif + #define MY_CPU_64BIT +#endif + + #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #define MY_CPU_X86_OR_AMD64 #endif #if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) @@ -173,10 +245,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_IA64_LE) \ + || defined(_LITTLE_ENDIAN) \ || defined(__LITTLE_ENDIAN__) \ || defined(__ARMEL__) \ || defined(__THUMBEL__) \ || defined(__AARCH64EL__) \ || defined(__MIPSEL__) \ @@ -249,10 +322,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #endif #ifndef MY_CPU_NAME + // #define MY_CPU_IS_UNKNOWN #ifdef MY_CPU_LE #define MY_CPU_NAME "LE" #elif defined(MY_CPU_BE) #define MY_CPU_NAME "BE" #else @@ -293,13 +367,23 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define Z7_BSWAP16(v) _byteswap_ushort(v) #define Z7_BSWAP32(v) _byteswap_ulong (v) #define Z7_BSWAP64(v) _byteswap_uint64(v) #define Z7_CPU_FAST_BSWAP_SUPPORTED -#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ - || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) - +/* GCC can generate slow code that calls function for __builtin_bswap32() for: + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. + - GCC for SPARC. + The code from CLANG for SPARC also is not fastest. + So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. +*/ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ + && !defined(MY_CPU_SPARC) \ + && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \ + ) + #define Z7_BSWAP16(v) __builtin_bswap16(v) #define Z7_BSWAP32(v) __builtin_bswap32(v) #define Z7_BSWAP64(v) __builtin_bswap64(v) #define Z7_CPU_FAST_BSWAP_SUPPORTED @@ -327,17 +411,52 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - || defined(MY_CPU_ARM64) + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \ + || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6) #define MY_CPU_LE_UNALIGN #define MY_CPU_LE_UNALIGN_64 #elif defined(__ARM_FEATURE_UNALIGNED) - /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. - So we can't use unaligned 64-bit operations. */ - #define MY_CPU_LE_UNALIGN +/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions. + Description of problems: +problem-1 : 32-bit ARM architecture: + multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM) + require 32-bit (WORD) alignment (by 32-bit ARM architecture). + So there is "Alignment fault exception", if data is not aligned for 32-bit. + +problem-2 : 32-bit kernels and arm64 kernels: + 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception". + So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux. + + But some arm64 kernels do not handle these faults in 32-bit programs. + So we have unhandled exception for such instructions. + Probably some new arm64 kernels have fixed it, and unaligned + paired-access instructions work in new kernels? + +problem-3 : compiler for 32-bit arm: + Compilers use LDRD/STRD/LDM/STM for UInt64 accesses + and for another cases where two 32-bit accesses are fused + to one multi-access instruction. + So UInt64 variables must be aligned for 32-bit, and each + 32-bit access must be aligned for 32-bit, if we want to + avoid "Alignment fault" exception (handled or unhandled). + +problem-4 : performace: + Even if unaligned access is handled by kernel, it will be slow. + So if we allow unaligned access, we can get fast unaligned + single-access, and slow unaligned paired-access. + + We don't allow unaligned access on 32-bit arm, because compiler + genarates paired-access instructions that require 32-bit alignment, + and some arm64 kernels have no handler for these instructions. + Also unaligned paired-access instructions will be slow, if kernel handles them. +*/ + // it must be disabled: + // #define MY_CPU_LE_UNALIGN #endif #endif #ifdef MY_CPU_LE_UNALIGN @@ -388,15 +507,23 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) +#if 0 +// Z7_BSWAP16 can be slow for x86-msvc +#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p))) +#else +#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16) +#endif + #define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) #define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } #if defined(MY_CPU_LE_UNALIGN_64) #define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); } #endif #else #define GetBe32(p) ( \ @@ -415,15 +542,31 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifndef GetBe64 #define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) #endif +#ifndef SetBe64 +#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 56); \ + _ppp_[1] = (Byte)(_vvv_ >> 48); \ + _ppp_[2] = (Byte)(_vvv_ >> 40); \ + _ppp_[3] = (Byte)(_vvv_ >> 32); \ + _ppp_[4] = (Byte)(_vvv_ >> 24); \ + _ppp_[5] = (Byte)(_vvv_ >> 16); \ + _ppp_[6] = (Byte)(_vvv_ >> 8); \ + _ppp_[7] = (Byte)_vvv_; } +#endif + #ifndef GetBe16 +#ifdef GetBe16_to32 +#define GetBe16(p) ( (UInt16) GetBe16_to32(p)) +#else #define GetBe16(p) ( (UInt16) ( \ ((UInt16)((const Byte *)(p))[0] << 8) | \ ((const Byte *)(p))[1] )) #endif +#endif #if defined(MY_CPU_BE) #define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) @@ -437,37 +580,46 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #if defined(MY_CPU_BE) +#define GetBe64a(p) (*(const UInt64 *)(const void *)(p)) #define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) #define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define GetUi64a(p) GetUi64(p) #define GetUi32a(p) GetUi32(p) #define GetUi16a(p) GetUi16(p) #define SetUi32a(p, v) SetUi32(p, v) #define SetUi16a(p, v) SetUi16(p, v) #elif defined(MY_CPU_LE) +#define GetUi64a(p) (*(const UInt64 *)(const void *)(p)) #define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) #define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) #define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define GetBe64a(p) GetBe64(p) #define GetBe32a(p) GetBe32(p) #define GetBe16a(p) GetBe16(p) #define SetBe32a(p, v) SetBe32(p, v) #define SetBe16a(p, v) SetBe16(p, v) #else #error Stop_Compiling_Unknown_Endian_CPU_a #endif +#ifndef GetBe16_to32 +#define GetBe16_to32(p) GetBe16(p) +#endif + + #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM_OR_ARM64) \ || defined(MY_CPU_PPC_OR_PPC64) #define Z7_CPU_FAST_ROTATE_SUPPORTED #endif @@ -484,17 +636,19 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); #endif BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_CMOV(void); BoolInt CPU_IsSupported_SSE(void); BoolInt CPU_IsSupported_SSE2(void); BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_SHA512(void); BoolInt CPU_IsSupported_PageGB(void); #elif defined(MY_CPU_ARM_OR_ARM64) BoolInt CPU_IsSupported_CRC32(void); @@ -508,10 +662,11 @@ BoolInt CPU_IsSupported_CRYPTO(void); #else BoolInt CPU_IsSupported_SHA1(void); BoolInt CPU_IsSupported_SHA2(void); BoolInt CPU_IsSupported_AES(void); #endif +BoolInt CPU_IsSupported_SHA512(void); #endif #if defined(__APPLE__) int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); diff --git a/src/Common/lzma/LzFind.c b/src/Common/lzma/LzFind.c index 0fbd5aae..1ce40464 100644 --- a/src/Common/lzma/LzFind.c +++ b/src/Common/lzma/LzFind.c @@ -1,7 +1,7 @@ /* LzFind.c -- Match finder for LZ algorithms -2023-03-14 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include <string.h> // #include <stdio.h> @@ -106,13 +106,19 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all // return 0; // for debug } return (p->bufBase != NULL); } -static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static const Byte *MatchFinder_GetPointerToCurrentPos(void *p) +{ + return ((CMatchFinder *)p)->buffer; +} -static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } +static UInt32 MatchFinder_GetNumAvailableBytes(void *p) +{ + return GET_AVAIL_BYTES((CMatchFinder *)p); +} Z7_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { @@ -569,12 +575,13 @@ void MatchFinder_Init_4(CMatchFinder *p) // (CYC_TO_POS_OFFSET == 0) is expected by some optimized code #define CYC_TO_POS_OFFSET 0 // #define CYC_TO_POS_OFFSET 1 // for debug -void MatchFinder_Init(CMatchFinder *p) +void MatchFinder_Init(void *_p) { + CMatchFinder *p = (CMatchFinder *)_p; MatchFinder_Init_HighHash(p); MatchFinder_Init_LowHash(p); MatchFinder_Init_4(p); // if (readData) MatchFinder_ReadBlock(p); @@ -605,29 +612,29 @@ void MatchFinder_Init(CMatchFinder *p) #if (_MSC_VER >= 1900) #define USE_LZFIND_SATUR_SUB_256 #endif #endif -// #elif defined(MY_CPU_ARM_OR_ARM64) -#elif defined(MY_CPU_ARM64) +#elif defined(MY_CPU_ARM64) \ + /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */ - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) + #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) #define USE_LZFIND_SATUR_SUB_128 #ifdef MY_CPU_ARM64 // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) #else - // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon"))) #endif #elif defined(_MSC_VER) #if (_MSC_VER >= 1910) #define USE_LZFIND_SATUR_SUB_128 #endif #endif - #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include <arm64_neon.h> #else #include <arm_neon.h> #endif @@ -1080,13 +1087,15 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const return; } #define MOVE_POS \ - ++p->cyclicBufferPos; \ + p->cyclicBufferPos++; \ p->buffer++; \ - { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } + { const UInt32 pos1 = p->pos + 1; \ + p->pos = pos1; \ + if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } #define MOVE_POS_RET MOVE_POS return distances; Z7_NO_INLINE static void MatchFinder_MovePos(CMatchFinder *p) @@ -1101,24 +1110,30 @@ static void MatchFinder_MovePos(CMatchFinder *p) */ MOVE_POS } #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ - lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + UInt32 hv; const Byte *cur; UInt32 curMatch; \ + UInt32 lenLimit = p->lenLimit; \ + if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \ cur = p->buffer; #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) -#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) +#define SKIP_HEADER(minLen) \ + do { GET_MATCHES_HEADER2(minLen, continue) -#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \ + p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num); +#define SKIP_FOOTER \ + SkipMatchesSpec(MF_PARAMS(p)); \ + MOVE_POS \ + } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ - distances = func(MF_PARAMS(p), \ - distances, (UInt32)_maxLen_); MOVE_POS_RET + distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \ + MOVE_POS_RET #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) #define GET_MATCHES_FOOTER_HC(_maxLen_) \ @@ -1131,12 +1146,13 @@ static void MatchFinder_MovePos(CMatchFinder *p) const Byte *c = cur + maxLen; \ const Byte *lim = cur + lenLimit; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } -static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; GET_MATCHES_HEADER(2) HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; GET_MATCHES_FOOTER_BT(1) @@ -1156,12 +1172,13 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) mmm = p->cyclicBufferSize; \ if (pos < mmm) \ mmm = pos; -static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, d2, pos; unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(3) @@ -1197,12 +1214,13 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) @@ -1265,14 +1283,16 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) HASH5_CALC @@ -1337,12 +1357,13 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) @@ -1405,14 +1426,16 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_HC(maxLen) } -static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) HASH5_CALC @@ -1464,11 +1487,11 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) distances[-2] = 3; if (*(cur - d2 + 3) != cur[3]) break; UPDATE_maxLen - distances[-2] = maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET } @@ -1487,12 +1510,13 @@ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) p->hash[hv] = p->pos; GET_MATCHES_FOOTER_HC(2) } -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt2_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(2) { HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; @@ -1509,12 +1533,13 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) p->hash[hv] = p->pos; } SKIP_FOOTER } -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt3_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(3) { UInt32 h2; UInt32 *hash; HASH3_CALC @@ -1524,12 +1549,13 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) (hash + kFix3HashSize)[hv] = p->pos; } SKIP_FOOTER } -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(4) { UInt32 h2, h3; UInt32 *hash; HASH4_CALC @@ -1540,12 +1566,13 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) (hash + kFix4HashSize)[hv] = p->pos; } SKIP_FOOTER } -static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(5) { UInt32 h2, h3; UInt32 *hash; HASH5_CALC @@ -1587,12 +1614,13 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) p->pos = pos; \ if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ }} while(num); \ -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(4) UInt32 h2, h3; HASH4_CALC curMatch = (hash + kFix4HashSize)[hv]; @@ -1602,12 +1630,13 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) HC_SKIP_FOOTER } -static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(5) UInt32 h2, h3; HASH5_CALC curMatch = (hash + kFix5HashSize)[hv]; @@ -1632,45 +1661,45 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) } void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + vTable->Init = MatchFinder_Init; + vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { if (p->numHashBytes <= 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + vTable->GetMatches = Hc4_MatchFinder_GetMatches; + vTable->Skip = Hc4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + vTable->GetMatches = Hc5_MatchFinder_GetMatches; + vTable->Skip = Hc5_MatchFinder_Skip; } } else if (p->numHashBytes == 2) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + vTable->GetMatches = Bt2_MatchFinder_GetMatches; + vTable->Skip = Bt2_MatchFinder_Skip; } else if (p->numHashBytes == 3) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + vTable->GetMatches = Bt3_MatchFinder_GetMatches; + vTable->Skip = Bt3_MatchFinder_Skip; } else if (p->numHashBytes == 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + vTable->GetMatches = Bt4_MatchFinder_GetMatches; + vTable->Skip = Bt4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + vTable->GetMatches = Bt5_MatchFinder_GetMatches; + vTable->Skip = Bt5_MatchFinder_Skip; } } diff --git a/src/Common/lzma/LzFind.h b/src/Common/lzma/LzFind.h index a3f72c98..67e8a6e0 100644 --- a/src/Common/lzma/LzFind.h +++ b/src/Common/lzma/LzFind.h @@ -1,7 +1,7 @@ /* LzFind.h -- Match finder for LZ algorithms -2023-03-04 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_H #define ZIP7_INC_LZ_FIND_H #include "7zTypes.h" @@ -142,11 +142,12 @@ typedef struct void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p); void MatchFinder_Init_4(CMatchFinder *p); -void MatchFinder_Init(CMatchFinder *p); +// void MatchFinder_Init(CMatchFinder *p); +void MatchFinder_Init(void *p); UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); diff --git a/src/Common/lzma/LzFindMt.c b/src/Common/lzma/LzFindMt.c index 5253e6eb..ac9d59d0 100644 --- a/src/Common/lzma/LzFindMt.c +++ b/src/Common/lzma/LzFindMt.c @@ -1,7 +1,7 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #include "Precomp.h" // #include <stdio.h> @@ -92,11 +92,11 @@ static void MtSync_Construct(CMtSync *p) Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); } -#define DEBUG_BUFFER_LOCK // define it to debug lock state +// #define DEBUG_BUFFER_LOCK // define it to debug lock state #ifdef DEBUG_BUFFER_LOCK #include <stdlib.h> #define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1); #define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1); @@ -875,12 +875,13 @@ SRes MatchFinderMt_InitMt(CMatchFinderMt *p) RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)) return MtSync_Init(&p->btSync, kMtBtNumBlocks); } -static void MatchFinderMt_Init(CMatchFinderMt *p) +static void MatchFinderMt_Init(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; CMatchFinder *mf = MF(p); p->btBufPos = p->btBufPosLimit = NULL; p->hashBufPos = @@ -979,21 +980,23 @@ static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) return p->btNumAvailBytes; } -static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) +static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; return p->pointerToCurPos; } #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); -static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) +static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; if (p->btBufPos != p->btBufPosLimit) return p->btNumAvailBytes; return MatchFinderMt_GetNextBlock_Bt(p); } @@ -1241,12 +1244,13 @@ static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) return d; } -static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; const UInt32 *bt = p->btBufPos; const UInt32 len = *bt++; const UInt32 *btLim = bt + len; p->btBufPos = btLim; p->btNumAvailBytes--; @@ -1265,12 +1269,13 @@ static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) return d; } -static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; const UInt32 *bt = p->btBufPos; UInt32 len = *bt++; const UInt32 avail = p->btNumAvailBytes - 1; p->btNumAvailBytes = avail; p->btBufPos = bt + len; @@ -1313,27 +1318,30 @@ static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) #define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); -static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt0_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER2_MT { p->btNumAvailBytes--; SKIP_FOOTER_MT } -static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt2_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER_MT(2) UInt32 h2; MT_HASH2_CALC hash[h2] = p->lzPos; SKIP_FOOTER_MT } -static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt3_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER_MT(3) UInt32 h2, h3; MT_HASH3_CALC (hash + kFix3HashSize)[h3] = hash[ h2] = @@ -1359,43 +1367,43 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) } */ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + vTable->Init = MatchFinderMt_Init; + vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = MatchFinderMt_GetMatches; switch (MF(p)->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; - p->MixMatchesFunc = (Mf_Mix_Matches)NULL; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + p->MixMatchesFunc = NULL; + vTable->Skip = MatchFinderMt0_Skip; + vTable->GetMatches = MatchFinderMt2_GetMatches; break; case 3: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + p->MixMatchesFunc = MixMatches2; + vTable->Skip = MatchFinderMt2_Skip; break; case 4: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; // it's fast inline version of GetMatches() - // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + // vTable->GetMatches = MatchFinderMt_GetMatches_Bt4; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + p->MixMatchesFunc = MixMatches3; + vTable->Skip = MatchFinderMt3_Skip; break; default: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + p->MixMatchesFunc = MixMatches4; vTable->Skip = - (Mf_Skip_Func)MatchFinderMt3_Skip; - // (Mf_Skip_Func)MatchFinderMt4_Skip; + MatchFinderMt3_Skip; + // MatchFinderMt4_Skip; break; } } #undef RINOK_THREAD diff --git a/src/Common/lzma/LzFindMt.h b/src/Common/lzma/LzFindMt.h index db5923ea..fcb479da 100644 --- a/src/Common/lzma/LzFindMt.h +++ b/src/Common/lzma/LzFindMt.h @@ -1,7 +1,7 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2023-03-05 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H #include "LzFind.h" @@ -29,19 +29,22 @@ typedef struct CSemaphore filledSemaphore; CCriticalSection cs; // UInt32 numBlocks_Sent; } CMtSync; -typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); + +struct CMatchFinderMt_; + +typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances); /* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ #define kMtCacheLineDummy 128 typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); -typedef struct +typedef struct CMatchFinderMt_ { /* LZ */ const Byte *pointerToCurPos; UInt32 *btBuf; const UInt32 *btBufPos; diff --git a/src/Common/lzma/LzmaEnc.c b/src/Common/lzma/LzmaEnc.c index 6d13cac8..088b78f8 100644 --- a/src/Common/lzma/LzmaEnc.c +++ b/src/Common/lzma/LzmaEnc.c @@ -1,7 +1,7 @@ /* LzmaEnc.c -- LZMA Encoder -2023-04-13: Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" #include <string.h> @@ -70,15 +70,15 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) int level = p->level; if (level < 0) level = 5; p->level = level; if (p->dictSize == 0) - p->dictSize = - ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : - ( level <= 6 ? ((UInt32)1 << (level + 19)) : - ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) - ))); + p->dictSize = (unsigned)level <= 4 ? + (UInt32)1 << (level * 2 + 16) : + (unsigned)level <= sizeof(size_t) / 2 + 4 ? + (UInt32)1 << (level + 20) : + (UInt32)1 << (sizeof(size_t) / 2 + 24); if (p->dictSize > p->reduceSize) { UInt32 v = (UInt32)p->reduceSize; const UInt32 kReduceMin = ((UInt32)1 << 12); @@ -90,12 +90,12 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->lc < 0) p->lc = 3; if (p->lp < 0) p->lp = 0; if (p->pb < 0) p->pb = 2; - if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1; + if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64; if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); if (p->numThreads < 0) @@ -193,15 +193,15 @@ LZCNT can be faster than BSR, if supported. unsigned GetPosSlot1(UInt32 pos); unsigned GetPosSlot1(UInt32 pos) { unsigned res; - BSR2_RET(pos, res); + BSR2_RET(pos, res) return res; } -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) } #else // ! LZMA_LOG_BSR #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) @@ -510,11 +510,11 @@ struct CLzmaEnc COPY_ARR(d, s, isRep0Long) \ COPY_ARR(d, s, posSlotEncoder) \ COPY_ARR(d, s, posEncoders) \ (d)->lenProbs = (s)->lenProbs; \ (d)->repLenProbs = (s)->repLenProbs; \ - memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb)); + memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp); void LzmaEnc_SaveState(CLzmaEncHandle p) { // GET_CLzmaEnc_p CSaveState *v = &p->saveState; @@ -1038,27 +1038,27 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices); unsigned sym = --i + (1 << (kLenNumHighBits - 1)); UInt32 price = b; do { - unsigned bit = sym & 1; + const unsigned bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); } while (sym >= 2); { - unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); } } while (i); { unsigned posState; - size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); for (posState = 1; posState < numPosStates; posState++) memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); } } } @@ -2694,16 +2694,16 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, #ifndef Z7_ST p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif { - unsigned lclp = p->lc + p->lp; + const unsigned lclp = p->lc + p->lp; if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); if (!p->litProbs || !p->saveState.litProbs) { LzmaEnc_FreeLits(p, alloc); return SZ_ERROR_MEM; } @@ -2800,12 +2800,12 @@ static void LzmaEnc_Init(CLzmaEnc *p) for (i = 0; i < kNumFullDistances; i++) p->posEncoders[i] = kProbInitValue; } { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - UInt32 k; + const size_t num = (size_t)0x300 << (p->lp + p->lc); + size_t k; CLzmaProb *probs = p->litProbs; for (k = 0; k < num; k++) probs[k] = kProbInitValue; } diff --git a/src/Common/lzma/Precomp.h b/src/Common/lzma/Precomp.h index 69afb2ff..7747fdd7 100644 --- a/src/Common/lzma/Precomp.h +++ b/src/Common/lzma/Precomp.h @@ -1,10 +1,127 @@ -/* Precomp.h -- StdAfx -2023-04-02 : Igor Pavlov : Public domain */ +/* Precomp.h -- precompilation file +2024-01-25 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_PRECOMP_H #define ZIP7_INC_PRECOMP_H +/* + this file must be included before another *.h files and before <windows.h>. + this file is included from the following files: + C\*.c + C\Util\*\Precomp.h <- C\Util\*\*.c + CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp + + this file can set the following macros: + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 +*/ + #include "Compiler.h" -/* #include "7zTypes.h" */ + +#ifdef _MSC_VER +// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#if _MSC_VER >= 1912 +// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#endif +#endif + +/* +// for debug: +#define UNICODE 1 +#define _UNICODE 1 +#define _WIN32_WINNT 0x0500 // win2000 +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +*/ + +#ifdef _WIN32 +/* + this "Precomp.h" file must be included before <windows.h>, + if we want to define _WIN32_WINNT before <windows.h>. +*/ + +#ifndef Z7_LARGE_PAGES +#ifndef Z7_NO_LARGE_PAGES +#define Z7_LARGE_PAGES 1 +#endif +#endif + +#ifndef Z7_LONG_PATH +#ifndef Z7_NO_LONG_PATH +#define Z7_LONG_PATH 1 +#endif +#endif + +#ifndef Z7_DEVICE_FILE +#ifndef Z7_NO_DEVICE_FILE +// #define Z7_DEVICE_FILE 1 +#endif +#endif + +// we don't change macros if included after <windows.h> +#ifndef _WINDOWS_ + +#ifndef Z7_WIN32_WINNT_MIN + #if defined(_M_ARM64) || defined(__aarch64__) + // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT) + // #define Z7_WIN32_WINNT_MIN 0x0602 // win8 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64) + #define Z7_WIN32_WINNT_MIN 0x0503 // win2003 + // #elif defined(_M_IX86) || defined(__i386__) + // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + #else // x86 and another(old) systems + #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug + #endif +#endif // Z7_WIN32_WINNT_MIN + + +#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT +#ifdef _WIN32_WINNT + // #error Stop_Compiling_Bad_WIN32_WINNT +#else + #ifndef Z7_NO_DEFINE_WIN32_WINNT +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define _WIN32_WINNT Z7_WIN32_WINNT_MIN +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #endif +#endif // _WIN32_WINNT + +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT + + +#ifndef _MBCS +#ifndef Z7_NO_UNICODE +// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code. + +#ifndef UNICODE +#define UNICODE 1 +#endif + +#ifndef _UNICODE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _UNICODE 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // Z7_NO_UNICODE +#endif // _MBCS +#endif // _WINDOWS_ + +// #include "7zWindows.h" + +#endif // _WIN32 #endif diff --git a/src/Common/lzma/Threads.c b/src/Common/lzma/Threads.c index cf52bd30..464efeca 100644 --- a/src/Common/lzma/Threads.c +++ b/src/Common/lzma/Threads.c @@ -1,7 +1,7 @@ /* Threads.c -- multithreading library -2023-03-04 : Igor Pavlov : Public domain */ +2024-03-28 : Igor Pavlov : Public domain */ #include "Precomp.h" #ifdef _WIN32 @@ -193,24 +193,23 @@ WRes CriticalSection_Init(CCriticalSection *p) #else // _WIN32 // ---------- POSIX ---------- -#ifndef __APPLE__ +#if defined(__linux__) && !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef Z7_AFFINITY_DISABLE // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET // clang < 3.6 : unknown warning group '-Wreserved-id-macro' // clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" // clang >= 13 : do not give warning #if !defined(_GNU_SOURCE) - #if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12) - #pragma GCC diagnostic ignored "-Wreserved-id-macro" - #endif -#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif // !defined(_GNU_SOURCE) #endif // Z7_AFFINITY_DISABLE -#endif // __APPLE__ +#endif // __linux__ #include "Threads.h" #include <errno.h> #include <stdlib.h> @@ -242,12 +241,13 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, if (!ret) { if (cpuSet) { - #ifdef Z7_AFFINITY_SUPPORTED - + // pthread_attr_setaffinity_np() is not supported for MUSL compile. + // so we check for __GLIBC__ here +#if defined(Z7_AFFINITY_SUPPORTED) && defined( __GLIBC__) /* printf("\n affinity :"); unsigned i; for (i = 0; i < sizeof(*cpuSet) && i < 8; i++) { @@ -265,11 +265,11 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, */ // ret2 = pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); // if (ret2) ret = ret2; - #endif +#endif } ret = pthread_create(&p->_tid, &attr, func, param); if (!ret) @@ -367,17 +367,24 @@ WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, False, signaled); } WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); } +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) +// freebsd: +#pragma GCC diagnostic ignored "-Wthread-safety-analysis" +#endif + WRes Event_Set(CEvent *p) { RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = True; - int res1 = pthread_cond_broadcast(&p->_cond); - int res2 = pthread_mutex_unlock(&p->_mutex); - return (res2 ? res2 : res1); + { + const int res1 = pthread_cond_broadcast(&p->_cond); + const int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); + } } WRes Event_Reset(CEvent *p) { RINOK(pthread_mutex_lock(&p->_mutex)) @@ -406,12 +413,12 @@ WRes Event_Close(CEvent *p) { if (!p->_created) return 0; p->_created = 0; { - int res1 = pthread_mutex_destroy(&p->_mutex); - int res2 = pthread_cond_destroy(&p->_cond); + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); return (res1 ? res1 : res2); } } @@ -485,12 +492,12 @@ WRes Semaphore_Close(CSemaphore *p) { if (!p->_created) return 0; p->_created = 0; { - int res1 = pthread_mutex_destroy(&p->_mutex); - int res2 = pthread_cond_destroy(&p->_cond); + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); return (res1 ? res1 : res2); } } @@ -547,10 +554,22 @@ LONG InterlockedIncrement(LONG volatile *addend) #endif return __sync_add_and_fetch(addend, 1); #endif } +LONG InterlockedDecrement(LONG volatile *addend) +{ + // Print("InterlockedDecrement") + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend - 1; + *addend = val; + return val; + #else + return __sync_sub_and_fetch(addend, 1); + #endif +} + #endif // _WIN32 WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) { if (Event_IsCreated(p)) diff --git a/src/Common/lzma/Threads.h b/src/Common/lzma/Threads.h index 4028464a..c1484a27 100644 --- a/src/Common/lzma/Threads.h +++ b/src/Common/lzma/Threads.h @@ -1,22 +1,31 @@ /* Threads.h -- multithreading library -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-28 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H #ifdef _WIN32 #include "7zWindows.h" #else +#include "Compiler.h" + +// #define Z7_AFFINITY_DISABLE #if defined(__linux__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef Z7_AFFINITY_DISABLE #define Z7_AFFINITY_SUPPORTED // #pragma message(" ==== Z7_AFFINITY_SUPPORTED") -// #define _GNU_SOURCE +#if !defined(_GNU_SOURCE) +// #pragma message(" ==== _GNU_SOURCE set") +// we need _GNU_SOURCE for cpu_set_t, if we compile for MUSL +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif #endif #endif #endif #include <pthread.h> @@ -171,11 +180,11 @@ WRes CriticalSection_Init(CCriticalSection *p); #define CriticalSection_Leave(p) LeaveCriticalSection(p) #else // _WIN32 -typedef struct _CEvent +typedef struct { int _created; int _manual_reset; int _state; pthread_mutex_t _mutex; @@ -197,11 +206,11 @@ WRes Event_Set(CEvent *p); WRes Event_Reset(CEvent *p); WRes Event_Wait(CEvent *p); WRes Event_Close(CEvent *p); -typedef struct _CSemaphore +typedef struct { int _created; UInt32 _count; UInt32 _maxCount; pthread_mutex_t _mutex; @@ -217,21 +226,22 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); #define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) WRes Semaphore_Wait(CSemaphore *p); WRes Semaphore_Close(CSemaphore *p); -typedef struct _CCriticalSection +typedef struct { pthread_mutex_t _mutex; } CCriticalSection; WRes CriticalSection_Init(CCriticalSection *p); void CriticalSection_Delete(CCriticalSection *cs); void CriticalSection_Enter(CCriticalSection *cs); void CriticalSection_Leave(CCriticalSection *cs); LONG InterlockedIncrement(LONG volatile *addend); +LONG InterlockedDecrement(LONG volatile *addend); #endif // _WIN32 WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p); diff --git a/src/Common/lzma/lzma-history.txt b/src/Common/lzma/lzma-history.txt index a151c4b9..20e0a441 100644 --- a/src/Common/lzma/lzma-history.txt +++ b/src/Common/lzma/lzma-history.txt @@ -1,8 +1,82 @@ HISTORY of the LZMA SDK ----------------------- +24.09 2024-11-29 +------------------------- +- The default dictionary size values for LZMA/LZMA2 compression methods were increased: + dictionary size compression level + v24.08 v24.09 v24.09 + 32-bit 64-bit + 8 MB 16 MB 16 MB -mx4 + 16 MB 32 MB 32 MB -mx5 : Normal + 32 MB 64 MB 64 MB -mx6 + 32 MB 64 MB 128 MB -mx7 : Maximum + 64 MB 64 MB 256 MB -mx8 + 64 MB 64 MB 256 MB -mx9 : Ultra + The default dictionary size values for 32-bit versions of LZMA/LZMA2 don't exceed 64 MB. +- If an archive update operation uses a temporary archive folder and + the archive is moved to the destination folder, 7-Zip shows the progress of moving + the archive file, as this operation can take a long time if the archive is large. +- Some bugs were fixed. + + +24.07 2024-06-19 +------------------------- +- Changes in files: + Asm/x86/Sha256Opt.asm + Now it uses "READONLY" flag for constant array segment. + It fixes an issue where ".rodata" section in 7-Zip for x86/x64 Linux had a "WRITE" attribute. + + +24.05 2024-05-14 +------------------------- +- New switch -myv={MMNN} to set decoder compatibility version for 7z archive creating. + {MMNN} is 4-digit number that represents the version of 7-Zip without a dot. + If -myv={MMNN} switch is specified, 7-Zip will only use compression methods that can + be decoded by the specified version {MMNN} of 7-Zip and newer versions. + If -myv={MMNN} switch is not specified, -myv=2300 is used, and 7-Zip will only + use compression methods that can be decoded by 7-Zip 23.00 and newer versions. +- New switch -myfa={FilterID} to allow 7-Zip to use the specified filter method for 7z archive creating. +- New switch -myfd={FilterID} to disallow 7-Zip to use the specified filter method for 7z archive creating. + + +24.03 2024-03-23 +------------------------- +- 7-Zip now can use new RISCV filter for compression to 7z and xz archives. + RISCV filter can increase compression ratio for data containing executable + files compiled for RISC-V architecture. +- The speed for LZMA and LZMA2 decompression in ARM64 version for Windows + was increased by 20%-60%. + It uses arm64 assembler code, and clang-cl is required for arm64 assembler code compiling. +- -slmu switch : to show timestamps as UTC instead of LOCAL TIME. +- -slsl switch : in console 7-Zip for Windows : to show file paths with + linux path separator slash '/' instead of backslash separator '\'. +- 7-Zip supports .sha256 files that use backslash path separator '\'. +- Some bugs were fixed. + + +24.01 2024-01-31 +------------------------- +- 7-Zip uses file C/Precomp.h that is included to all c and c++ files. + CPP/Common/Common.h also includes C/Precomp.h. + C/Precomp.h defines the following macros (if _WIN32 is defined): + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 + if _WIN32_WINNT is defined already, C/Precomp.h doesn't redefine it. + +- Speed optimizations for hash caclulation: CRC-32, CRC-64. +- The bug was fixed: 7-Zip for Linux could fail for multivolume creation in some cases. +- 7zr.exe for arm64 is included to LZMA SDK package. +- Some bugs were fixed. + + 23.01 2023-06-20 ------------------------- - 7-Zip now can use new ARM64 filter for compression to 7z and xz archives. ARM64 filter can increase compression ratio for data containing executable files compiled for ARM64 (AArch64) architecture. diff --git a/src/Common/lzma/lzma-sdk.txt b/src/Common/lzma/lzma-sdk.txt index 141b0fd4..f7016709 100644 --- a/src/Common/lzma/lzma-sdk.txt +++ b/src/Common/lzma/lzma-sdk.txt @@ -1,6 +1,6 @@ -LZMA SDK 23.01 +LZMA SDK 24.09 -------------- LZMA SDK provides the documentation, samples, header files, libraries, and tools you need to develop applications that use 7z / LZMA / LZMA2 / XZ compression. @@ -135,13 +135,16 @@ DOC/lzma-history.txt - history of LZMA SDK DOC/lzma-specification.txt - Specification of LZMA DOC/Methods.txt - Compression method IDs for .7z bin/installer/ - example script to create installer that uses SFX module, -bin/7zdec.exe - simplified 7z archive decoder -bin/7zr.exe - 7-Zip console program (reduced version) +bin/7zdec.exe - simplified 7z archive decoder (x86 32-bit version) +bin/7zr.exe - 7-Zip console program (reduced version) (x86 32-bit version) bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version) +bin/x64/7zdec.exe - simplified 7z archive decoder (x64 version) +bin/arm64/7zr.exe - 7-Zip console program (reduced version) (arm64 version) +bin/arm64/7zdec.exe - simplified 7z archive decoder (arm64 version) bin/lzma.exe - file->file LZMA encoder/decoder for Windows bin/7zS2.sfx - small SFX module for installers (GUI version) bin/7zS2con.sfx - small SFX module for installers (Console version) bin/7zSD.sfx - SFX module for installers. @@ -233,11 +236,11 @@ Note: LZMA features ------------- - - Variable dictionary size (up to 1 GB) + - Variable dictionary size (up to 4 GB) - Estimated compressing speed: about 2 MB/s on 2 GHz CPU - Estimated decompressing speed: - 20-30 MB/s on modern 2 GHz cpu - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC) - Small memory requirements for decompressing (16 KB + DictionarySize) @@ -283,12 +286,12 @@ Usage: LZMA <e|d> inputFile outputFile [<switches>...] -a{N}: set compression mode 0 = fast, 1 = normal default: 1 (normal) - d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB) - The maximum value for dictionary size is 1 GB = 2^30 bytes. + d{N}: Sets Dictionary size - [0, 31], default: N=24 (32 MB) + The maximum value for dictionary size is N=31 (2 GB). Dictionary size is calculated as DictionarySize = 2^N bytes. For decompressing file compressed by LZMA method with dictionary size D = 2^N you need about D bytes of memory (RAM). -fb{N}: set number of fast bytes - [5, 273], default: 128 @@ -319,11 +322,13 @@ Usage: LZMA <e|d> inputFile outputFile [<switches>...] MF_ID Memory Description bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing. bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing. bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing. + bt5 d * 11.5 + 4MB Binary Tree with 5 bytes hashing. hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing. + hc5 d * 7.5 + 4MB Hash Chain with 5 bytes hashing. -eos: write End Of Stream marker. By default LZMA doesn't write eos marker, since LZMA decoder knows uncompressed size stored in .lzma file header. |