diff options
Diffstat (limited to 'src/Common/lzma')
-rw-r--r-- | src/Common/lzma/7zTypes.h | 14 | ||||
-rw-r--r-- | src/Common/lzma/Alloc.c | 174 | ||||
-rw-r--r-- | src/Common/lzma/Alloc.h | 15 | ||||
-rw-r--r-- | src/Common/lzma/Compiler.h | 91 | ||||
-rw-r--r-- | src/Common/lzma/CpuArch.c | 217 | ||||
-rw-r--r-- | src/Common/lzma/CpuArch.h | 177 | ||||
-rw-r--r-- | src/Common/lzma/LzFind.c | 127 | ||||
-rw-r--r-- | src/Common/lzma/LzFind.h | 5 | ||||
-rw-r--r-- | src/Common/lzma/LzFindMt.c | 58 | ||||
-rw-r--r-- | src/Common/lzma/LzFindMt.h | 9 | ||||
-rw-r--r-- | src/Common/lzma/LzmaEnc.c | 40 | ||||
-rw-r--r-- | src/Common/lzma/Precomp.h | 123 | ||||
-rw-r--r-- | src/Common/lzma/Threads.c | 53 | ||||
-rw-r--r-- | src/Common/lzma/Threads.h | 20 | ||||
-rw-r--r-- | src/Common/lzma/lzma-history.txt | 74 | ||||
-rw-r--r-- | src/Common/lzma/lzma-sdk.txt | 17 |
16 files changed, 967 insertions, 247 deletions
diff --git a/src/Common/lzma/7zTypes.h b/src/Common/lzma/7zTypes.h index 1fcb2473..5b77420a 100644 --- a/src/Common/lzma/7zTypes.h +++ b/src/Common/lzma/7zTypes.h @@ -1,6 +1,6 @@ /* 7zTypes.h -- Basic types -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-24 : Igor Pavlov : Public domain */ #ifndef ZIP7_7Z_TYPES_H #define ZIP7_7Z_TYPES_H @@ -529,22 +529,22 @@ struct ISzAlloc /* #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m) */ #if defined (__clang__) || defined(__GNUC__) -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \ +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \ _Pragma("GCC diagnostic pop") #else -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL -#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL +#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL #endif #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \ - Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \ + Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \ type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \ - Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL + Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \ Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p) diff --git a/src/Common/lzma/Alloc.c b/src/Common/lzma/Alloc.c index d841bf20..63e1a121 100644 --- a/src/Common/lzma/Alloc.c +++ b/src/Common/lzma/Alloc.c @@ -1,6 +1,6 @@ /* Alloc.c -- Memory allocation functions -2023-04-02 : Igor Pavlov : Public domain */ +2024-02-18 : Igor Pavlov : Public domain */ #include "Precomp.h" #ifdef _WIN32 @@ -9,21 +9,20 @@ #include <stdlib.h> #include "Alloc.h" -#ifdef _WIN32 -#ifdef Z7_LARGE_PAGES -#if defined(__clang__) || defined(__GNUC__) -typedef void (*Z7_voidFunction)(void); -#define MY_CAST_FUNC (Z7_voidFunction) -#elif defined(_MSC_VER) && _MSC_VER > 1920 -#define MY_CAST_FUNC (void *) -// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' -#else -#define MY_CAST_FUNC +#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \ + (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64) + #define Z7_USE_DYN_GetLargePageMinimum +#endif + +// for debug: +#if 0 +#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ") +#define Z7_ALLOC_NO_OFFSET_ALLOCATOR +#endif #endif -#endif // Z7_LARGE_PAGES -#endif // _WIN32 // #define SZ_ALLOC_DEBUG /* #define SZ_ALLOC_DEBUG */ @@ -145,9 +144,11 @@ static void PrintAddr(void *p) #endif #define PRINT_FREE(name, cnt, ptr) #define Print(s) #define PrintLn() +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR #define PrintHex(v, align) +#endif #define PrintAddr(p) #endif @@ -245,11 +246,11 @@ void MidFree(void *address) #ifdef Z7_LARGE_PAGES #ifdef MEM_LARGE_PAGES - #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES + #define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES #else - #define MY__MEM_LARGE_PAGES 0x20000000 + #define MY_MEM_LARGE_PAGES 0x20000000 #endif extern SIZE_T g_LargePageSize; @@ -257,21 +258,25 @@ SIZE_T g_LargePageSize = 0; typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID); void SetLargePageSize(void) { - #ifdef Z7_LARGE_PAGES SIZE_T size; +#ifdef Z7_USE_DYN_GetLargePageMinimum +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + const Func_GetLargePageMinimum fn = - (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); if (!fn) return; size = fn(); +#else + size = GetLargePageMinimum(); +#endif if (size == 0 || (size & (size - 1)) != 0) return; g_LargePageSize = size; - #endif } #endif // Z7_LARGE_PAGES @@ -291,9 +296,9 @@ void *BigAlloc(size_t size) ps--; size2 = (size + ps) & ~ps; if (size2 >= size) { - void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); + void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE); if (p) { PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p) return p; @@ -327,22 +332,9 @@ static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(add const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; #endif -/* - uintptr_t : <stdint.h> C99 (optional) - : unsupported in VS6 -*/ - -#ifdef _WIN32 - typedef UINT_PTR UIntPtr; -#else - /* - typedef uintptr_t UIntPtr; - */ - typedef ptrdiff_t UIntPtr; -#endif - +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR #define ADJUST_ALLOC_SIZE 0 /* #define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) @@ -351,16 +343,38 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if MyAlloc() can return address that is NOT multiple of sizeof(void *). */ - /* -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) + uintptr_t : <stdint.h> C99 (optional) + : unsupported in VS6 */ -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) +typedef + #ifdef _WIN32 + UINT_PTR + #elif 1 + uintptr_t + #else + ptrdiff_t + #endif + MY_uintptr_t; + +#if 0 \ + || (defined(__CHERI__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8)) +// for 128-bit pointers (cheri): +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1)))) +#else +#define MY_ALIGN_PTR_DOWN(p, align) \ + ((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1)))) +#endif +#endif -#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) +#if !defined(_WIN32) \ + && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \ + || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) #define USE_posix_memalign #endif #ifndef USE_posix_memalign @@ -398,16 +412,15 @@ static int posix_memalign(void **ptr, size_t align, size_t size) */ #define ALLOC_ALIGN_SIZE ((size_t)1 << 7) -static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +void *z7_AlignedAlloc(size_t size) { - #ifndef USE_posix_memalign +#ifndef USE_posix_memalign void *p; void *pAligned; size_t newSize; - UNUSED_VAR(pp) /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned block to prevent cache line sharing with another allocated blocks */ @@ -430,50 +443,95 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) ((void **)pAligned)[-1] = p; return pAligned; - #else +#else void *p; - UNUSED_VAR(pp) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) return NULL; Print(" posix_memalign="); PrintAddr(p); PrintLn(); return p; - #endif +#endif +} + + +void z7_AlignedFree(void *address) +{ +#ifndef USE_posix_memalign + if (address) + MyFree(((void **)address)[-1]); +#else + free(address); +#endif +} + + +static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) +{ + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); } static void SzAlignedFree(ISzAllocPtr pp, void *address) { UNUSED_VAR(pp) - #ifndef USE_posix_memalign +#ifndef USE_posix_memalign if (address) MyFree(((void **)address)[-1]); - #else +#else free(address); - #endif +#endif } const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; -#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) - /* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ -#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] -/* -#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1] -*/ +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#if 1 + #define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] +#else + // we can use this simplified code, + // if (CAlignOffsetAlloc::offset == (k * sizeof(void *)) + #define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1]) +#endif +#endif + + +#if 0 +#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR +#include <stdio.h> +static void PrintPtr(const char *s, const void *p) +{ + const Byte *p2 = (const Byte *)&p; + unsigned i; + printf("%s %p ", s, p); + for (i = sizeof(p); i != 0;) + { + i--; + printf("%02x", p2[i]); + } + printf("\n"); +} +#endif +#endif + static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) { +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + return z7_AlignedAlloc(size); +#else const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); void *adr; void *pAligned; size_t newSize; @@ -500,8 +558,14 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; +#if 0 + printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size); + PrintPtr("base", adr); + PrintPtr("alig", pAligned); +#endif + PrintLn(); Print("- Aligned: "); Print(" size="); PrintHex(size, 8); Print(" a_size="); PrintHex(newSize, 8); @@ -511,21 +575,27 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) REAL_BLOCK_PTR_VAR(pAligned) = adr; return pAligned; +#endif } static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) { +#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) + UNUSED_VAR(pp) + z7_AlignedFree(address); +#else if (address) { const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt); PrintLn(); Print("- Aligned Free: "); PrintLn(); ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); } +#endif } void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p) diff --git a/src/Common/lzma/Alloc.h b/src/Common/lzma/Alloc.h index fac5b62f..01bf6b7d 100644 --- a/src/Common/lzma/Alloc.h +++ b/src/Common/lzma/Alloc.h @@ -1,6 +1,6 @@ /* Alloc.h -- Memory allocation functions -2023-03-04 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_ALLOC_H #define ZIP7_INC_ALLOC_H @@ -21,8 +21,11 @@ MyRealloc() is similar to realloc() for the following cases: void *MyAlloc(size_t size); void MyFree(void *address); void *MyRealloc(void *address, size_t size); +void *z7_AlignedAlloc(size_t size); +void z7_AlignedFree(void *p); + #ifdef _WIN32 #ifdef Z7_LARGE_PAGES void SetLargePageSize(void); @@ -32,14 +35,16 @@ void *MidAlloc(size_t size); void MidFree(void *address); void *BigAlloc(size_t size); void BigFree(void *address); +/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */ + #else -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) +#define MidAlloc(size) z7_AlignedAlloc(size) +#define MidFree(address) z7_AlignedFree(address) +#define BigAlloc(size) z7_AlignedAlloc(size) +#define BigFree(address) z7_AlignedFree(address) #endif extern const ISzAlloc g_Alloc; diff --git a/src/Common/lzma/Compiler.h b/src/Common/lzma/Compiler.h index 185a52de..2a9c2b7a 100644 --- a/src/Common/lzma/Compiler.h +++ b/src/Common/lzma/Compiler.h @@ -1,6 +1,6 @@ /* Compiler.h : Compiler specific defines and pragmas -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H @@ -24,13 +24,81 @@ #if defined(__MINGW32__) || defined(__MINGW64__) #define Z7_MINGW #endif +#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__)) +#define Z7_MCST_LCC +#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__) +#endif + +/* +#if defined(__AVX2__) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \ + || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \ + || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \ + || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) + #define Z7_COMPILER_AVX2_SUPPORTED + #endif +#endif +*/ + // #pragma GCC diagnostic ignored "-Wunknown-pragmas" #ifdef __clang__ // padding size of '' with 4 bytes to alignment boundary #pragma GCC diagnostic ignored "-Wpadded" + +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \ + && defined(__FreeBSD__) +// freebsd: +#pragma GCC diagnostic ignored "-Wexcess-padding" +#endif + +#if __clang_major__ >= 16 +#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage" +#endif + +#if __clang_major__ == 13 +#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) +// cheri +#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast" +#endif +#endif + +#if __clang_major__ == 13 + // for <arm_neon.h> + #pragma GCC diagnostic ignored "-Wreserved-identifier" +#endif + +#endif // __clang__ + +#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16 +// #pragma GCC diagnostic ignored "-Wcast-function-type-strict" +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION +#endif + +typedef void (*Z7_void_Function)(void); +#if defined(__clang__) || defined(__GNUC__) +#define Z7_CAST_FUNC_C (Z7_void_Function) +#elif defined(_MSC_VER) && _MSC_VER > 1920 +#define Z7_CAST_FUNC_C (void *) +// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()' +#else +#define Z7_CAST_FUNC_C +#endif +/* +#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__) + // #pragma GCC diagnostic ignored "-Wcast-function-type" +#endif +*/ +#ifdef __GNUC__ +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000) +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#endif #endif #ifdef _MSC_VER @@ -100,9 +168,10 @@ #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ _Pragma("clang loop unroll(disable)") \ _Pragma("clang loop vectorize(disable)") #define Z7_ATTRIB_NO_VECTORIZE -#elif defined(__GNUC__) && (__GNUC__ >= 5) +#elif defined(__GNUC__) && (__GNUC__ >= 5) \ + && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610)) #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) // __attribute__((optimize("no-unroll-loops"))); #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE #elif defined(_MSC_VER) && (_MSC_VER >= 1920) @@ -141,17 +210,25 @@ // #define Z7_likely #endif -#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000)) -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ +#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600)) + +#if (Z7_CLANG_VERSION < 130000) +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"") +#else +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"") -#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ +#endif + +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \ _Pragma("GCC diagnostic pop") #else -#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER -#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif #define UNUSED_VAR(x) (void)x; /* #define UNUSED_VAR(x) x=x; */ diff --git a/src/Common/lzma/CpuArch.c b/src/Common/lzma/CpuArch.c index 33f8a3ab..6e02551e 100644 --- a/src/Common/lzma/CpuArch.c +++ b/src/Common/lzma/CpuArch.c @@ -1,6 +1,6 @@ /* CpuArch.c -- CPU specific code -2023-05-18 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" // #include <stdio.h> @@ -16,9 +16,9 @@ /* cpuid instruction supports (subFunction) parameter in ECX, that is used only with some specific (function) parameter values. - But we always use only (subFunction==0). + most functions use only (subFunction==0). */ /* __cpuid(): MSVC and GCC/CLANG use same function/macro name but parameters are different. @@ -48,45 +48,51 @@ #if defined(MY_CPU_AMD64) && defined(__PIC__) \ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) -#define x86_cpuid_MACRO(p, func) { \ + /* "=&r" selects free register. It can select even rbx, if that register is free. + "=&D" for (RDI) also works, but the code can be larger with "=&D" + "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ __asm__ __volatile__ ( \ ASM_LN "mov %%rbx, %q1" \ ASM_LN "cpuid" \ ASM_LN "xchg %%rbx, %q1" \ - : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } - - /* "=&r" selects free register. It can select even rbx, if that register is free. - "=&D" for (RDI) also works, but the code can be larger with "=&D" - "2"(0) means (subFunction = 0), - 2 is (zero-based) index in the output constraint list "=c" (ECX). */ + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } #elif defined(MY_CPU_X86) && defined(__PIC__) \ && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__)) -#define x86_cpuid_MACRO(p, func) { \ +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ __asm__ __volatile__ ( \ ASM_LN "mov %%ebx, %k1" \ ASM_LN "cpuid" \ ASM_LN "xchg %%ebx, %k1" \ - : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } #else -#define x86_cpuid_MACRO(p, func) { \ +#define x86_cpuid_MACRO_2(p, func, subFunc) { \ __asm__ __volatile__ ( \ ASM_LN "cpuid" \ - : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); } + : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); } #endif +#define x86_cpuid_MACRO(p, func) x86_cpuid_MACRO_2(p, func, 0) void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { x86_cpuid_MACRO(p, func) } +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + x86_cpuid_MACRO_2(p, func, subFunc) +} + Z7_NO_INLINE UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) { @@ -204,13 +210,41 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) __asm pop ebx __asm ret 0 } +static +void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + UNUSED_VAR(p) + UNUSED_VAR(func) + UNUSED_VAR(subFunc) + __asm push ebx + __asm push edi + __asm mov edi, ecx // p + __asm mov eax, edx // func + __asm mov ecx, [esp + 12] // subFunc + __asm cpuid + __asm mov [edi ], eax + __asm mov [edi + 4], ebx + __asm mov [edi + 8], ecx + __asm mov [edi + 12], edx + __asm pop edi + __asm pop ebx + __asm ret 4 +} + #else // MY_CPU_AMD64 #if _MSC_VER >= 1600 #include <intrin.h> #define MY_cpuidex __cpuidex + +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + __cpuidex((int *)p, func, subFunc); +} + #else /* __cpuid (func == (0 or 7)) requires subfunction number in ECX. MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. @@ -218,22 +252,27 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) __cpuid() in old MSVC (14.00) x64 doesn't clear ECX We still can use __cpuid for low (func) values that don't require ECX, but __cpuid() in old MSVC will be incorrect for some func values: (func == 7). So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, - where ECX value is first parameter for FASTCALL / NO_INLINE func, + where ECX value is first parameter for FASTCALL / NO_INLINE func. So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!! */ static -Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo) +Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo) { UNUSED_VAR(subFunction) __cpuid(CPUInfo, func); } #define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info) #pragma message("======== MY_cpuidex_HACK WAS USED ========") +static +void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc) +{ + MY_cpuidex_HACK(subFunc, func, (Int32 *)p); +} #endif // _MSC_VER >= 1600 #if !defined(MY_CPU_AMD64) /* inlining for __cpuid() in MSVC x86 (32-bit) produces big ineffective code, @@ -241,15 +280,15 @@ Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, i Z7_NO_INLINE #endif void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func) { - MY_cpuidex((int *)p, (int)func, 0); + MY_cpuidex((Int32 *)p, (Int32)func, 0); } Z7_NO_INLINE UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void) { - int a[4]; + Int32 a[4]; MY_cpuidex(a, 0, 0); return a[0]; } @@ -383,27 +422,27 @@ BoolInt CPU_IsSupported_CMOV(void) { UInt32 a[4]; if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 15) & 1; + return (BoolInt)(a[3] >> 15) & 1; } BoolInt CPU_IsSupported_SSE(void) { UInt32 a[4]; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 25) & 1; + return (BoolInt)(a[3] >> 25) & 1; } BoolInt CPU_IsSupported_SSE2(void) { UInt32 a[4]; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0])) return 0; - return (a[3] >> 26) & 1; + return (BoolInt)(a[3] >> 26) & 1; } #endif @@ -418,19 +457,19 @@ static UInt32 x86cpuid_Func_1_ECX(void) } BoolInt CPU_IsSupported_AES(void) { - return (x86cpuid_Func_1_ECX() >> 25) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1; } BoolInt CPU_IsSupported_SSSE3(void) { - return (x86cpuid_Func_1_ECX() >> 9) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1; } BoolInt CPU_IsSupported_SSE41(void) { - return (x86cpuid_Func_1_ECX() >> 19) & 1; + return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1; } BoolInt CPU_IsSupported_SHA(void) { @@ -440,9 +479,26 @@ BoolInt CPU_IsSupported_SHA(void) return False; { UInt32 d[4]; z7_x86_cpuid(d, 7); - return (d[1] >> 29) & 1; + return (BoolInt)(d[1] >> 29) & 1; + } +} + + +BoolInt CPU_IsSupported_SHA512(void) +{ + if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here + + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + z7_x86_cpuid_subFunc(d, 7, 0); + if (d[0] < 1) // d[0] - is max supported subleaf value + return False; + z7_x86_cpuid_subFunc(d, 7, 1); + return (BoolInt)(d[0]) & 1; } } /* @@ -637,12 +693,12 @@ BoolInt CPU_IsSupported_AVX(void) */ { const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); - // printf("\n=== XGetBV=%d\n", bm); + // printf("\n=== XGetBV=0x%x\n", bm); return 1 - & (bm >> 1) // SSE state is supported (set by OS) for storing/restoring - & (bm >> 2); // AVX state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring + & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring } // since Win7SP1: we can use GetEnabledXStateFeatures(); } @@ -657,11 +713,40 @@ BoolInt CPU_IsSupported_AVX2(void) UInt32 d[4]; z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 - & (d[1] >> 5); // avx2 + & (BoolInt)(d[1] >> 5); // avx2 + } +} + +#if 0 +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void) +{ + if (!CPU_IsSupported_AVX()) + return False; + if (z7_x86_cpuid_GetMaxFunc() < 7) + return False; + { + UInt32 d[4]; + BoolInt v; + z7_x86_cpuid(d, 7); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + v = 1 + & (BoolInt)(d[1] >> 16) // avx512f + & (BoolInt)(d[1] >> 31); // avx512vl + if (!v) + return False; + } + { + const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK); + // printf("\n=== XGetBV=0x%x\n", bm); + return 1 + & (BoolInt)(bm >> 5) // OPMASK + & (BoolInt)(bm >> 6) // ZMM upper 256-bit + & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31 } } +#endif BoolInt CPU_IsSupported_VAES_AVX2(void) { if (!CPU_IsSupported_AVX()) @@ -672,11 +757,11 @@ BoolInt CPU_IsSupported_VAES_AVX2(void) UInt32 d[4]; z7_x86_cpuid(d, 7); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); return 1 - & (d[1] >> 5) // avx2 + & (BoolInt)(d[1] >> 5) // avx2 // & (d[1] >> 31) // avx512vl - & (d[2] >> 9); // vaes // VEX-256/EVEX + & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX } } BoolInt CPU_IsSupported_PageGB(void) @@ -687,9 +772,9 @@ BoolInt CPU_IsSupported_PageGB(void) z7_x86_cpuid(d, 0x80000000); if (d[0] < 0x80000001) return False; z7_x86_cpuid(d, 0x80000001); - return (d[3] >> 26) & 1; + return (BoolInt)(d[3] >> 26) & 1; } } @@ -746,8 +831,20 @@ BoolInt CPU_IsSupported_NEON(void) { return z7_sysctlbyname_Get_BoolInt("hw.optional.neon"); } +BoolInt CPU_IsSupported_SHA512(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512"); +} + +/* +BoolInt CPU_IsSupported_SHA3(void) +{ + return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3"); +} +*/ + #ifdef MY_CPU_ARM64 #define APPLE_CRYPTO_SUPPORT_VAL 1 #else #define APPLE_CRYPTO_SUPPORT_VAL 0 @@ -759,42 +856,92 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #else // __APPLE__ -#include <sys/auxv.h> +#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) + #define Z7_GETAUXV_AVAILABLE +#else +// #pragma message("=== is not NEW GLIBC === ") + #if defined __has_include + #if __has_include (<sys/auxv.h>) +// #pragma message("=== sys/auxv.h is avail=== ") + #define Z7_GETAUXV_AVAILABLE + #endif + #endif +#endif +#ifdef Z7_GETAUXV_AVAILABLE +// #pragma message("=== Z7_GETAUXV_AVAILABLE === ") +#include <sys/auxv.h> #define USE_HWCAP +#endif #ifdef USE_HWCAP +#if defined(__FreeBSD__) +static unsigned long MY_getauxval(int aux) +{ + unsigned long val; + if (elf_aux_info(aux, &val, sizeof(val))) + return 0; + return val; +} +#else +#define MY_getauxval getauxval + #if defined __has_include + #if __has_include (<asm/hwcap.h>) #include <asm/hwcap.h> + #endif + #endif +#endif #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ - BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); } #ifdef MY_CPU_ARM64 #define MY_HWCAP_CHECK_FUNC(name) \ MY_HWCAP_CHECK_FUNC_2(name, name) +#if 1 || defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +#endif // MY_HWCAP_CHECK_FUNC (ASIMD) #elif defined(MY_CPU_ARM) #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } + BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); } MY_HWCAP_CHECK_FUNC_2(NEON, NEON) #endif #else // USE_HWCAP #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return 0; } + BoolInt CPU_IsSupported_ ## name(void) { return 0; } +#if defined(__ARM_NEON) + BoolInt CPU_IsSupported_NEON(void) { return True; } +#else MY_HWCAP_CHECK_FUNC(NEON) +#endif #endif // USE_HWCAP MY_HWCAP_CHECK_FUNC (CRC32) MY_HWCAP_CHECK_FUNC (SHA1) MY_HWCAP_CHECK_FUNC (SHA2) MY_HWCAP_CHECK_FUNC (AES) +#ifdef MY_CPU_ARM64 +// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017. +// we define them here, if they are not defined +#ifndef HWCAP_SHA3 +// #define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SHA512 +// #pragma message("=== HWCAP_SHA512 define === ") +#define HWCAP_SHA512 (1 << 21) +#endif +MY_HWCAP_CHECK_FUNC (SHA512) +// MY_HWCAP_CHECK_FUNC (SHA3) +#endif #endif // __APPLE__ #endif // _WIN32 diff --git a/src/Common/lzma/CpuArch.h b/src/Common/lzma/CpuArch.h index 8e5d8a54..a6297ea4 100644 --- a/src/Common/lzma/CpuArch.h +++ b/src/Common/lzma/CpuArch.h @@ -1,6 +1,6 @@ /* CpuArch.h -- CPU specific code -2023-04-02 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H @@ -19,8 +19,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. MY_CPU_64BIT can be used to select fast code branch MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ +#if !defined(_M_ARM64EC) #if defined(_M_X64) \ || defined(_M_AMD64) \ || defined(__x86_64__) \ || defined(__AMD64__) \ @@ -34,8 +35,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT #endif +#endif #if defined(_M_IX86) \ || defined(__i386__) @@ -46,19 +48,28 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #if defined(_M_ARM64) \ + || defined(_M_ARM64EC) \ || defined(__AARCH64EL__) \ || defined(__AARCH64EB__) \ || defined(__aarch64__) #define MY_CPU_ARM64 - #ifdef __ILP32__ +#if defined(__ILP32__) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) #define MY_CPU_NAME "arm64-32" #define MY_CPU_SIZEOF_POINTER 4 - #else +#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16) + #define MY_CPU_NAME "arm64-128" + #define MY_CPU_SIZEOF_POINTER 16 +#else +#if defined(_M_ARM64EC) + #define MY_CPU_NAME "arm64ec" +#else #define MY_CPU_NAME "arm64" +#endif #define MY_CPU_SIZEOF_POINTER 8 - #endif +#endif #define MY_CPU_64BIT #endif @@ -132,10 +143,38 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #endif +#if defined(__sparc__) \ + || defined(__sparc) + #define MY_CPU_SPARC + #if defined(__LP64__) \ + || defined(_LP64) \ + || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_NAME "sparcv9" + #define MY_CPU_SIZEOF_POINTER 8 + #define MY_CPU_64BIT + #elif defined(__sparc_v9__) \ + || defined(__sparcv9) + #define MY_CPU_64BIT + #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "sparcv9-32" + #else + #define MY_CPU_NAME "sparcv9m" + #endif + #elif defined(__sparc_v8__) \ + || defined(__sparcv8) + #define MY_CPU_NAME "sparcv8" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "sparc" + #endif +#endif + + #if defined(__riscv) \ || defined(__riscv__) + #define MY_CPU_RISCV #if __riscv_xlen == 32 #define MY_CPU_NAME "riscv32" #elif __riscv_xlen == 64 #define MY_CPU_NAME "riscv64" @@ -144,8 +183,41 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #endif +#if defined(__loongarch__) + #define MY_CPU_LOONGARCH + #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64) + #define MY_CPU_64BIT + #endif + #if defined(__loongarch64) + #define MY_CPU_NAME "loongarch64" + #define MY_CPU_LOONGARCH64 + #else + #define MY_CPU_NAME "loongarch" + #endif +#endif + + +// #undef MY_CPU_NAME +// #undef MY_CPU_SIZEOF_POINTER +// #define __e2k__ +// #define __SIZEOF_POINTER__ 4 +#if defined(__e2k__) + #define MY_CPU_E2K + #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4) + #define MY_CPU_NAME "e2k-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "e2k" + #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8) + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #endif + #define MY_CPU_64BIT +#endif + + #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) #define MY_CPU_X86_OR_AMD64 #endif @@ -174,8 +246,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM_LE) \ || defined(MY_CPU_ARM64_LE) \ || defined(MY_CPU_IA64_LE) \ + || defined(_LITTLE_ENDIAN) \ || defined(__LITTLE_ENDIAN__) \ || defined(__ARMEL__) \ || defined(__THUMBEL__) \ || defined(__AARCH64EL__) \ @@ -250,8 +323,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #ifndef MY_CPU_NAME + // #define MY_CPU_IS_UNKNOWN #ifdef MY_CPU_LE #define MY_CPU_NAME "LE" #elif defined(MY_CPU_BE) #define MY_CPU_NAME "BE" @@ -294,11 +368,21 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define Z7_BSWAP32(v) _byteswap_ulong (v) #define Z7_BSWAP64(v) _byteswap_uint64(v) #define Z7_CPU_FAST_BSWAP_SUPPORTED -#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ - || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) - +/* GCC can generate slow code that calls function for __builtin_bswap32() for: + - GCC for RISCV, if Zbb/XTHeadBb extension is not used. + - GCC for SPARC. + The code from CLANG for SPARC also is not fastest. + So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases. +*/ +#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \ + && !defined(MY_CPU_SPARC) \ + && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \ + ) + #define Z7_BSWAP16(v) __builtin_bswap16(v) #define Z7_BSWAP32(v) __builtin_bswap32(v) #define Z7_BSWAP64(v) __builtin_bswap64(v) #define Z7_CPU_FAST_BSWAP_SUPPORTED @@ -328,15 +412,50 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - || defined(MY_CPU_ARM64) + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \ + || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6) #define MY_CPU_LE_UNALIGN #define MY_CPU_LE_UNALIGN_64 #elif defined(__ARM_FEATURE_UNALIGNED) - /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. - So we can't use unaligned 64-bit operations. */ - #define MY_CPU_LE_UNALIGN +/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions. + Description of problems: +problem-1 : 32-bit ARM architecture: + multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM) + require 32-bit (WORD) alignment (by 32-bit ARM architecture). + So there is "Alignment fault exception", if data is not aligned for 32-bit. + +problem-2 : 32-bit kernels and arm64 kernels: + 32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception". + So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux. + + But some arm64 kernels do not handle these faults in 32-bit programs. + So we have unhandled exception for such instructions. + Probably some new arm64 kernels have fixed it, and unaligned + paired-access instructions work in new kernels? + +problem-3 : compiler for 32-bit arm: + Compilers use LDRD/STRD/LDM/STM for UInt64 accesses + and for another cases where two 32-bit accesses are fused + to one multi-access instruction. + So UInt64 variables must be aligned for 32-bit, and each + 32-bit access must be aligned for 32-bit, if we want to + avoid "Alignment fault" exception (handled or unhandled). + +problem-4 : performace: + Even if unaligned access is handled by kernel, it will be slow. + So if we allow unaligned access, we can get fast unaligned + single-access, and slow unaligned paired-access. + + We don't allow unaligned access on 32-bit arm, because compiler + genarates paired-access instructions that require 32-bit alignment, + and some arm64 kernels have no handler for these instructions. + Also unaligned paired-access instructions will be slow, if kernel handles them. +*/ + // it must be disabled: + // #define MY_CPU_LE_UNALIGN #endif #endif @@ -389,13 +508,21 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) +#if 0 +// Z7_BSWAP16 can be slow for x86-msvc +#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p))) +#else +#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16) +#endif + #define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) #define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); } #if defined(MY_CPU_LE_UNALIGN_64) #define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); } #endif #else @@ -416,13 +543,29 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifndef GetBe64 #define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) #endif +#ifndef SetBe64 +#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 56); \ + _ppp_[1] = (Byte)(_vvv_ >> 48); \ + _ppp_[2] = (Byte)(_vvv_ >> 40); \ + _ppp_[3] = (Byte)(_vvv_ >> 32); \ + _ppp_[4] = (Byte)(_vvv_ >> 24); \ + _ppp_[5] = (Byte)(_vvv_ >> 16); \ + _ppp_[6] = (Byte)(_vvv_ >> 8); \ + _ppp_[7] = (Byte)_vvv_; } +#endif + #ifndef GetBe16 +#ifdef GetBe16_to32 +#define GetBe16(p) ( (UInt16) GetBe16_to32(p)) +#else #define GetBe16(p) ( (UInt16) ( \ ((UInt16)((const Byte *)(p))[0] << 8) | \ ((const Byte *)(p))[1] )) #endif +#endif #if defined(MY_CPU_BE) #define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) @@ -438,25 +581,29 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #if defined(MY_CPU_BE) +#define GetBe64a(p) (*(const UInt64 *)(const void *)(p)) #define GetBe32a(p) (*(const UInt32 *)(const void *)(p)) #define GetBe16a(p) (*(const UInt16 *)(const void *)(p)) #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define GetUi64a(p) GetUi64(p) #define GetUi32a(p) GetUi32(p) #define GetUi16a(p) GetUi16(p) #define SetUi32a(p, v) SetUi32(p, v) #define SetUi16a(p, v) SetUi16(p, v) #elif defined(MY_CPU_LE) +#define GetUi64a(p) (*(const UInt64 *)(const void *)(p)) #define GetUi32a(p) (*(const UInt32 *)(const void *)(p)) #define GetUi16a(p) (*(const UInt16 *)(const void *)(p)) #define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define GetBe64a(p) GetBe64(p) #define GetBe32a(p) GetBe32(p) #define GetBe16a(p) GetBe16(p) #define SetBe32a(p, v) SetBe32(p, v) #define SetBe16a(p, v) SetBe16(p, v) @@ -465,8 +612,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #error Stop_Compiling_Unknown_Endian_CPU_a #endif +#ifndef GetBe16_to32 +#define GetBe16_to32(p) GetBe16(p) +#endif + + #if defined(MY_CPU_X86_OR_AMD64) \ || defined(MY_CPU_ARM_OR_ARM64) \ || defined(MY_CPU_PPC_OR_PPC64) #define Z7_CPU_FAST_ROTATE_SUPPORTED @@ -485,15 +637,17 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void); BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AVX(void); BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_AVX512F_AVX512VL(void); BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_CMOV(void); BoolInt CPU_IsSupported_SSE(void); BoolInt CPU_IsSupported_SSE2(void); BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_SHA512(void); BoolInt CPU_IsSupported_PageGB(void); #elif defined(MY_CPU_ARM_OR_ARM64) @@ -509,8 +663,9 @@ BoolInt CPU_IsSupported_CRYPTO(void); BoolInt CPU_IsSupported_SHA1(void); BoolInt CPU_IsSupported_SHA2(void); BoolInt CPU_IsSupported_AES(void); #endif +BoolInt CPU_IsSupported_SHA512(void); #endif #if defined(__APPLE__) diff --git a/src/Common/lzma/LzFind.c b/src/Common/lzma/LzFind.c index 0fbd5aae..1ce40464 100644 --- a/src/Common/lzma/LzFind.c +++ b/src/Common/lzma/LzFind.c @@ -1,6 +1,6 @@ /* LzFind.c -- Match finder for LZ algorithms -2023-03-14 : Igor Pavlov : Public domain */ +2024-03-01 : Igor Pavlov : Public domain */ #include "Precomp.h" #include <string.h> @@ -107,11 +107,17 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all } return (p->bufBase != NULL); } -static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static const Byte *MatchFinder_GetPointerToCurrentPos(void *p) +{ + return ((CMatchFinder *)p)->buffer; +} -static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } +static UInt32 MatchFinder_GetNumAvailableBytes(void *p) +{ + return GET_AVAIL_BYTES((CMatchFinder *)p); +} Z7_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) @@ -570,10 +576,11 @@ void MatchFinder_Init_4(CMatchFinder *p) // (CYC_TO_POS_OFFSET == 0) is expected by some optimized code #define CYC_TO_POS_OFFSET 0 // #define CYC_TO_POS_OFFSET 1 // for debug -void MatchFinder_Init(CMatchFinder *p) +void MatchFinder_Init(void *_p) { + CMatchFinder *p = (CMatchFinder *)_p; MatchFinder_Init_HighHash(p); MatchFinder_Init_LowHash(p); MatchFinder_Init_4(p); // if (readData) @@ -606,27 +613,27 @@ void MatchFinder_Init(CMatchFinder *p) #define USE_LZFIND_SATUR_SUB_256 #endif #endif -// #elif defined(MY_CPU_ARM_OR_ARM64) -#elif defined(MY_CPU_ARM64) +#elif defined(MY_CPU_ARM64) \ + /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */ - #if defined(__clang__) && (__clang_major__ >= 8) \ - || defined(__GNUC__) && (__GNUC__ >= 8) + #if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(__GNUC__) && (__GNUC__ >= 6) #define USE_LZFIND_SATUR_SUB_128 #ifdef MY_CPU_ARM64 // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__(""))) #else - // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon"))) #endif #elif defined(_MSC_VER) #if (_MSC_VER >= 1910) #define USE_LZFIND_SATUR_SUB_128 #endif #endif - #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64) #include <arm64_neon.h> #else #include <arm_neon.h> #endif @@ -1081,11 +1088,13 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const } #define MOVE_POS \ - ++p->cyclicBufferPos; \ + p->cyclicBufferPos++; \ p->buffer++; \ - { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } + { const UInt32 pos1 = p->pos + 1; \ + p->pos = pos1; \ + if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } #define MOVE_POS_RET MOVE_POS return distances; Z7_NO_INLINE @@ -1102,22 +1111,28 @@ static void MatchFinder_MovePos(CMatchFinder *p) MOVE_POS } #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ - lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + UInt32 hv; const Byte *cur; UInt32 curMatch; \ + UInt32 lenLimit = p->lenLimit; \ + if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \ cur = p->buffer; #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) -#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) +#define SKIP_HEADER(minLen) \ + do { GET_MATCHES_HEADER2(minLen, continue) -#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \ + p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num); +#define SKIP_FOOTER \ + SkipMatchesSpec(MF_PARAMS(p)); \ + MOVE_POS \ + } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ - distances = func(MF_PARAMS(p), \ - distances, (UInt32)_maxLen_); MOVE_POS_RET + distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \ + MOVE_POS_RET #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) @@ -1132,10 +1147,11 @@ static void MatchFinder_MovePos(CMatchFinder *p) const Byte *lim = cur + lenLimit; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } -static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; GET_MATCHES_HEADER(2) HASH2_CALC curMatch = p->hash[hv]; p->hash[hv] = p->pos; @@ -1157,10 +1173,11 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) if (pos < mmm) \ mmm = pos; -static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, d2, pos; unsigned maxLen; UInt32 *hash; @@ -1198,10 +1215,11 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; UInt32 *hash; @@ -1266,12 +1284,14 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) HASH5_CALC @@ -1338,10 +1358,11 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; UInt32 h2, h3, d2, d3, pos; unsigned maxLen; UInt32 *hash; @@ -1406,12 +1427,14 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_HC(maxLen) } -static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances) { + CMatchFinder *p = (CMatchFinder *)_p; UInt32 mmm; - UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(5) HASH5_CALC @@ -1465,9 +1488,9 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) distances[-2] = 3; if (*(cur - d2 + 3) != cur[3]) break; UPDATE_maxLen - distances[-2] = maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET @@ -1488,10 +1511,11 @@ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) GET_MATCHES_FOOTER_HC(2) } -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt2_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(2) { HASH2_CALC curMatch = p->hash[hv]; @@ -1510,10 +1534,11 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) } SKIP_FOOTER } -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt3_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(3) { UInt32 h2; UInt32 *hash; @@ -1525,10 +1550,11 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) } SKIP_FOOTER } -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(4) { UInt32 h2, h3; UInt32 *hash; @@ -1541,10 +1567,11 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) } SKIP_FOOTER } -static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; SKIP_HEADER(5) { UInt32 h2, h3; UInt32 *hash; @@ -1588,10 +1615,11 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ }} while(num); \ -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc4_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(4) UInt32 h2, h3; HASH4_CALC @@ -1603,10 +1631,11 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) HC_SKIP_FOOTER } -static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +static void Hc5_MatchFinder_Skip(void *_p, UInt32 num) { + CMatchFinder *p = (CMatchFinder *)_p; HC_SKIP_HEADER(5) UInt32 h2, h3; HASH5_CALC @@ -1633,43 +1662,43 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinder_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + vTable->Init = MatchFinder_Init; + vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { if (p->numHashBytes <= 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + vTable->GetMatches = Hc4_MatchFinder_GetMatches; + vTable->Skip = Hc4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + vTable->GetMatches = Hc5_MatchFinder_GetMatches; + vTable->Skip = Hc5_MatchFinder_Skip; } } else if (p->numHashBytes == 2) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + vTable->GetMatches = Bt2_MatchFinder_GetMatches; + vTable->Skip = Bt2_MatchFinder_Skip; } else if (p->numHashBytes == 3) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + vTable->GetMatches = Bt3_MatchFinder_GetMatches; + vTable->Skip = Bt3_MatchFinder_Skip; } else if (p->numHashBytes == 4) { - vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + vTable->GetMatches = Bt4_MatchFinder_GetMatches; + vTable->Skip = Bt4_MatchFinder_Skip; } else { - vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; - vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + vTable->GetMatches = Bt5_MatchFinder_GetMatches; + vTable->Skip = Bt5_MatchFinder_Skip; } } diff --git a/src/Common/lzma/LzFind.h b/src/Common/lzma/LzFind.h index a3f72c98..67e8a6e0 100644 --- a/src/Common/lzma/LzFind.h +++ b/src/Common/lzma/LzFind.h @@ -1,6 +1,6 @@ /* LzFind.h -- Match finder for LZ algorithms -2023-03-04 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_H #define ZIP7_INC_LZ_FIND_H @@ -143,9 +143,10 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p); void MatchFinder_Init_4(CMatchFinder *p); -void MatchFinder_Init(CMatchFinder *p); +// void MatchFinder_Init(CMatchFinder *p); +void MatchFinder_Init(void *p); UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); diff --git a/src/Common/lzma/LzFindMt.c b/src/Common/lzma/LzFindMt.c index 5253e6eb..ac9d59d0 100644 --- a/src/Common/lzma/LzFindMt.c +++ b/src/Common/lzma/LzFindMt.c @@ -1,6 +1,6 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2023-04-02 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #include "Precomp.h" // #include <stdio.h> @@ -93,9 +93,9 @@ static void MtSync_Construct(CMtSync *p) Semaphore_Construct(&p->filledSemaphore); } -#define DEBUG_BUFFER_LOCK // define it to debug lock state +// #define DEBUG_BUFFER_LOCK // define it to debug lock state #ifdef DEBUG_BUFFER_LOCK #include <stdlib.h> #define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1); @@ -876,10 +876,11 @@ SRes MatchFinderMt_InitMt(CMatchFinderMt *p) return MtSync_Init(&p->btSync, kMtBtNumBlocks); } -static void MatchFinderMt_Init(CMatchFinderMt *p) +static void MatchFinderMt_Init(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; CMatchFinder *mf = MF(p); p->btBufPos = p->btBufPosLimit = NULL; @@ -980,19 +981,21 @@ static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) } -static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) +static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; return p->pointerToCurPos; } #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); -static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) +static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; if (p->btBufPos != p->btBufPosLimit) return p->btNumAvailBytes; return MatchFinderMt_GetNextBlock_Bt(p); } @@ -1242,10 +1245,11 @@ static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) return d; } -static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; const UInt32 *bt = p->btBufPos; const UInt32 len = *bt++; const UInt32 *btLim = bt + len; p->btBufPos = btLim; @@ -1266,10 +1270,11 @@ static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) } -static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; const UInt32 *bt = p->btBufPos; UInt32 len = *bt++; const UInt32 avail = p->btNumAvailBytes - 1; p->btNumAvailBytes = avail; @@ -1314,25 +1319,28 @@ static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) #define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; #define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); -static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt0_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER2_MT { p->btNumAvailBytes--; SKIP_FOOTER_MT } -static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt2_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER_MT(2) UInt32 h2; MT_HASH2_CALC hash[h2] = p->lzPos; SKIP_FOOTER_MT } -static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) +static void MatchFinderMt3_Skip(void *_p, UInt32 num) { + CMatchFinderMt *p = (CMatchFinderMt *)_p; SKIP_HEADER_MT(3) UInt32 h2, h3; MT_HASH3_CALC (hash + kFix3HashSize)[h3] = @@ -1360,41 +1368,41 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) */ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) { - vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + vTable->Init = MatchFinderMt_Init; + vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = MatchFinderMt_GetMatches; switch (MF(p)->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; - p->MixMatchesFunc = (Mf_Mix_Matches)NULL; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + p->MixMatchesFunc = NULL; + vTable->Skip = MatchFinderMt0_Skip; + vTable->GetMatches = MatchFinderMt2_GetMatches; break; case 3: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + p->MixMatchesFunc = MixMatches2; + vTable->Skip = MatchFinderMt2_Skip; break; case 4: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; // it's fast inline version of GetMatches() - // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + // vTable->GetMatches = MatchFinderMt_GetMatches_Bt4; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + p->MixMatchesFunc = MixMatches3; + vTable->Skip = MatchFinderMt3_Skip; break; default: p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; - p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + p->MixMatchesFunc = MixMatches4; vTable->Skip = - (Mf_Skip_Func)MatchFinderMt3_Skip; - // (Mf_Skip_Func)MatchFinderMt4_Skip; + MatchFinderMt3_Skip; + // MatchFinderMt4_Skip; break; } } diff --git a/src/Common/lzma/LzFindMt.h b/src/Common/lzma/LzFindMt.h index db5923ea..fcb479da 100644 --- a/src/Common/lzma/LzFindMt.h +++ b/src/Common/lzma/LzFindMt.h @@ -1,6 +1,6 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2023-03-05 : Igor Pavlov : Public domain */ +2024-01-22 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H @@ -30,17 +30,20 @@ typedef struct CCriticalSection cs; // UInt32 numBlocks_Sent; } CMtSync; -typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); + +struct CMatchFinderMt_; + +typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances); /* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ #define kMtCacheLineDummy 128 typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); -typedef struct +typedef struct CMatchFinderMt_ { /* LZ */ const Byte *pointerToCurPos; UInt32 *btBuf; diff --git a/src/Common/lzma/LzmaEnc.c b/src/Common/lzma/LzmaEnc.c index 6d13cac8..088b78f8 100644 --- a/src/Common/lzma/LzmaEnc.c +++ b/src/Common/lzma/LzmaEnc.c @@ -1,6 +1,6 @@ /* LzmaEnc.c -- LZMA Encoder -2023-04-13: Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" #include <string.h> @@ -71,13 +71,13 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (level < 0) level = 5; p->level = level; if (p->dictSize == 0) - p->dictSize = - ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : - ( level <= 6 ? ((UInt32)1 << (level + 19)) : - ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) - ))); + p->dictSize = (unsigned)level <= 4 ? + (UInt32)1 << (level * 2 + 16) : + (unsigned)level <= sizeof(size_t) / 2 + 4 ? + (UInt32)1 << (level + 20) : + (UInt32)1 << (sizeof(size_t) / 2 + 24); if (p->dictSize > p->reduceSize) { UInt32 v = (UInt32)p->reduceSize; @@ -91,10 +91,10 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->lc < 0) p->lc = 3; if (p->lp < 0) p->lp = 0; if (p->pb < 0) p->pb = 2; - if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1; + if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64; if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); @@ -194,13 +194,13 @@ LZCNT can be faster than BSR, if supported. unsigned GetPosSlot1(UInt32 pos); unsigned GetPosSlot1(UInt32 pos) { unsigned res; - BSR2_RET(pos, res); + BSR2_RET(pos, res) return res; } -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) } #else // ! LZMA_LOG_BSR @@ -511,9 +511,9 @@ struct CLzmaEnc COPY_ARR(d, s, posSlotEncoder) \ COPY_ARR(d, s, posEncoders) \ (d)->lenProbs = (s)->lenProbs; \ (d)->repLenProbs = (s)->repLenProbs; \ - memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb)); + memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp); void LzmaEnc_SaveState(CLzmaEncHandle p) { // GET_CLzmaEnc_p @@ -1039,25 +1039,25 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables( unsigned sym = --i + (1 << (kLenNumHighBits - 1)); UInt32 price = b; do { - unsigned bit = sym & 1; + const unsigned bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); } while (sym >= 2); { - unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); } } while (i); { unsigned posState; - size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); for (posState = 1; posState < numPosStates; posState++) memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); } } @@ -2695,14 +2695,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif { - unsigned lclp = p->lc + p->lp; + const unsigned lclp = p->lc + p->lp; if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp); if (!p->litProbs || !p->saveState.litProbs) { LzmaEnc_FreeLits(p, alloc); return SZ_ERROR_MEM; @@ -2801,10 +2801,10 @@ static void LzmaEnc_Init(CLzmaEnc *p) p->posEncoders[i] = kProbInitValue; } { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - UInt32 k; + const size_t num = (size_t)0x300 << (p->lp + p->lc); + size_t k; CLzmaProb *probs = p->litProbs; for (k = 0; k < num; k++) probs[k] = kProbInitValue; } diff --git a/src/Common/lzma/Precomp.h b/src/Common/lzma/Precomp.h index 69afb2ff..7747fdd7 100644 --- a/src/Common/lzma/Precomp.h +++ b/src/Common/lzma/Precomp.h @@ -1,10 +1,127 @@ -/* Precomp.h -- StdAfx -2023-04-02 : Igor Pavlov : Public domain */ +/* Precomp.h -- precompilation file +2024-01-25 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_PRECOMP_H #define ZIP7_INC_PRECOMP_H +/* + this file must be included before another *.h files and before <windows.h>. + this file is included from the following files: + C\*.c + C\Util\*\Precomp.h <- C\Util\*\*.c + CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp + + this file can set the following macros: + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 +*/ + #include "Compiler.h" -/* #include "7zTypes.h" */ + +#ifdef _MSC_VER +// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty +#if _MSC_VER >= 1912 +// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception. +#endif +#endif + +/* +// for debug: +#define UNICODE 1 +#define _UNICODE 1 +#define _WIN32_WINNT 0x0500 // win2000 +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +*/ + +#ifdef _WIN32 +/* + this "Precomp.h" file must be included before <windows.h>, + if we want to define _WIN32_WINNT before <windows.h>. +*/ + +#ifndef Z7_LARGE_PAGES +#ifndef Z7_NO_LARGE_PAGES +#define Z7_LARGE_PAGES 1 +#endif +#endif + +#ifndef Z7_LONG_PATH +#ifndef Z7_NO_LONG_PATH +#define Z7_LONG_PATH 1 +#endif +#endif + +#ifndef Z7_DEVICE_FILE +#ifndef Z7_NO_DEVICE_FILE +// #define Z7_DEVICE_FILE 1 +#endif +#endif + +// we don't change macros if included after <windows.h> +#ifndef _WINDOWS_ + +#ifndef Z7_WIN32_WINNT_MIN + #if defined(_M_ARM64) || defined(__aarch64__) + // #define Z7_WIN32_WINNT_MIN 0x0a00 // win10 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT) + // #define Z7_WIN32_WINNT_MIN 0x0602 // win8 + #define Z7_WIN32_WINNT_MIN 0x0600 // vista + #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64) + #define Z7_WIN32_WINNT_MIN 0x0503 // win2003 + // #elif defined(_M_IX86) || defined(__i386__) + // #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + #else // x86 and another(old) systems + #define Z7_WIN32_WINNT_MIN 0x0500 // win2000 + // #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug + #endif +#endif // Z7_WIN32_WINNT_MIN + + +#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT +#ifdef _WIN32_WINNT + // #error Stop_Compiling_Bad_WIN32_WINNT +#else + #ifndef Z7_NO_DEFINE_WIN32_WINNT +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER + #define _WIN32_WINNT Z7_WIN32_WINNT_MIN +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER + #endif +#endif // _WIN32_WINNT + +#ifndef WINVER + #define WINVER _WIN32_WINNT +#endif +#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT + + +#ifndef _MBCS +#ifndef Z7_NO_UNICODE +// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code. + +#ifndef UNICODE +#define UNICODE 1 +#endif + +#ifndef _UNICODE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _UNICODE 1 +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif + +#endif // Z7_NO_UNICODE +#endif // _MBCS +#endif // _WINDOWS_ + +// #include "7zWindows.h" + +#endif // _WIN32 #endif diff --git a/src/Common/lzma/Threads.c b/src/Common/lzma/Threads.c index cf52bd30..464efeca 100644 --- a/src/Common/lzma/Threads.c +++ b/src/Common/lzma/Threads.c @@ -1,6 +1,6 @@ /* Threads.c -- multithreading library -2023-03-04 : Igor Pavlov : Public domain */ +2024-03-28 : Igor Pavlov : Public domain */ #include "Precomp.h" #ifdef _WIN32 @@ -194,22 +194,21 @@ WRes CriticalSection_Init(CCriticalSection *p) #else // _WIN32 // ---------- POSIX ---------- -#ifndef __APPLE__ +#if defined(__linux__) && !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef Z7_AFFINITY_DISABLE // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET // clang < 3.6 : unknown warning group '-Wreserved-id-macro' // clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier" // clang >= 13 : do not give warning #if !defined(_GNU_SOURCE) - #if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12) - #pragma GCC diagnostic ignored "-Wreserved-id-macro" - #endif -#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +// #define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER #endif // !defined(_GNU_SOURCE) #endif // Z7_AFFINITY_DISABLE -#endif // __APPLE__ +#endif // __linux__ #include "Threads.h" #include <errno.h> @@ -243,10 +242,11 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, if (!ret) { if (cpuSet) { - #ifdef Z7_AFFINITY_SUPPORTED - + // pthread_attr_setaffinity_np() is not supported for MUSL compile. + // so we check for __GLIBC__ here +#if defined(Z7_AFFINITY_SUPPORTED) && defined( __GLIBC__) /* printf("\n affinity :"); unsigned i; for (i = 0; i < sizeof(*cpuSet) && i < 8; i++) @@ -266,9 +266,9 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, // ret2 = pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); // if (ret2) ret = ret2; - #endif +#endif } ret = pthread_create(&p->_tid, &attr, func, param); @@ -368,15 +368,22 @@ WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); } +#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) +// freebsd: +#pragma GCC diagnostic ignored "-Wthread-safety-analysis" +#endif + WRes Event_Set(CEvent *p) { RINOK(pthread_mutex_lock(&p->_mutex)) p->_state = True; - int res1 = pthread_cond_broadcast(&p->_cond); - int res2 = pthread_mutex_unlock(&p->_mutex); - return (res2 ? res2 : res1); + { + const int res1 = pthread_cond_broadcast(&p->_cond); + const int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); + } } WRes Event_Reset(CEvent *p) { @@ -407,10 +414,10 @@ WRes Event_Close(CEvent *p) if (!p->_created) return 0; p->_created = 0; { - int res1 = pthread_mutex_destroy(&p->_mutex); - int res2 = pthread_cond_destroy(&p->_cond); + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); return (res1 ? res1 : res2); } } @@ -486,10 +493,10 @@ WRes Semaphore_Close(CSemaphore *p) if (!p->_created) return 0; p->_created = 0; { - int res1 = pthread_mutex_destroy(&p->_mutex); - int res2 = pthread_cond_destroy(&p->_cond); + const int res1 = pthread_mutex_destroy(&p->_mutex); + const int res2 = pthread_cond_destroy(&p->_cond); return (res1 ? res1 : res2); } } @@ -548,8 +555,20 @@ LONG InterlockedIncrement(LONG volatile *addend) return __sync_add_and_fetch(addend, 1); #endif } +LONG InterlockedDecrement(LONG volatile *addend) +{ + // Print("InterlockedDecrement") + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend - 1; + *addend = val; + return val; + #else + return __sync_sub_and_fetch(addend, 1); + #endif +} + #endif // _WIN32 WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) { diff --git a/src/Common/lzma/Threads.h b/src/Common/lzma/Threads.h index 4028464a..c1484a27 100644 --- a/src/Common/lzma/Threads.h +++ b/src/Common/lzma/Threads.h @@ -1,6 +1,6 @@ /* Threads.h -- multithreading library -2023-04-02 : Igor Pavlov : Public domain */ +2024-03-28 : Igor Pavlov : Public domain */ #ifndef ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H @@ -8,14 +8,23 @@ #include "7zWindows.h" #else +#include "Compiler.h" + +// #define Z7_AFFINITY_DISABLE #if defined(__linux__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef Z7_AFFINITY_DISABLE #define Z7_AFFINITY_SUPPORTED // #pragma message(" ==== Z7_AFFINITY_SUPPORTED") -// #define _GNU_SOURCE +#if !defined(_GNU_SOURCE) +// #pragma message(" ==== _GNU_SOURCE set") +// we need _GNU_SOURCE for cpu_set_t, if we compile for MUSL +Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER +#define _GNU_SOURCE +Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER +#endif #endif #endif #endif @@ -172,9 +181,9 @@ WRes CriticalSection_Init(CCriticalSection *p); #else // _WIN32 -typedef struct _CEvent +typedef struct { int _created; int _manual_reset; int _state; @@ -198,9 +207,9 @@ WRes Event_Reset(CEvent *p); WRes Event_Wait(CEvent *p); WRes Event_Close(CEvent *p); -typedef struct _CSemaphore +typedef struct { int _created; UInt32 _count; UInt32 _maxCount; @@ -218,9 +227,9 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); WRes Semaphore_Wait(CSemaphore *p); WRes Semaphore_Close(CSemaphore *p); -typedef struct _CCriticalSection +typedef struct { pthread_mutex_t _mutex; } CCriticalSection; @@ -229,8 +238,9 @@ void CriticalSection_Delete(CCriticalSection *cs); void CriticalSection_Enter(CCriticalSection *cs); void CriticalSection_Leave(CCriticalSection *cs); LONG InterlockedIncrement(LONG volatile *addend); +LONG InterlockedDecrement(LONG volatile *addend); #endif // _WIN32 WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p); diff --git a/src/Common/lzma/lzma-history.txt b/src/Common/lzma/lzma-history.txt index a151c4b9..20e0a441 100644 --- a/src/Common/lzma/lzma-history.txt +++ b/src/Common/lzma/lzma-history.txt @@ -1,7 +1,81 @@ HISTORY of the LZMA SDK ----------------------- +24.09 2024-11-29 +------------------------- +- The default dictionary size values for LZMA/LZMA2 compression methods were increased: + dictionary size compression level + v24.08 v24.09 v24.09 + 32-bit 64-bit + 8 MB 16 MB 16 MB -mx4 + 16 MB 32 MB 32 MB -mx5 : Normal + 32 MB 64 MB 64 MB -mx6 + 32 MB 64 MB 128 MB -mx7 : Maximum + 64 MB 64 MB 256 MB -mx8 + 64 MB 64 MB 256 MB -mx9 : Ultra + The default dictionary size values for 32-bit versions of LZMA/LZMA2 don't exceed 64 MB. +- If an archive update operation uses a temporary archive folder and + the archive is moved to the destination folder, 7-Zip shows the progress of moving + the archive file, as this operation can take a long time if the archive is large. +- Some bugs were fixed. + + +24.07 2024-06-19 +------------------------- +- Changes in files: + Asm/x86/Sha256Opt.asm + Now it uses "READONLY" flag for constant array segment. + It fixes an issue where ".rodata" section in 7-Zip for x86/x64 Linux had a "WRITE" attribute. + + +24.05 2024-05-14 +------------------------- +- New switch -myv={MMNN} to set decoder compatibility version for 7z archive creating. + {MMNN} is 4-digit number that represents the version of 7-Zip without a dot. + If -myv={MMNN} switch is specified, 7-Zip will only use compression methods that can + be decoded by the specified version {MMNN} of 7-Zip and newer versions. + If -myv={MMNN} switch is not specified, -myv=2300 is used, and 7-Zip will only + use compression methods that can be decoded by 7-Zip 23.00 and newer versions. +- New switch -myfa={FilterID} to allow 7-Zip to use the specified filter method for 7z archive creating. +- New switch -myfd={FilterID} to disallow 7-Zip to use the specified filter method for 7z archive creating. + + +24.03 2024-03-23 +------------------------- +- 7-Zip now can use new RISCV filter for compression to 7z and xz archives. + RISCV filter can increase compression ratio for data containing executable + files compiled for RISC-V architecture. +- The speed for LZMA and LZMA2 decompression in ARM64 version for Windows + was increased by 20%-60%. + It uses arm64 assembler code, and clang-cl is required for arm64 assembler code compiling. +- -slmu switch : to show timestamps as UTC instead of LOCAL TIME. +- -slsl switch : in console 7-Zip for Windows : to show file paths with + linux path separator slash '/' instead of backslash separator '\'. +- 7-Zip supports .sha256 files that use backslash path separator '\'. +- Some bugs were fixed. + + +24.01 2024-01-31 +------------------------- +- 7-Zip uses file C/Precomp.h that is included to all c and c++ files. + CPP/Common/Common.h also includes C/Precomp.h. + C/Precomp.h defines the following macros (if _WIN32 is defined): + Z7_LARGE_PAGES 1 + Z7_LONG_PATH 1 + Z7_WIN32_WINNT_MIN 0x0500 (or higher) + _WIN32_WINNT 0x0500 (or higher) + WINVER _WIN32_WINNT + UNICODE 1 + _UNICODE 1 + if _WIN32_WINNT is defined already, C/Precomp.h doesn't redefine it. + +- Speed optimizations for hash caclulation: CRC-32, CRC-64. +- The bug was fixed: 7-Zip for Linux could fail for multivolume creation in some cases. +- 7zr.exe for arm64 is included to LZMA SDK package. +- Some bugs were fixed. + + 23.01 2023-06-20 ------------------------- - 7-Zip now can use new ARM64 filter for compression to 7z and xz archives. ARM64 filter can increase compression ratio for data containing executable diff --git a/src/Common/lzma/lzma-sdk.txt b/src/Common/lzma/lzma-sdk.txt index 141b0fd4..f7016709 100644 --- a/src/Common/lzma/lzma-sdk.txt +++ b/src/Common/lzma/lzma-sdk.txt @@ -1,5 +1,5 @@ -LZMA SDK 23.01 +LZMA SDK 24.09 -------------- LZMA SDK provides the documentation, samples, header files, libraries, and tools you need to develop applications that @@ -136,11 +136,14 @@ DOC/lzma-specification.txt - Specification of LZMA DOC/Methods.txt - Compression method IDs for .7z bin/installer/ - example script to create installer that uses SFX module, -bin/7zdec.exe - simplified 7z archive decoder -bin/7zr.exe - 7-Zip console program (reduced version) +bin/7zdec.exe - simplified 7z archive decoder (x86 32-bit version) +bin/7zr.exe - 7-Zip console program (reduced version) (x86 32-bit version) bin/x64/7zr.exe - 7-Zip console program (reduced version) (x64 version) +bin/x64/7zdec.exe - simplified 7z archive decoder (x64 version) +bin/arm64/7zr.exe - 7-Zip console program (reduced version) (arm64 version) +bin/arm64/7zdec.exe - simplified 7z archive decoder (arm64 version) bin/lzma.exe - file->file LZMA encoder/decoder for Windows bin/7zS2.sfx - small SFX module for installers (GUI version) bin/7zS2con.sfx - small SFX module for installers (Console version) bin/7zSD.sfx - SFX module for installers. @@ -234,9 +237,9 @@ Note: LZMA features ------------- - - Variable dictionary size (up to 1 GB) + - Variable dictionary size (up to 4 GB) - Estimated compressing speed: about 2 MB/s on 2 GHz CPU - Estimated decompressing speed: - 20-30 MB/s on modern 2 GHz cpu - 1-2 MB/s on 200 MHz simple RISC cpu: (ARM, MIPS, PowerPC) @@ -284,10 +287,10 @@ Usage: LZMA <e|d> inputFile outputFile [<switches>...] -a{N}: set compression mode 0 = fast, 1 = normal default: 1 (normal) - d{N}: Sets Dictionary size - [0, 30], default: 23 (8MB) - The maximum value for dictionary size is 1 GB = 2^30 bytes. + d{N}: Sets Dictionary size - [0, 31], default: N=24 (32 MB) + The maximum value for dictionary size is N=31 (2 GB). Dictionary size is calculated as DictionarySize = 2^N bytes. For decompressing file compressed by LZMA method with dictionary size D = 2^N you need about D bytes of memory (RAM). @@ -320,9 +323,11 @@ Usage: LZMA <e|d> inputFile outputFile [<switches>...] bt2 d * 9.5 + 4MB Binary Tree with 2 bytes hashing. bt3 d * 11.5 + 4MB Binary Tree with 3 bytes hashing. bt4 d * 11.5 + 4MB Binary Tree with 4 bytes hashing. + bt5 d * 11.5 + 4MB Binary Tree with 5 bytes hashing. hc4 d * 7.5 + 4MB Hash Chain with 4 bytes hashing. + hc5 d * 7.5 + 4MB Hash Chain with 5 bytes hashing. -eos: write End Of Stream marker. By default LZMA doesn't write eos marker, since LZMA decoder knows uncompressed size stored in .lzma file header. |