/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms 2021-07-13 : Igor Pavlov : Public domain */ #include "Precomp.h" #include "CpuArch.h" #include "LzFind.h" // #include "LzFindMt.h" // #define LOG_ITERS // #define LOG_THREAD #ifdef LOG_THREAD #include #define PRF(x) x #else // #define PRF(x) #endif #ifdef LOG_ITERS #include UInt64 g_NumIters_Tree; UInt64 g_NumIters_Loop; UInt64 g_NumIters_Bytes; #define LOG_ITER(x) x #else #define LOG_ITER(x) #endif // ---------- BT THREAD ---------- #define USE_SON_PREFETCH #define USE_LONG_MATCH_OPT #define kEmptyHashValue 0 // #define CYC_TO_POS_OFFSET 0 // #define CYC_TO_POS_OFFSET 1 // for debug /* MY_NO_INLINE UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) { do { UInt32 delta; if (hash == size) break; delta = *hash++; if (delta == 0 || delta > (UInt32)pos) return NULL; lenLimit++; if (delta == (UInt32)pos) { CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2; *d++ = 0; ptr1[0] = kEmptyHashValue; ptr1[1] = kEmptyHashValue; } else { UInt32 *_distances = ++d; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; const Byte *len0 = cur, *len1 = cur; UInt32 cutValue = _cutValue; const Byte *maxLen = cur + _maxLen; for (LOG_ITER(g_NumIters_Tree++);;) { LOG_ITER(g_NumIters_Loop++); { const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1); const Byte *len = (len0 < len1 ? len0 : len1); #ifdef USE_SON_PREFETCH const UInt32 pair0 = *pair; #endif if (len[diff] == len[0]) { if (++len != lenLimit && len[diff] == len[0]) while (++len != lenLimit) { LOG_ITER(g_NumIters_Bytes++); if (len[diff] != len[0]) break; } if (maxLen < len) { maxLen = len; *d++ = (UInt32)(len - cur); *d++ = delta - 1; if (len == lenLimit) { const UInt32 pair1 = pair[1]; *ptr1 = #ifdef USE_SON_PREFETCH pair0; #else pair[0]; #endif *ptr0 = pair1; _distances[-1] = (UInt32)(d - _distances); #ifdef USE_LONG_MATCH_OPT if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) break; { for (;;) { hash++; pos++; cur++; lenLimit++; { CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; #if 0 *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; #else const UInt32 p0 = ptr[0 + (diff * 2)]; const UInt32 p1 = ptr[1 + (diff * 2)]; ptr[0] = p0; ptr[1] = p1; // ptr[0] = ptr[0 + (diff * 2)]; // ptr[1] = ptr[1 + (diff * 2)]; #endif } // PrintSon(son + 2, pos - 1); // printf("\npos = %x delta = %x\n", pos, delta); len++; *d++ = 2; *d++ = (UInt32)(len - cur); *d++ = delta - 1; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) break; } } #endif break; } } } { const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); if (len[diff] < len[0]) { delta = pair[1]; if (delta >= curMatch) return NULL; *ptr1 = curMatch; ptr1 = pair + 1; len1 = len; } else { delta = *pair; if (delta >= curMatch) return NULL; *ptr0 = curMatch; ptr0 = pair; len0 = len; } delta = (UInt32)pos - delta; if (--cutValue == 0 || delta >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; _distances[-1] = (UInt32)(d - _distances); break; } } } } // for (tree iterations) } pos++; cur++; } while (d < limit); *posRes = (UInt32)pos; return d; } */ /* define cbs if you use 2 functions. GetMatchesSpecN_1() : (pos < _cyclicBufferSize) GetMatchesSpecN_2() : (pos >= _cyclicBufferSize) do not define cbs if you use 1 function: GetMatchesSpecN_2() */ // #define cbs _cyclicBufferSize /* we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32 to eliminate "movsx" BUG in old MSVC x64 compiler. */ UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes); MY_NO_INLINE UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes) { do // while (hash != size) { UInt32 delta; #ifndef cbs UInt32 cbs; #endif if (hash == size) break; delta = *hash++; if (delta == 0) return NULL; lenLimit++; #ifndef cbs cbs = _cyclicBufferSize; if ((UInt32)pos < cbs) { if (delta > (UInt32)pos) return NULL; cbs = (UInt32)pos; } #endif if (delta >= cbs) { CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); *d++ = 0; ptr1[0] = kEmptyHashValue; ptr1[1] = kEmptyHashValue; } else { UInt32 *_distances = ++d; CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); UInt32 cutValue = _cutValue; const Byte *len0 = cur, *len1 = cur; const Byte *maxLen = cur + _maxLen; // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else for (LOG_ITER(g_NumIters_Tree++);;) { LOG_ITER(g_NumIters_Loop++); { // SPEC code CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) ) << 1); const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; const Byte *len = (len0 < len1 ? len0 : len1); #ifdef USE_SON_PREFETCH const UInt32 pair0 = *pair; #endif if (len[diff] == len[0]) { if (++len != lenLimit && len[diff] == len[0]) while (++len != lenLimit) { LOG_ITER(g_NumIters_Bytes++); if (len[diff] != len[0]) break; } if (maxLen < len) { maxLen = len; *d++ = (UInt32)(len - cur); *d++ = delta - 1; if (len == lenLimit) { const UInt32 pair1 = pair[1]; *ptr1 = #ifdef USE_SON_PREFETCH pair0; #else pair[0]; #endif *ptr0 = pair1; _distances[-1] = (UInt32)(d - _distances); #ifdef USE_LONG_MATCH_OPT if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) break; { for (;;) { *d++ = 2; *d++ = (UInt32)(lenLimit - cur); *d++ = delta - 1; cur++; lenLimit++; // SPEC _cyclicBufferPos++; { // SPEC code CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1); const CLzRef *src = dest + ((diff + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1); // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; #if 0 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); #else const UInt32 p0 = src[0]; const UInt32 p1 = src[1]; dest[0] = p0; dest[1] = p1; #endif } pos++; hash++; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) break; } // for() end for long matches } #endif break; // break from TREE iterations } } } { const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); if (len[diff] < len[0]) { delta = pair[1]; *ptr1 = curMatch; ptr1 = pair + 1; len1 = len; if (delta >= curMatch) return NULL; } else { delta = *pair; *ptr0 = curMatch; ptr0 = pair; len0 = len; if (delta >= curMatch) return NULL; } delta = (UInt32)pos - delta; if (--cutValue == 0 || delta >= cbs) { *ptr0 = *ptr1 = kEmptyHashValue; _distances[-1] = (UInt32)(d - _distances); break; } } } } // for (tree iterations) } pos++; _cyclicBufferPos++; cur++; } while (d < limit); *posRes = (UInt32)pos; return d; } /* typedef UInt32 uint32plus; // size_t UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 *posRes) { do // while (hash != size) { UInt32 delta; #ifndef cbs UInt32 cbs; #endif if (hash == size) break; delta = *hash++; if (delta == 0) return NULL; #ifndef cbs cbs = _cyclicBufferSize; if ((UInt32)pos < cbs) { if (delta > (UInt32)pos) return NULL; cbs = (UInt32)pos; } #endif if (delta >= cbs) { CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); *d++ = 0; ptr1[0] = kEmptyHashValue; ptr1[1] = kEmptyHashValue; } else { CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); UInt32 *_distances = ++d; uint32plus len0 = 0, len1 = 0; UInt32 cutValue = _cutValue; uint32plus maxLen = _maxLen; // lenLimit++; // const Byte *lenLimit = cur + _lenLimit; for (LOG_ITER(g_NumIters_Tree++);;) { LOG_ITER(g_NumIters_Loop++); { // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) ) << 1); const Byte *pb = cur - delta; uint32plus len = (len0 < len1 ? len0 : len1); #ifdef USE_SON_PREFETCH const UInt32 pair0 = *pair; #endif if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { maxLen = len; *d++ = (UInt32)len; *d++ = delta - 1; if (len == lenLimit) { { const UInt32 pair1 = pair[1]; *ptr0 = pair1; *ptr1 = #ifdef USE_SON_PREFETCH pair0; #else pair[0]; #endif } _distances[-1] = (UInt32)(d - _distances); #ifdef USE_LONG_MATCH_OPT if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) break; { const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; for (;;) { *d++ = 2; *d++ = (UInt32)lenLimit; *d++ = delta - 1; _cyclicBufferPos++; { CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1); const CLzRef *src = dest + ((diff + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1); #if 0 *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); #else const UInt32 p0 = src[0]; const UInt32 p1 = src[1]; dest[0] = p0; dest[1] = p1; #endif } hash++; pos++; cur++; pb++; if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) break; } } #endif break; } } } { const UInt32 curMatch = (UInt32)pos - delta; if (pb[len] < cur[len]) { delta = pair[1]; *ptr1 = curMatch; ptr1 = pair + 1; len1 = len; } else { delta = *pair; *ptr0 = curMatch; ptr0 = pair; len0 = len; } { if (delta >= curMatch) return NULL; delta = (UInt32)pos - delta; if (delta >= cbs // delta >= _cyclicBufferSize || delta >= pos || --cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; _distances[-1] = (UInt32)(d - _distances); break; } } } } } // for (tree iterations) } pos++; _cyclicBufferPos++; cur++; } while (d < limit); *posRes = (UInt32)pos; return d; } */