From cf48b532b447faa969347fef183c6e8921c4ded2 Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Tue, 26 Feb 2019 01:50:27 +0100 Subject: Windows: Implement RAM encryption for keys on 64-bit machines using ChaCha12 cipher and t1ha non-cryptographic fast hash (https://github.com/leo-yuriev/t1ha) --- src/Common/Apidrvr.h | 1 + src/Common/Crypto.c | 209 +++++++++ src/Common/Crypto.h | 21 + src/Common/EncryptionThreadPool.c | 45 +- src/Common/Tcdefs.h | 9 +- src/Crypto/Sources | 3 + src/Crypto/t1ha.h | 261 +++++++++++ src/Crypto/t1ha2.c | 323 ++++++++++++++ src/Crypto/t1ha2_selfcheck.c | 186 ++++++++ src/Crypto/t1ha_bits.h | 904 ++++++++++++++++++++++++++++++++++++++ src/Crypto/t1ha_selfcheck.c | 99 +++++ src/Crypto/t1ha_selfcheck.h | 76 ++++ src/Driver/DriveFilter.c | 54 ++- src/Driver/Driver.vcxproj | 3 + src/Driver/Driver.vcxproj.filters | 9 + src/Driver/DumpFilter.c | 2 +- src/Driver/Ntdriver.c | 23 +- src/Driver/Ntvol.c | 4 + src/Mount/Mount.c | 13 + src/Mount/Mount.rc | 14 +- src/Mount/Resource.h | 3 +- 21 files changed, 2241 insertions(+), 21 deletions(-) create mode 100644 src/Crypto/t1ha.h create mode 100644 src/Crypto/t1ha2.c create mode 100644 src/Crypto/t1ha2_selfcheck.c create mode 100644 src/Crypto/t1ha_bits.h create mode 100644 src/Crypto/t1ha_selfcheck.c create mode 100644 src/Crypto/t1ha_selfcheck.h diff --git a/src/Common/Apidrvr.h b/src/Common/Apidrvr.h index 63de40ae..2a6941a1 100644 --- a/src/Common/Apidrvr.h +++ b/src/Common/Apidrvr.h @@ -418,5 +418,6 @@ typedef struct #define VC_DRIVER_CONFIG_ALLOW_WINDOWS_DEFRAG 0x200 #define VC_DRIVER_CONFIG_CLEAR_KEYS_ON_NEW_DEVICE_INSERTION 0x400 #define VC_DRIVER_CONFIG_ENABLE_CPU_RNG 0x800 +#define VC_DRIVER_CONFIG_ENABLE_RAM_ENCRYPTION 0x1000 #endif /* _WIN32 */ diff --git a/src/Common/Crypto.c b/src/Common/Crypto.c index 249a4117..94fca8e8 100644 --- a/src/Common/Crypto.c +++ b/src/Common/Crypto.c @@ -1220,6 +1220,7 @@ BOOL IsHwEncryptionEnabled () #ifndef TC_WINDOWS_BOOT static BOOL CpuRngDisabled = TRUE; +static BOOL RamEncryptionEnabled = FALSE; BOOL IsCpuRngSupported () { @@ -1239,6 +1240,214 @@ BOOL IsCpuRngEnabled () return !CpuRngDisabled; } +BOOL IsRamEncryptionSupported () +{ +#ifdef _WIN64 + return TRUE; +#else + return FALSE; +#endif +} + +void EnableRamEncryption (BOOL enable) +{ + RamEncryptionEnabled = enable; +} + +BOOL IsRamEncryptionEnabled () +{ + return RamEncryptionEnabled; +} + +/* masking for random index to remove bias */ +byte GetRngMask (byte count) +{ + if (count >= 128) + return 0xFF; + if (count >= 64) + return 0x7F; + if (count >= 32) + return 0x3F; + if (count >= 16) + return 0x1F; + if (count >= 8) + return 0x0F; + if (count >= 4) + return 0x07; + if (count >= 2) + return 0x03; + return 1; +} + +byte GetRandomIndex (ChaCha20RngCtx* pCtx, byte elementsCount) +{ + byte index = 0; + byte mask = GetRngMask (elementsCount); + + while (TRUE) + { + ChaCha20RngGetBytes (pCtx, &index, 1); + index &= mask; + if (index < elementsCount) + break; + } + + return index; +} + +#if defined(_WIN64) && !defined (_UEFI) && defined(TC_WINDOWS_DRIVER) +/* declaration of variables and functions used for RAM encryption on 64-bit build */ +static byte* pbKeyDerivationArea = NULL; +static ULONG cbKeyDerivationArea = 0; + +static uint64 HashSeedMask = 0; +static uint64 CipherIVMask = 0; +#ifdef TC_WINDOWS_DRIVER +ULONG AllocTag = 'MMCV'; +#endif + +BOOL InitializeSecurityParameters(GetRandSeedFn rngCallback) +{ + ChaCha20RngCtx ctx; + byte pbSeed[CHACHA20RNG_KEYSZ + CHACHA20RNG_IVSZ]; +#ifdef TC_WINDOWS_DRIVER + byte i, tagLength; +#endif + + rngCallback (pbSeed, sizeof (pbSeed)); + + ChaCha20RngInit (&ctx, pbSeed, rngCallback, 0); + +#ifdef TC_WINDOWS_DRIVER + /* generate random tag length between 1 and 4 */ + tagLength = GetRandomIndex (&ctx, 4) + 1; + + /* generate random value for tag: + * Each ASCII character in the tag must be a value in the range 0x20 (space) to 0x7E (tilde) + * So we have 95 possibility + */ + AllocTag = 0; + for (i = 0; i < tagLength; i++) + { + AllocTag = (AllocTag << 8) + (((ULONG) GetRandomIndex (&ctx, 95)) + 0x20); + } + +#endif + + cbKeyDerivationArea = 1024 * 1024; + pbKeyDerivationArea = (byte*) TCalloc(cbKeyDerivationArea); + if (!pbKeyDerivationArea) + { + cbKeyDerivationArea = 2 * PAGE_SIZE; + pbKeyDerivationArea = (byte*) TCalloc(cbKeyDerivationArea); + } + + if (!pbKeyDerivationArea) + { + cbKeyDerivationArea = 0; + return FALSE; + } + + /* fill key derivation area with random bytes */ + ChaCha20RngGetBytes (&ctx, pbKeyDerivationArea, cbKeyDerivationArea); + + /* generate hash seed mask */ + ChaCha20RngGetBytes(&ctx, (unsigned char*) &HashSeedMask, sizeof (HashSeedMask)); + + /* generate IV mask */ + ChaCha20RngGetBytes(&ctx, (unsigned char*) &CipherIVMask, sizeof (CipherIVMask)); + + FAST_ERASE64 (pbSeed, sizeof (pbSeed)); + burn (&ctx, sizeof (ctx)); + burn (&tagLength, 1); + + return TRUE; +} + +void ClearSecurityParameters() +{ + if (pbKeyDerivationArea) + { + FAST_ERASE64 (pbKeyDerivationArea, cbKeyDerivationArea); + TCfree (pbKeyDerivationArea); + pbKeyDerivationArea =NULL; + cbKeyDerivationArea = 0; + } + + FAST_ERASE64 (&HashSeedMask, 8); + FAST_ERASE64 (&CipherIVMask, 8); +#ifdef TC_WINDOWS_DRIVER + burn (&AllocTag, sizeof (AllocTag)); +#endif +} + +#ifdef TC_WINDOWS_DRIVER +static void VcProtectMemory (uint64 encID, unsigned char* pbData, size_t cbData, unsigned char* pbData2, size_t cbData2) +#else +static void VcProtectMemory (uint64 encID, unsigned char* pbData, size_t cbData, + unsigned char* pbData2, size_t cbData2, + unsigned char* pbData3, size_t cbData3, + unsigned char* pbData4, size_t cbData4) +#endif +{ + if (pbKeyDerivationArea) + { + uint64 hashLow, hashHigh, hashSeed, cipherIV; + uint64 pbKey[4]; + ChaCha256Ctx ctx; + + hashSeed = (((uint64) pbKeyDerivationArea) + encID) ^ HashSeedMask; + hashLow = t1ha2_atonce128(&hashHigh, pbKeyDerivationArea, cbKeyDerivationArea, hashSeed); + + /* set the key to the hash result */ + pbKey[0] = pbKey[2] = hashLow; + pbKey[1] = pbKey[3] = hashHigh; + + /* Initialize ChaCha12 cipher */ + cipherIV = encID ^ CipherIVMask; + ChaCha256Init (&ctx, (unsigned char*) pbKey, (unsigned char*) &cipherIV, 12); + + ChaCha256Encrypt (&ctx, pbData, cbData, pbData); + ChaCha256Encrypt (&ctx, pbData2, cbData2, pbData2); +#ifndef TC_WINDOWS_DRIVER + ChaCha256Encrypt (&ctx, pbData3, cbData3, pbData3); + ChaCha256Encrypt (&ctx, pbData4, cbData4, pbData4); +#endif + FAST_ERASE64 (pbKey, sizeof(pbKey)); + FAST_ERASE64 (&hashLow, 8); + FAST_ERASE64 (&hashHigh, 8); + FAST_ERASE64 (&hashSeed, 8); + FAST_ERASE64 (&cipherIV, 8); + burn (&ctx, sizeof (ctx)); + } +} + +uint64 VcGetEncryptionID (PCRYPTO_INFO pCryptoInfo) +{ + return ((uint64) pCryptoInfo->ks) + ((uint64) pCryptoInfo->ks2) +#ifndef TC_WINDOWS_DRIVER + + ((uint64) pCryptoInfo->master_keydata) + ((uint64) pCryptoInfo->k2) +#endif + ; +} + +void VcProtectKeys (PCRYPTO_INFO pCryptoInfo, uint64 encID) +{ +#ifdef TC_WINDOWS_DRIVER + VcProtectMemory (encID, pCryptoInfo->ks, MAX_EXPANDED_KEY, pCryptoInfo->ks2, MAX_EXPANDED_KEY); +#else + VcProtectMemory (encID, pCryptoInfo->ks, MAX_EXPANDED_KEY, + pCryptoInfo->ks2, MAX_EXPANDED_KEY, + pCryptoInfo->master_keydata, MASTER_KEYDATA_SIZE, + pCryptoInfo->k2, MASTER_KEYDATA_SIZE); +#endif +} + +void VcUnprotectKeys (PCRYPTO_INFO pCryptoInfo, uint64 encID) +{ + VcProtectKeys (pCryptoInfo, encID); +} +#endif #endif diff --git a/src/Common/Crypto.h b/src/Common/Crypto.h index 6c2befb1..0951b20b 100644 --- a/src/Common/Crypto.h +++ b/src/Common/Crypto.h @@ -208,6 +208,10 @@ typedef struct # include "GostCipher.h" # include "kuznyechik.h" # include "Camellia.h" +# include "chachaRng.h" +# ifdef _WIN64 +# include "t1ha.h" +# endif #else # include "CamelliaSmall.h" #endif @@ -381,6 +385,19 @@ void DecryptDataUnitsCurrentThread (unsigned __int8 *buf, const UINT64_STRUCT *s void EncryptBuffer (unsigned __int8 *buf, TC_LARGEST_COMPILER_UINT len, PCRYPTO_INFO cryptoInfo); void DecryptBuffer (unsigned __int8 *buf, TC_LARGEST_COMPILER_UINT len, PCRYPTO_INFO cryptoInfo); +#if defined(_WIN64) && !defined (_UEFI) && defined(TC_WINDOWS_DRIVER) +BOOL InitializeSecurityParameters(GetRandSeedFn rngCallback); +void ClearSecurityParameters(); +uint64 VcGetEncryptionID (PCRYPTO_INFO pCryptoInfo); +void VcProtectKeys (PCRYPTO_INFO pCryptoInfo, uint64 encID); +void VcUnprotectKeys (PCRYPTO_INFO pCryptoInfo, uint64 encID); +void EncryptDataUnitsCurrentThreadEx (unsigned __int8 *buf, const UINT64_STRUCT *structUnitNo, TC_LARGEST_COMPILER_UINT nbrUnits, PCRYPTO_INFO ci); +void DecryptDataUnitsCurrentThreadEx (unsigned __int8 *buf, const UINT64_STRUCT *structUnitNo, TC_LARGEST_COMPILER_UINT nbrUnits, PCRYPTO_INFO ci); +#else +#define EncryptDataUnitsCurrentThreadEx EncryptDataUnitsCurrentThread +#define DecryptDataUnitsCurrentThreadEx DecryptDataUnitsCurrentThread +#endif + BOOL IsAesHwCpuSupported (); void EnableHwEncryption (BOOL enable); BOOL IsHwEncryptionEnabled (); @@ -389,6 +406,10 @@ BOOL IsCpuRngSupported (); void EnableCpuRng (BOOL enable); BOOL IsCpuRngEnabled (); +BOOL IsRamEncryptionSupported (); +void EnableRamEncryption (BOOL enable); +BOOL IsRamEncryptionEnabled (); + #ifdef __cplusplus } #endif diff --git a/src/Common/EncryptionThreadPool.c b/src/Common/EncryptionThreadPool.c index 618e8474..fdf1101c 100644 --- a/src/Common/EncryptionThreadPool.c +++ b/src/Common/EncryptionThreadPool.c @@ -111,6 +111,43 @@ static TC_MUTEX DequeueMutex; static TC_EVENT WorkItemReadyEvent; static TC_EVENT WorkItemCompletedEvent; +#if defined(_WIN64) && defined(TC_WINDOWS_DRIVER) +void EncryptDataUnitsCurrentThreadEx (unsigned __int8 *buf, const UINT64_STRUCT *structUnitNo, TC_LARGEST_COMPILER_UINT nbrUnits, PCRYPTO_INFO ci) +{ + if (IsRamEncryptionEnabled()) + { + CRYPTO_INFO tmpCI; + memcpy (&tmpCI, ci, sizeof (CRYPTO_INFO)); + VcUnprotectKeys (&tmpCI, VcGetEncryptionID (ci)); + + EncryptDataUnitsCurrentThread (buf, structUnitNo, nbrUnits, &tmpCI); + + burn (&tmpCI, sizeof(CRYPTO_INFO)); + } + else + EncryptDataUnitsCurrentThread (buf, structUnitNo, nbrUnits, ci); +} + +void DecryptDataUnitsCurrentThreadEx (unsigned __int8 *buf, const UINT64_STRUCT *structUnitNo, TC_LARGEST_COMPILER_UINT nbrUnits, PCRYPTO_INFO ci) +{ + if (IsRamEncryptionEnabled()) + { + CRYPTO_INFO tmpCI; + memcpy (&tmpCI, ci, sizeof (CRYPTO_INFO)); + VcUnprotectKeys (&tmpCI, VcGetEncryptionID (ci)); + + DecryptDataUnitsCurrentThread (buf, structUnitNo, nbrUnits, &tmpCI); + + burn (&tmpCI, sizeof(CRYPTO_INFO)); + } + else + DecryptDataUnitsCurrentThread (buf, structUnitNo, nbrUnits, ci); +} + +#else +#define EncryptDataUnitsCurrentThreadEx EncryptDataUnitsCurrentThread +#define DecryptDataUnitsCurrentThreadEx DecryptDataUnitsCurrentThread +#endif static WorkItemState GetWorkItemState (EncryptionThreadPoolWorkItem *workItem) { @@ -152,11 +189,11 @@ static TC_THREAD_PROC EncryptionThreadProc (void *threadArg) switch (workItem->Type) { case DecryptDataUnitsWork: - DecryptDataUnitsCurrentThread (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo); + DecryptDataUnitsCurrentThreadEx (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo); break; case EncryptDataUnitsWork: - EncryptDataUnitsCurrentThread (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo); + EncryptDataUnitsCurrentThreadEx (workItem->Encryption.Data, &workItem->Encryption.StartUnitNo, workItem->Encryption.UnitCount, workItem->Encryption.CryptoInfo); break; case DeriveKeyWork: @@ -414,11 +451,11 @@ void EncryptionThreadPoolDoWork (EncryptionThreadPoolWorkType type, byte *data, switch (type) { case DecryptDataUnitsWork: - DecryptDataUnitsCurrentThread (data, startUnitNo, unitCount, cryptoInfo); + DecryptDataUnitsCurrentThreadEx (data, startUnitNo, unitCount, cryptoInfo); break; case EncryptDataUnitsWork: - EncryptDataUnitsCurrentThread (data, startUnitNo, unitCount, cryptoInfo); + EncryptDataUnitsCurrentThreadEx (data, startUnitNo, unitCount, cryptoInfo); break; default: diff --git a/src/Common/Tcdefs.h b/src/Common/Tcdefs.h index ed0c94bb..47a4bc54 100644 --- a/src/Common/Tcdefs.h +++ b/src/Common/Tcdefs.h @@ -248,9 +248,14 @@ void ThrowFatalException(int line); /* variables used in the implementation of enhanced protection of NX pool under Windows 8 and later */ extern POOL_TYPE ExDefaultNonPagedPoolType; extern ULONG ExDefaultMdlProtection; +#ifdef _WIN64 +extern ULONG AllocTag; +#else +#define AllocTag 'MMCV' +#endif -#define TCalloc(size) ((void *) ExAllocatePoolWithTag( ExDefaultNonPagedPoolType, size, 'MMCV' )) -#define TCfree(memblock) ExFreePoolWithTag( memblock, 'MMCV' ) +#define TCalloc(size) ((void *) ExAllocatePoolWithTag( ExDefaultNonPagedPoolType, size, AllocTag )) +#define TCfree(memblock) ExFreePoolWithTag( memblock, AllocTag ) #define DEVICE_DRIVER diff --git a/src/Crypto/Sources b/src/Crypto/Sources index 36fa89e7..2db68a7a 100644 --- a/src/Crypto/Sources +++ b/src/Crypto/Sources @@ -37,6 +37,9 @@ SOURCES = \ SerpentFast.c \ SerpentFast_simd.cpp \ Sha2.c \ + t1ha_selfcheck.c \ + t1ha2.c \ + t1ha2_selfcheck.c \ Twofish.c \ Twofish_$(TC_ARCH).S \ GostCipher.c \ diff --git a/src/Crypto/t1ha.h b/src/Crypto/t1ha.h new file mode 100644 index 00000000..97615b51 --- /dev/null +++ b/src/Crypto/t1ha.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, + * Fast Positive Hash. + * + * Portions Copyright (c) 2010-2018 Leonid Yuriev , + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * + * The Future will Positive. Всё будет хорошо. + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once + +/***************************************************************************** + * + * PLEASE PAY ATTENTION TO THE FOLLOWING NOTES + * about macros definitions which controls t1ha behaviour and/or performance. + * + * + * 1) T1HA_SYS_UNALIGNED_ACCESS = Defines the system/platform/CPU/architecture + * abilities for unaligned data access. + * + * By default, when the T1HA_SYS_UNALIGNED_ACCESS not defined, + * it will defined on the basis hardcoded knowledge about of capabilities + * of most common CPU architectures. But you could override this + * default behavior when build t1ha library itself: + * + * // To disable unaligned access at all. + * #define T1HA_SYS_UNALIGNED_ACCESS 0 + * + * // To enable unaligned access, but indicate that it significally slow. + * #define T1HA_SYS_UNALIGNED_ACCESS 1 + * + * // To enable unaligned access, and indicate that it effecient. + * #define T1HA_SYS_UNALIGNED_ACCESS 2 + * + * + * 2) T1HA_USE_FAST_ONESHOT_READ = Controls the data reads at the end of buffer. + * + * When defined to non-zero, t1ha will use 'one shot' method for reading + * up to 8 bytes at the end of data. In this case just the one 64-bit read + * will be performed even when the available less than 8 bytes. + * + * This is little bit faster that switching by length of data tail. + * Unfortunately this will triggering a false-positive alarms from Valgrind, + * AddressSanitizer and other similar tool. + * + * By default, t1ha defines it to 1, but you could override this + * default behavior when build t1ha library itself: + * + * // For little bit faster and small code. + * #define T1HA_USE_FAST_ONESHOT_READ 1 + * + * // For calmness if doubt. + * #define T1HA_USE_FAST_ONESHOT_READ 0 + * + * + * 3) T1HA0_RUNTIME_SELECT = Controls choice fastest function in runtime. + * + * t1ha library offers the t1ha0() function as the fastest for current CPU. + * But actual CPU's features/capabilities and may be significantly different, + * especially on x86 platform. Therefore, internally, t1ha0() may require + * dynamic dispatching for choice best implementation. + * + * By default, t1ha enables such runtime choice and (may be) corresponding + * indirect calls if it reasonable, but you could override this default + * behavior when build t1ha library itself: + * + * // To enable runtime choice of fastest implementation. + * #define T1HA0_RUNTIME_SELECT 1 + * + * // To disable runtime choice of fastest implementation. + * #define T1HA0_RUNTIME_SELECT 0 + * + * When T1HA0_RUNTIME_SELECT is nonzero the t1ha0_resolve() function could + * be used to get actual t1ha0() implementation address at runtime. This is + * useful for two cases: + * - calling by local pointer-to-function usually is little + * bit faster (less overhead) than via a PLT thru the DSO boundary. + * - GNU Indirect functions (see below) don't supported by environment + * and calling by t1ha0_funcptr is not available and/or expensive. + * + * 4) T1HA_USE_INDIRECT_FUNCTIONS = Controls usage of GNU Indirect functions. + * + * In continue of T1HA0_RUNTIME_SELECT the T1HA_USE_INDIRECT_FUNCTIONS + * controls usage of ELF indirect functions feature. In general, when + * available, this reduces overhead of indirect function's calls though + * a DSO-bundary (https://sourceware.org/glibc/wiki/GNU_IFUNC). + * + * By default, t1ha engage GNU Indirect functions when it available + * and useful, but you could override this default behavior when build + * t1ha library itself: + * + * // To enable use of GNU ELF Indirect functions. + * #define T1HA_USE_INDIRECT_FUNCTIONS 1 + * + * // To disable use of GNU ELF Indirect functions. This may be useful + * // if the actual toolchain or the system's loader don't support ones. + * #define T1HA_USE_INDIRECT_FUNCTIONS 0 + * + * 5) T1HA0_AESNI_AVAILABLE = Controls AES-NI detection and dispatching on x86. + * + * In continue of T1HA0_RUNTIME_SELECT the T1HA0_AESNI_AVAILABLE controls + * detection and usage of AES-NI CPU's feature. On the other hand, this + * requires compiling parts of t1ha library with certain properly options, + * and could be difficult or inconvenient in some cases. + * + * By default, t1ha engade AES-NI for t1ha0() on the x86 platform, but + * you could override this default behavior when build t1ha library itself: + * + * // To disable detection and usage of AES-NI instructions for t1ha0(). + * // This may be useful when you unable to build t1ha library properly + * // or known that AES-NI will be unavailable at the deploy. + * #define T1HA0_AESNI_AVAILABLE 0 + * + * // To force detection and usage of AES-NI instructions for t1ha0(), + * // but I don't known reasons to anybody would need this. + * #define T1HA0_AESNI_AVAILABLE 1 + * + * 6) T1HA0_DISABLED, T1HA1_DISABLED, T1HA2_DISABLED = Controls availability of + * t1ha functions. + * + * In some cases could be useful to import/use only few of t1ha functions + * or just the one. So, this definitions allows disable corresponding parts + * of t1ha library. + * + * // To disable t1ha0(), t1ha0_32le(), t1ha0_32be() and all AES-NI. + * #define T1HA0_DISABLED + * + * // To disable t1ha1_le() and t1ha1_be(). + * #define T1HA1_DISABLED + * + * // To disable t1ha2_atonce(), t1ha2_atonce128() and so on. + * #define T1HA2_DISABLED + * + *****************************************************************************/ + +#define T1HA_VERSION_MAJOR 2 +#define T1HA_VERSION_MINOR 1 +#define T1HA_VERSION_RELEASE 0 + +#include "Common/Tcdefs.h" +#include "config.h" +#include "misc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define T1HA_ALIGN_PREFIX CRYPTOPP_ALIGN_DATA(32) +#define T1HA_ALIGN_SUFFIX + +#ifdef _MSC_VER +#define uint8_t byte +#define uint16_t uint16 +#define uint32_t uint32 +#define uint64_t uint64 +#endif + +typedef union T1HA_ALIGN_PREFIX t1ha_state256 { + uint8_t bytes[32]; + uint32_t u32[8]; + uint64_t u64[4]; + struct { + uint64_t a, b, c, d; + } n; +} t1ha_state256_t T1HA_ALIGN_SUFFIX; + +typedef struct t1ha_context { + t1ha_state256_t state; + t1ha_state256_t buffer; + size_t partial; + uint64_t total; +} t1ha_context_t; + +/****************************************************************************** + * + * t1ha2 = 64 and 128-bit, SLIGHTLY MORE ATTENTION FOR QUALITY AND STRENGTH. + * + * - The recommended version of "Fast Positive Hash" with good quality + * for checksum, hash tables and fingerprinting. + * - Portable and extremely efficiency on modern 64-bit CPUs. + * Designed for 64-bit little-endian platforms, + * in other cases will runs slowly. + * - Great quality of hashing and still faster than other non-t1ha hashes. + * Provides streaming mode and 128-bit result. + * + * Note: Due performance reason 64- and 128-bit results are completely + * different each other, i.e. 64-bit result is NOT any part of 128-bit. + */ + +/* The at-once variant with 64-bit result */ +uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed); + +/* The at-once variant with 128-bit result. + * Argument `extra_result` is NOT optional and MUST be valid. + * The high 64-bit part of 128-bit hash will be always unconditionally + * stored to the address given by `extra_result` argument. */ +uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, + const void *__restrict data, size_t length, + uint64_t seed); + +/* The init/update/final trinity for streaming. + * Return 64 or 128-bit result depentently from `extra_result` argument. */ +void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y); +void t1ha2_update(t1ha_context_t *__restrict ctx, + const void *__restrict data, size_t length); + +/* Argument `extra_result` is optional and MAY be NULL. + * - If `extra_result` is NOT NULL then the 128-bit hash will be calculated, + * and high 64-bit part of it will be stored to the address given + * by `extra_result` argument. + * - Otherwise the 64-bit hash will be calculated + * and returned from function directly. + * + * Note: Due performance reason 64- and 128-bit results are completely + * different each other, i.e. 64-bit result is NOT any part of 128-bit. */ +uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, + uint64_t *__restrict extra_result /* optional */); + + +int t1ha_selfcheck__t1ha2_atonce(void); +int t1ha_selfcheck__t1ha2_atonce128(void); +int t1ha_selfcheck__t1ha2_stream(void); +int t1ha_selfcheck__t1ha2(void); + +#ifdef __cplusplus +} +#endif diff --git a/src/Crypto/t1ha2.c b/src/Crypto/t1ha2.c new file mode 100644 index 00000000..1a67f9c4 --- /dev/null +++ b/src/Crypto/t1ha2.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, + * Fast Positive Hash. + * + * Portions Copyright (c) 2010-2018 Leonid Yuriev , + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * + * The Future will Positive. Всё будет хорошо. + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x, + uint64_t y) { + s->n.a = x; + s->n.b = y; +} + +static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x, + uint64_t y) { + s->n.c = rot64(y, 23) + ~x; + s->n.d = ~y + rot64(x, 19); +} + +/* TODO: C++ template in the next version */ +#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ + \ + const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \ + const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \ + s->n.d ^= s->n.b + rot64(w1, 38); \ + s->n.c ^= s->n.a + rot64(w0, 57); \ + s->n.b ^= prime_6 * (c13 + w2); \ + s->n.a ^= prime_5 * (d02 + w3); \ + } while (0) + +static __always_inline void squash(t1ha_state256_t *s) { + s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23)); + s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d); +} + +/* TODO: C++ template in the next version */ +#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + const void *detent = (const uint8_t *)data + len - 31; \ + do { \ + const uint64_t *v = (const uint64_t *)data; \ + data = (const uint64_t *)data + 4; \ + prefetch(data); \ + T1HA2_UPDATE(le, ALIGNESS, state, v); \ + } while (likely(data < detent)); \ + } while (0) + +/* TODO: C++ template in the next version */ +#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t *v = (const uint64_t *)data; \ + switch (len) { \ + default: \ + mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ + prime_1); \ + /* fall through */ \ + case 0: \ + return final64(s->n.a, s->n.b); \ + } \ + } while (0) + +/* TODO: C++ template in the next version */ +#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t *v = (const uint64_t *)data; \ + switch (len) { \ + default: \ + mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ + prime_1); \ + /* fall through */ \ + case 0: \ + return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \ + } \ + } while (0) + +static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c, + uint64_t d, uint64_t *h) { + mixup64(&a, &b, rot64(c, 41) ^ d, prime_0); + mixup64(&b, &c, rot64(d, 23) ^ a, prime_6); + mixup64(&c, &d, rot64(a, 19) ^ b, prime_5); + mixup64(&d, &a, rot64(b, 31) ^ c, prime_4); + *h = c + d; + return a ^ b; +} + +//------------------------------------------------------------------------------ + +uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) { + t1ha_state256_t state; + init_ab(&state, seed, length); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + if (unlikely(length > 32)) { + init_cd(&state, seed, length); + T1HA2_LOOP(le, unaligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, unaligned, &state, data, length); +#else + if ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0) { + if (unlikely(length > 32)) { + init_cd(&state, seed, length); + T1HA2_LOOP(le, unaligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, unaligned, &state, data, length); + } else { + if (unlikely(length > 32)) { + init_cd(&state, seed, length); + T1HA2_LOOP(le, aligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, aligned, &state, data, length); + } +#endif +} + +uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, + const void *__restrict data, size_t length, + uint64_t seed) { + t1ha_state256_t state; + init_ab(&state, seed, length); + init_cd(&state, seed, length); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + if (unlikely(length > 32)) { + T1HA2_LOOP(le, unaligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); +#else + if ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0) { + if (unlikely(length > 32)) { + T1HA2_LOOP(le, unaligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); + } else { + if (unlikely(length > 32)) { + T1HA2_LOOP(le, aligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, aligned, &state, data, length); + } +#endif +} + +//------------------------------------------------------------------------------ + +void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { + init_ab(&ctx->state, seed_x, seed_y); + init_cd(&ctx->state, seed_x, seed_y); + ctx->partial = 0; + ctx->total = 0; +} + +void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data, + size_t length) { + ctx->total += length; + + if (ctx->partial) { + const size_t left = 32 - ctx->partial; + const size_t chunk = (length >= left) ? left : length; + memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); + ctx->partial += chunk; + if (ctx->partial < 32) { + assert(left >= length); + return; + } + ctx->partial = 0; + data = (const uint8_t *)data + chunk; + length -= chunk; + T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64); + } + + if (length >= 32) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA2_LOOP(le, unaligned, &ctx->state, data, length); +#else + if ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0) { + T1HA2_LOOP(le, unaligned, &ctx->state, data, length); + } else { + T1HA2_LOOP(le, aligned, &ctx->state, data, length); + } +#endif + length &= 31; + } + + if (length) + memcpy(ctx->buffer.bytes, data, ctx->partial = length); +} + +uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, + uint64_t *__restrict extra_result) { + uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63); +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + bits = bswap64(bits); +#endif + t1ha2_update(ctx, &bits, 8); + + if (likely(!extra_result)) { + squash(&ctx->state); + T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); + } + + T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); +} diff --git a/src/Crypto/t1ha2_selfcheck.c b/src/Crypto/t1ha2_selfcheck.c new file mode 100644 index 00000000..35e21916 --- /dev/null +++ b/src/Crypto/t1ha2_selfcheck.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, + * Fast Positive Hash. + * + * Portions Copyright (c) 2010-2018 Leonid Yuriev , + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * + * The Future will Positive. Всё будет хорошо. + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_2atonce[81] = { 0, + 0x772C7311BE32FF42, 0x444753D23F207E03, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0x25A7201C85D9E2A3, 0x911573EDA15299AA, + 0x5C0062B669E18E4C, 0x17734ADE08D54E28, 0xFFF036E33883F43B, 0xFE0756E7777DF11E, + 0x37972472D023F129, 0x6CFCE201B55C7F57, 0xE019D1D89F02B3E1, 0xAE5CC580FA1BB7E6, + 0x295695FB7E59FC3A, 0x76B6C820A40DD35E, 0xB1680A1768462B17, 0x2FB6AF279137DADA, + 0x28FB6B4366C78535, 0xEC278E53924541B1, 0x164F8AAB8A2A28B5, 0xB6C330AEAC4578AD, + 0x7F6F371070085084, 0x94DEAD60C0F448D3, 0x99737AC232C559EF, 0x6F54A6F9CA8EDD57, + 0x979B01E926BFCE0C, 0xF7D20BC85439C5B4, 0x64EDB27CD8087C12, 0x11488DE5F79C0BE2, + 0x25541DDD1680B5A4, 0x8B633D33BE9D1973, 0x404A3113ACF7F6C6, 0xC59DBDEF8550CD56, + 0x039D23C68F4F992C, 0x5BBB48E4BDD6FD86, 0x41E312248780DF5A, 0xD34791CE75D4E94F, + 0xED523E5D04DCDCFF, 0x7A6BCE0B6182D879, 0x21FB37483CAC28D8, 0x19A1B66E8DA878AD, + 0x6F804C5295B09ABE, 0x2A4BE5014115BA81, 0xA678ECC5FC924BE0, 0x50F7A54A99A36F59, + 0x0FD7E63A39A66452, 0x5AB1B213DD29C4E4, 0xF3ED80D9DF6534C5, 0xC736B12EF90615FD +}; + +const uint64_t t1ha_refval_2atonce128[81] = { 0x4EC7F6A48E33B00A, + 0xB7B7FAA5BD7D8C1E, 0x3269533F66534A76, 0x6C3EC6B687923BFC, 0xC096F5E7EFA471A9, + 0x79D8AFB550CEA471, 0xCEE0507A20FD5119, 0xFB04CFFC14A9F4BF, 0xBD4406E923807AF2, + 0x375C02FF11010491, 0xA6EA4C2A59E173FF, 0xE0A606F0002CADDF, 0xE13BEAE6EBC07897, + 0xF069C2463E48EA10, 0x75BEE1A97089B5FA, 0x378F22F8DE0B8085, 0x9C726FC4D53D0D8B, + 0x71F6130A2D08F788, 0x7A9B20433FF6CF69, 0xFF49B7CD59BF6D61, 0xCCAAEE0D1CA9C6B3, + 0xC77889D86039D2AD, 0x7B378B5BEA9B0475, 0x6520BFA79D59AD66, 0x2441490CB8A37267, + 0xA715A66B7D5CF473, 0x9AE892C88334FD67, 0xD2FFE9AEC1D2169A, 0x790B993F18B18CBB, + 0xA0D02FBCF6A7B1AD, 0xA90833E6F151D0C1, 0x1AC7AFA37BD79BE0, 0xD5383628B2881A24, + 0xE5526F9D63F9F8F1, 0xC1F165A01A6D1F4D, 0x6CCEF8FF3FCFA3F2, 0x2030F18325E6DF48, + 0x289207230E3FB17A, 0x077B66F713A3C4B9, 0x9F39843CAF871754, 0x512FDA0F808ACCF3, + 0xF4D9801CD0CD1F14, 0x28A0C749ED323638, 0x94844CAFA671F01C, 0xD0E261876B8ACA51, + 0x8FC2A648A4792EA2, 0x8EF87282136AF5FE, 0x5FE6A54A9FBA6B40, 0xA3CC5B8FE6223D54, + 0xA8C3C0DD651BB01C, 0x625E9FDD534716F3, 0x1AB2604083C33AC5, 0xDE098853F8692F12, + 0x4B0813891BD87624, 0x4AB89C4553D182AD, 0x92C15AA2A3C27ADA, 0xFF2918D68191F5D9, + 0x06363174F641C325, 0x667112ADA74A2059, 0x4BD605D6B5E53D7D, 0xF2512C53663A14C8, + 0x21857BCB1852667C, 0xAFBEBD0369AEE228, 0x7049340E48FBFD6B, 0x50710E1924F46954, + 0x869A75E04A976A3F, 0x5A41ABBDD6373889, 0xA781778389B4B188, 0x21A3AFCED6C925B6, + 0x107226192EC10B42, 0x62A862E84EC2F9B1, 0x2B15E91659606DD7, 0x613934D1F9EC5A42, + 0x4DC3A96DC5361BAF, 0xC80BBA4CB5F12903, 0x3E3EDAE99A7D6987, 0x8F97B2D55941DCB0, + 0x4C9787364C3E4EC1, 0xEF0A2D07BEA90CA7, 0x5FABF32C70AEEAFB, 0x3356A5CFA8F23BF4 +}; + +const uint64_t t1ha_refval_2stream[81] = { 0x3C8426E33CB41606, + 0xFD74BE70EE73E617, 0xF43DE3CDD8A20486, 0x882FBCB37E8EA3BB, 0x1AA2CDD34CAA3D4B, + 0xEE755B2BFAE07ED5, 0xD4E225250D92E213, 0xA09B49083205965B, 0xD47B21724EF9EC9E, + 0xAC888FC3858CEE11, 0x94F820D85736F244, 0x1707951CCA920932, 0x8E0E45603F7877F0, + 0x9FD2592C0E3A7212, 0x9A66370F3AE3D427, 0xD33382D2161DE2B7, 0x9A35BE079DA7115F, + 0x73457C7FF58B4EC3, 0xBE8610BD53D7CE98, 0x65506DFE5CCD5371, 0x286A321AF9D5D9FA, + 0xB81EF9A7EF3C536D, 0x2CFDB5E6825C6E86, 0xB2A58CBFDFDD303A, 0xD26094A42B950635, + 0xA34D666A5F02AD9A, 0x0151E013EBCC72E5, 0x9254A6EA7FCB6BB5, 0x10C9361B3869DC2B, + 0xD7EC55A060606276, 0xA2FF7F8BF8976FFD, 0xB5181BB6852DCC88, 0x0EE394BB6178BAFF, + 0x3A8B4B400D21B89C, 0xEC270461970960FD, 0x615967FAB053877E, 0xFA51BF1CFEB4714C, + 0x29FDA8383070F375, 0xC3B663061BC52EDA, 0x192BBAF1F1A57923, 0x6D193B52F93C53AF, + 0x7F6F5639FE87CA1E, 0x69F7F9140B32EDC8, 0xD0F2416FB24325B6, 0x62C0E37FEDD49FF3, + 0x57866A4B809D373D, 0x9848D24BD935E137, 0xDFC905B66734D50A, 0x9A938DD194A68529, + 0x8276C44DF0625228, 0xA4B35D00AD67C0AB, 0x3D9CB359842DB452, 0x4241BFA8C23B267F, + 0x650FA517BEF15952, 0x782DE2ABD8C7B1E1, 0x4EAE456166CA3E15, 0x40CDF3A02614E337, + 0xAD84092C46102172, 0x0C68479B03F9A167, 0x7E1BA046749E181C, 0x3F3AB41A697382C1, + 0xC5E5DD6586EBFDC4, 0xFF926CD4EB02555C, 0x035CFE67F89E709B, 0x89F06AB6464A1B9D, + 0x8EFF58F3F7DEA758, 0x8B54AC657902089F, 0xC6C4F1F9F8DA4D64, 0xBDB729048AAAC93A, + 0xEA76BA628F5E5CD6, 0x742159B728B8A979, 0x6D151CD3C720E53D, 0xE97FFF9368FCDC42, + 0xCA5B38314914FBDA, 0xDD92C91D8B858EAE, 0x66E5F07CF647CBF2, 0xD4CF9B42F4985AFB, + 0x72AE17AC7D92F6B7, 0xB8206B22AB0472E1, 0x385876B5CFD42479, 0x03294A249EBE6B26 +}; + +const uint64_t t1ha_refval_2stream128[81] = { 0xCD2801D3B92237D6, + 0x10E4D47BD821546D, 0x9100704B9D65CD06, 0xD6951CB4016313EF, 0x24DB636F96F474DA, + 0x3F4AF7DF3C49E422, 0xBFF25B8AF143459B, 0xA157EC13538BE549, 0xD3F5F52C47DBD419, + 0x0EF3D7D735AF1575, 0x46B7B892823F7B1B, 0xEE22EA4655213289, 0x56AD76F02FE929BC, + 0x9CF6CD1AC886546E, 0xAF45CE47AEA0B933, 0x535F9DC09F3996B7, 0x1F0C3C01694AE128, + 0x18495069BE0766F7, 0x37E5FFB3D72A4CB1, 0x6D6C2E9299F30709, 0x4F39E693F50B41E3, + 0xB11FC4EF0658E116, 0x48BFAACB78E5079B, 0xE1B4C89C781B3AD0, 0x81D2F34888D333A1, + 0xF6D02270D2EA449C, 0xC884C3C2C3CE1503, 0x711AE16BA157A9B9, 0x1E6140C642558C9D, + 0x35AB3D238F5DC55B, 0x33F07B6AEF051177, 0xE57336776EEFA71C, 0x6D445F8318BA3752, + 0xD4F5F6631934C988, 0xD5E260085727C4A2, 0x5B54B41EC180B4FA, 0x7F5D75769C15A898, + 0xAE5A6DB850CA33C6, 0x038CCB8044663403, 0xDA16310133DC92B8, 0x6A2FFB7AB2B7CE2B, + 0xDC1832D9229BAE20, 0x8C62C479F5ABC9E4, 0x5EB7B617857C9CCB, 0xB79CF7D749A1E80D, + 0xDE7FAC3798324FD3, 0x8178911813685D06, 0x6A726CBD394D4410, 0x6CBE6B3280DA1113, + 0x6829BA4410CF1148, 0xFA7E417EB26C5BC6, 0x22ED87884D6E3A49, 0x15F1472D5115669D, + 0x2EA0B4C8BF69D318, 0xDFE87070AA545503, 0x6B4C14B5F7144AB9, 0xC1ED49C06126551A, + 0x351919FC425C3899, 0x7B569C0FA6F1BD3E, 0x713AC2350844CFFD, 0xE9367F9A638C2FF3, + 0x97F17D325AEA0786, 0xBCB907CC6CF75F91, 0x0CB7517DAF247719, 0xBE16093CC45BE8A9, + 0x786EEE97359AD6AB, 0xB7AFA4F326B97E78, 0x2694B67FE23E502E, 0x4CB492826E98E0B4, + 0x838D119F74A416C7, 0x70D6A91E4E5677FD, 0xF3E4027AD30000E6, 0x9BDF692795807F77, + 0x6A371F966E034A54, 0x8789CF41AE4D67EF, 0x02688755484D60AE, 0xD5834B3A4BF5CE42, + 0x9405FC61440DE25D, 0x35EB280A157979B6, 0x48D40D6A525297AC, 0x6A87DC185054BADA +}; + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha2_atonce(void) { + return t1ha_selfcheck(t1ha2_atonce, t1ha_refval_2atonce); +} + +__cold static uint64_t thunk_atonce128(const void *data, size_t len, + uint64_t seed) { + uint64_t unused; + return t1ha2_atonce128(&unused, data, len, seed); +} + +__cold int t1ha_selfcheck__t1ha2_atonce128(void) { + return t1ha_selfcheck(thunk_atonce128, t1ha_refval_2atonce128); +} + +__cold static uint64_t thunk_stream(const void *data, size_t len, + uint64_t seed) { + t1ha_context_t ctx; + t1ha2_init(&ctx, seed, seed); + t1ha2_update(&ctx, data, len); + return t1ha2_final(&ctx, NULL); +} + +__cold static uint64_t thunk_stream128(const void *data, size_t len, + uint64_t seed) { + uint64_t unused; + t1ha_context_t ctx; + t1ha2_init(&ctx, seed, seed); + t1ha2_update(&ctx, data, len); + return t1ha2_final(&ctx, &unused); +} + +__cold int t1ha_selfcheck__t1ha2_stream(void) { + return t1ha_selfcheck(thunk_stream, t1ha_refval_2stream) | + t1ha_selfcheck(thunk_stream128, t1ha_refval_2stream128); +} + +__cold int t1ha_selfcheck__t1ha2(void) { + return t1ha_selfcheck__t1ha2_atonce() | t1ha_selfcheck__t1ha2_atonce128() | + t1ha_selfcheck__t1ha2_stream(); +} + diff --git a/src/Crypto/t1ha_bits.h b/src/Crypto/t1ha_bits.h new file mode 100644 index 00000000..b78c4129 --- /dev/null +++ b/src/Crypto/t1ha_bits.h @@ -0,0 +1,904 @@ +/* + * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, + * Fast Positive Hash. + * + * Portions Copyright (c) 2010-2018 Leonid Yuriev , + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * + * The Future will Positive. Всё будет хорошо. + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once + +#if defined(_MSC_VER) +#pragma warning(disable : 4201) /* nameless struct/union */ +#if _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' */ +#endif /* 1800 */ +#endif /* MSVC */ +#include "t1ha.h" + +#ifndef T1HA_USE_FAST_ONESHOT_READ +/* Define it to 1 for little bit faster code. + * Unfortunately this may triggering a false-positive alarms from Valgrind, + * AddressSanitizer and other similar tool. + * So, define it to 0 for calmness if doubt. */ +#define T1HA_USE_FAST_ONESHOT_READ 1 +#endif /* T1HA_USE_FAST_ONESHOT_READ */ + +/*****************************************************************************/ + +#include /* for assert() */ +#include /* for memcpy() */ + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ + __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ +#error Unsupported byte order. +#endif + +#define T1HA_UNALIGNED_ACCESS__UNABLE 0 +#define T1HA_UNALIGNED_ACCESS__SLOW 1 +#define T1HA_UNALIGNED_ACCESS__EFFICIENT 2 + +#ifndef T1HA_SYS_UNALIGNED_ACCESS +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#elif defined(__ia32__) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#elif defined(__e2k__) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__SLOW +#elif defined(__ARM_FEATURE_UNALIGNED) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#else +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE +#endif +#endif /* T1HA_SYS_UNALIGNED_ACCESS */ + +#define ALIGNMENT_16 2 +#define ALIGNMENT_32 4 +#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul +#define ALIGNMENT_64 8 +#else +#define ALIGNMENT_64 4 +#endif + +#ifndef PAGESIZE +#define PAGESIZE 4096 +#endif /* PAGESIZE */ + +/***************************************************************************/ + +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + +#ifndef __has_warning +#define __has_warning(x) (0) +#endif + +#ifndef __has_feature +#define __has_feature(x) (0) +#endif + +#ifndef __has_extension +#define __has_extension(x) (0) +#endif + +#ifndef __has_attribute +#define __has_attribute(x) (0) +#endif + +#if __has_feature(address_sanitizer) +#define __SANITIZE_ADDRESS__ 1 +#endif + +#ifndef __optimize +#if defined(__clang__) && !__has_attribute(optimize) +#define __optimize(ops) +#elif defined(__GNUC__) || __has_attribute(optimize) +#define __optimize(ops) __attribute__((optimize(ops))) +#else +#define __optimize(ops) +#endif +#endif /* __optimize */ + +#ifndef __cold +#if defined(__OPTIMIZE__) +#if defined(__e2k__) +#define __cold __optimize(1) __attribute__((cold)) +#elif defined(__clang__) && !__has_attribute(cold) +/* just put infrequently used functions in separate section */ +#define __cold __attribute__((section("text.unlikely"))) __optimize("Os") +#elif defined(__GNUC__) || __has_attribute(cold) +#define __cold __attribute__((cold)) __optimize("Os") +#else +#define __cold __optimize("Os") +#endif +#else +#define __cold +#endif +#endif /* __cold */ + + +#if defined(_MSC_VER) + +#pragma warning(push, 1) + +#include +#define likely(cond) (cond) +#define unlikely(cond) (cond) +#define unreachable() __assume(0) +#define bswap64(v) byteswap_64(v) +#define bswap32(v) byteswap_32(v) +#define bswap16(v) byteswap_16(v) +#define rot64(v, s) rotr64(v, s) +#define rot32(v, s) rotr32(v, s) +#define __always_inline __forceinline + +#ifdef TC_WINDOWS_DRIVER +#undef assert +#define assert ASSERT +#endif + +#if defined(_M_X64) || defined(_M_IA64) +#pragma intrinsic(_umul128) +#define mul_64x64_128(a, b, ph) _umul128(a, b, ph) +#endif + +#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) +#pragma intrinsic(__umulh) +#define mul_64x64_high(a, b) __umulh(a, b) +#endif + +#pragma warning(pop) +#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \ + has been removed */ +#pragma warning(disable : 4710) /* 'xyz': function not inlined */ +#pragma warning(disable : 4711) /* function 'xyz' selected for \ + automatic inline expansion */ +#pragma warning(disable : 4127) /* conditional expression is constant */ +#pragma warning(disable : 4702) /* unreachable code */ + +#define __GNUC_PREREQ(a,b) 0 +#define UINT64_C(value) value ## ULL + +#endif /* Compiler */ + +#ifndef likely +#define likely(cond) (cond) +#endif +#ifndef unlikely +#define unlikely(cond) (cond) +#endif +#ifndef __maybe_unused +#define __maybe_unused +#endif +#ifndef __always_inline +#define __always_inline __inline +#endif +#ifndef unreachable +#define unreachable() \ + do { \ + } while (1) +#endif + + + +#ifndef read_unaligned +#if defined(__GNUC__) || __has_attribute(packed) +typedef struct { + uint8_t unaligned_8; + uint16_t unaligned_16; + uint32_t unaligned_32; + uint64_t unaligned_64; +} __attribute__((packed)) t1ha_unaligned_proxy; +#define read_unaligned(ptr, bits) \ + (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ + t1ha_unaligned_proxy, unaligned_##bits))) \ + ->unaligned_##bits) +#elif defined(_MSC_VER) +#pragma warning( \ + disable : 4235) /* nonstandard extension used: '__unaligned' \ + * keyword not supported on this architecture */ +#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr)) +#else +#pragma pack(push, 1) +typedef struct { + uint8_t unaligned_8; + uint16_t unaligned_16; + uint32_t unaligned_32; + uint64_t unaligned_64; +} t1ha_unaligned_proxy; +#pragma pack(pop) +#define read_unaligned(ptr, bits) \ + (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ + t1ha_unaligned_proxy, unaligned_##bits))) \ + ->unaligned_##bits) +#endif +#endif /* read_unaligned */ + +#ifndef read_aligned +#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned) +#define read_aligned(ptr, bits) \ + (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits)) +#elif (__GNUC_PREREQ(3, 3) || __has_attribute(aligned)) && !defined(__clang__) +#define read_aligned(ptr, bits) \ + (*(const uint##bits##_t __attribute__((aligned(ALIGNMENT_##bits))) *)(ptr)) +#elif __has_attribute(assume_aligned) + +static __always_inline const + uint16_t *__attribute__((assume_aligned(ALIGNMENT_16))) + cast_aligned_16(const void *ptr) { + return (const uint16_t *)ptr; +} +static __always_inline const + uint32_t *__attribute__((assume_aligned(ALIGNMENT_32))) + cast_aligned_32(const void *ptr) { + return (const uint32_t *)ptr; +} +static __always_inline const + uint64_t *__attribute__((assume_aligned(ALIGNMENT_64))) + cast_aligned_64(const void *ptr) { + return (const uint64_t *)ptr; +} + +#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr)) + +#elif defined(_MSC_VER) +#define read_aligned(ptr, bits) \ + (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr)) +#else +#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr)) +#endif +#endif /* read_aligned */ + +#ifndef prefetch +#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \ + !defined(__ia32__) +#define prefetch(ptr) __builtin_prefetch(ptr) +#elif defined(_M_ARM64) || defined(_M_ARM) +#define prefetch(ptr) __prefetch(ptr) +#else +#define prefetch(ptr) \ + do { \ + (void)(ptr); \ + } while (0) +#endif +#endif /* prefetch */ + +#if __has_warning("-Wconstant-logical-operand") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wconstant-logical-operand" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wconstant-logical-operand" +#else +#pragma warning disable "constant-logical-operand" +#endif +#endif /* -Wconstant-logical-operand */ + +#if __has_warning("-Wtautological-pointer-compare") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wtautological-pointer-compare" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wtautological-pointer-compare" +#else +#pragma warning disable "tautological-pointer-compare" +#endif +#endif /* -Wtautological-pointer-compare */ + +/***************************************************************************/ + +#if __GNUC_PREREQ(4, 0) +#pragma GCC visibility push(hidden) +#endif /* __GNUC_PREREQ(4,0) */ + +/*---------------------------------------------------------- Little Endian */ + +#ifndef fetch16_le_aligned +static __always_inline uint16_t fetch16_le_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_16 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 16); +#else + return bswap16(read_aligned(v, 16)); +#endif +} +#endif /* fetch16_le_aligned */ + +#ifndef fetch16_le_unaligned +static __always_inline uint16_t fetch16_le_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + const uint8_t *p = (const uint8_t *)v; + return p[0] | (uint16_t)p[1] << 8; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 16); +#else + return bswap16(read_unaligned(v, 16)); +#endif +} +#endif /* fetch16_le_unaligned */ + +#ifndef fetch32_le_aligned +static __always_inline uint32_t fetch32_le_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_32 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 32); +#else + return bswap32(read_aligned(v, 32)); +#endif +} +#endif /* fetch32_le_aligned */ + +#ifndef fetch32_le_unaligned +static __always_inline uint32_t fetch32_le_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return fetch16_le_unaligned(v) | + (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 32); +#else + return bswap32(read_unaligned(v, 32)); +#endif +} +#endif /* fetch32_le_unaligned */ + +#ifndef fetch64_le_aligned +static __always_inline uint64_t fetch64_le_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_64 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 64); +#else + return bswap64(read_aligned(v, 64)); +#endif +} +#endif /* fetch64_le_aligned */ + +#ifndef fetch64_le_unaligned +static __always_inline uint64_t fetch64_le_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return fetch32_le_unaligned(v) | + (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 64); +#else + return bswap64(read_unaligned(v, 64)); +#endif +} +#endif /* fetch64_le_unaligned */ + +static __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((8 - tail) & 7) << 3; + return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift); +#else + uint64_t r = 0; + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed byte reordering. */ + case 0: + return fetch64_le_aligned(p); + case 7: + r = (uint64_t)p[6] << 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 32; + /* fall through */ + case 4: + return r + fetch32_le_aligned(p); + case 3: + r = (uint64_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_aligned(p); + case 1: + return p[0]; +#else + case 0: + r = p[7] << 8; + /* fall through */ + case 7: + r += p[6]; + r <<= 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 8; + /* fall through */ + case 4: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +#if T1HA_USE_FAST_ONESHOT_READ && \ + T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ + defined(PAGESIZE) && PAGESIZE > 42 && !defined(__SANITIZE_ADDRESS__) +#define can_read_underside(ptr, size) \ + (((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) +#endif /* T1HA_USE_FAST_ONESHOT_READ */ + +static __always_inline uint64_t tail64_le_unaligned(const void *v, + size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#if defined(can_read_underside) && \ + (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) + /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski + * for the reminder. */ + const unsigned offset = (8 - tail) & 7; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 8))) { + p -= offset; + return fetch64_le_unaligned(p) >> shift; + } + return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift); +#else + uint64_t r = 0; + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 0: + return fetch64_le_unaligned(p); + case 7: + r = (uint64_t)p[6] << 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 32; + /* fall through */ + case 4: + return r + fetch32_le_unaligned(p); + case 3: + r = (uint64_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_unaligned(p); + case 1: + return p[0]; +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. */ + case 0: + r = p[7] << 8; + /* fall through */ + case 7: + r += p[6]; + r <<= 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 8; + /* fall through */ + case 4: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* can_read_underside */ +} + +/*------------------------------------------------------------- Big Endian */ + +#ifndef fetch16_be_aligned +static __maybe_unused __always_inline uint16_t +fetch16_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_16 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 16); +#else + return bswap16(read_aligned(v, 16)); +#endif +} +#endif /* fetch16_be_aligned */ + +#ifndef fetch16_be_unaligned +static __maybe_unused __always_inline uint16_t +fetch16_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + const uint8_t *p = (const uint8_t *)v; + return (uint16_t)p[0] << 8 | p[1]; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 16); +#else + return bswap16(read_unaligned(v, 16)); +#endif +} +#endif /* fetch16_be_unaligned */ + +#ifndef fetch32_be_aligned +static __maybe_unused __always_inline uint32_t +fetch32_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_32 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 32); +#else + return bswap32(read_aligned(v, 32)); +#endif +} +#endif /* fetch32_be_aligned */ + +#ifndef fetch32_be_unaligned +static __maybe_unused __always_inline uint32_t +fetch32_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return (uint32_t)fetch16_be_unaligned(v) << 16 | + fetch16_be_unaligned((const uint8_t *)v + 2); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 32); +#else + return bswap32(read_unaligned(v, 32)); +#endif +} +#endif /* fetch32_be_unaligned */ + +#ifndef fetch64_be_aligned +static __maybe_unused __always_inline uint64_t +fetch64_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_64 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 64); +#else + return bswap64(read_aligned(v, 64)); +#endif +} +#endif /* fetch64_be_aligned */ + +#ifndef fetch64_be_unaligned +static __maybe_unused __always_inline uint64_t +fetch64_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return (uint64_t)fetch32_be_unaligned(v) << 32 | + fetch32_be_unaligned((const uint8_t *)v + 4); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 64); +#else + return bswap64(read_unaligned(v, 64)); +#endif +} +#endif /* fetch64_be_unaligned */ + +static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((8 - tail) & 7) << 3; + return fetch64_be_aligned(p) >> shift; +#else + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_aligned(p); + case 3: + return (uint32_t)fetch16_be_aligned(p) << 8 | p[2]; + case 4: + return fetch32_be_aligned(p); + case 5: + return (uint64_t)fetch32_be_aligned(p) << 8 | p[4]; + case 6: + return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4); + case 7: + return (uint64_t)fetch32_be_aligned(p) << 24 | + (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6]; + case 0: + return fetch64_be_aligned(p); +#else + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 4: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; + case 5: + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + case 6: + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + case 7: + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; + case 0: + return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint64_t +tail64_be_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#if defined(can_read_underside) && \ + (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) + /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski + * for the reminder. */ + const unsigned offset = (8 - tail) & 7; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 8))) { + p -= offset; + return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift); + } + return fetch64_be_unaligned(p) >> shift; +#else + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_unaligned(p); + case 3: + return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2]; + case 4: + return fetch32_be(p); + case 5: + return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4]; + case 6: + return (uint64_t)fetch32_be_unaligned(p) << 16 | + fetch16_be_unaligned(p + 4); + case 7: + return (uint64_t)fetch32_be_unaligned(p) << 24 | + (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6]; + case 0: + return fetch64_be_unaligned(p); +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. */ + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 4: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; + case 5: + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + case 6: + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + case 7: + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; + case 0: + return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; +#endif + } +#endif /* can_read_underside */ +} + +/***************************************************************************/ + +#ifndef rot64 +static __always_inline uint64_t rot64(uint64_t v, unsigned s) { + return (v >> s) | (v << (64 - s)); +} +#endif /* rot64 */ + +#ifndef mul_32x32_64 +static __always_inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) { + return a * (uint64_t)b; +} +#endif /* mul_32x32_64 */ + +#ifndef add64carry_first +static __maybe_unused __always_inline unsigned +add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, 0, &carryout); + return (unsigned)carryout; +#else + *sum = base + addend; + return *sum < addend; +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_fist */ + +#ifndef add64carry_next +static __maybe_unused __always_inline unsigned +add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, carry, &carryout); + return (unsigned)carryout; +#else + *sum = base + addend + carry; + return *sum < addend || (carry && *sum == addend); +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_next */ + +#ifndef add64carry_last +static __maybe_unused __always_inline void +add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, carry, &carryout); + (void)carryout; +#else + *sum = base + addend + carry; +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_last */ + +#ifndef mul_64x64_128 +static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, + uint64_t b, + uint64_t *h) { +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + __uint128_t r = (__uint128_t)a * (__uint128_t)b; + /* modern GCC could nicely optimize this */ + *h = (uint64_t)(r >> 64); + return (uint64_t)r; +#elif defined(mul_64x64_high) + *h = mul_64x64_high(a, b); + return a * b; +#else + /* performs 64x64 to 128 bit multiplication */ + const uint64_t ll = mul_32x32_64((uint32_t)a, (uint32_t)b); + const uint64_t lh = mul_32x32_64(a >> 32, (uint32_t)b); + const uint64_t hl = mul_32x32_64((uint32_t)a, b >> 32); + const uint64_t hh = mul_32x32_64(a >> 32, b >> 32); + + /* Few simplification are possible here for 32-bit architectures, + * but thus we would lost compatibility with the original 64-bit + * version. Think is very bad idea, because then 32-bit t1ha will + * still (relatively) very slowly and well yet not compatible. */ + uint64_t l; + add64carry_last(add64carry_first(ll, lh << 32, &l), hh, lh >> 32, h); + add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); + return l; +#endif +} +#endif /* mul_64x64_128() */ + +#ifndef mul_64x64_high +static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, + uint64_t b) { + uint64_t h; + mul_64x64_128(a, b, &h); + return h; +} +#endif /* mul_64x64_high */ + +/***************************************************************************/ + +/* 'magic' primes */ +static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); +static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); +static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); +static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); +static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); +static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); +static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); + +/* xor high and low parts of full 128-bit product */ +static __maybe_unused __always_inline uint64_t mux64(uint64_t v, + uint64_t prime) { + uint64_t l, h; + l = mul_64x64_128(v, prime, &h); + return l ^ h; +} + +static __always_inline uint64_t final64(uint64_t a, uint64_t b) { + uint64_t x = (a + rot64(b, 41)) * prime_0; + uint64_t y = (rot64(a, 23) + b) * prime_6; + return mux64(x ^ y, prime_5); +} + +static __always_inline void mixup64(uint64_t *__restrict a, + uint64_t *__restrict b, uint64_t v, + uint64_t prime) { + uint64_t h; + *a ^= mul_64x64_128(*b + v, prime, &h); + *b += h; +} diff --git a/src/Crypto/t1ha_selfcheck.c b/src/Crypto/t1ha_selfcheck.c new file mode 100644 index 00000000..51d02912 --- /dev/null +++ b/src/Crypto/t1ha_selfcheck.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, + * Fast Positive Hash. + * + * Portions Copyright (c) 2010-2018 Leonid Yuriev , + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * + * The Future will Positive. Всё будет хорошо. + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#include "t1ha_selfcheck.h" +#include "t1ha_bits.h" + +const uint8_t t1ha_test_pattern[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F, + 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0, + 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11, + 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x'}; + +static VC_INLINE int probe(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t reference, const void *data, + unsigned len, uint64_t seed) { + const uint64_t actual = hash(data, len, seed); + assert(actual == reference); + return actual != reference; +} + +__cold int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t *reference_values) { + int failed = 0; + uint64_t seed = 1; + const uint64_t zero = 0; + uint8_t pattern_long[512]; + int i; + failed |= probe(hash, /* empty-zero */ *reference_values++, NULL, 0, zero); + failed |= probe(hash, /* empty-all1 */ *reference_values++, NULL, 0, ~zero); + failed |= probe(hash, /* bin64-zero */ *reference_values++, t1ha_test_pattern, + 64, zero); + + for (i = 1; i < 64; i++) { + /* bin%i-1p%i */ + failed |= probe(hash, *reference_values++, t1ha_test_pattern, i, seed); + seed <<= 1; + } + + seed = ~zero; + for (i = 1; i <= 7; i++) { + seed <<= 1; + /* align%i_F%i */; + failed |= + probe(hash, *reference_values++, t1ha_test_pattern + i, 64 - i, seed); + } + + + for (i = 0; i < sizeof(pattern_long); ++i) + pattern_long[i] = (uint8_t)i; + for (i = 0; i <= 7; i++) { + /* long-%05i */ + failed |= + probe(hash, *reference_values++, pattern_long + i, 128 + i * 17, seed); + } + + return failed ? -1 : 0; +} diff --git a/src/Crypto/t1ha_selfcheck.h b/src/Crypto/t1ha_selfcheck.h new file mode 100644 index 00000000..943bf2d2 --- /dev/null +++ b/src/Crypto/t1ha_selfcheck.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016-2018 Positive Technologies, https://www.ptsecurity.com, + * Fast Positive Hash. + * + * Portions Copyright (c) 2010-2018 Leonid Yuriev , + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * + * The Future will Positive. Всё будет хорошо. + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once +#if defined(_MSC_VER) && _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' */ +#endif /* MSVC */ +#include "t1ha.h" + +/***************************************************************************/ +/* Self-checking */ + +extern const uint8_t t1ha_test_pattern[64]; +int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t *reference_values); + +#ifndef T1HA2_DISABLED +extern const uint64_t t1ha_refval_2atonce[81]; +extern const uint64_t t1ha_refval_2atonce128[81]; +extern const uint64_t t1ha_refval_2stream[81]; +extern const uint64_t t1ha_refval_2stream128[81]; +#endif /* T1HA2_DISABLED */ + +#ifndef T1HA1_DISABLED +extern const uint64_t t1ha_refval_64le[81]; +extern const uint64_t t1ha_refval_64be[81]; +#endif /* T1HA1_DISABLED */ + +#ifndef T1HA0_DISABLED +extern const uint64_t t1ha_refval_32le[81]; +extern const uint64_t t1ha_refval_32be[81]; +#if T1HA0_AESNI_AVAILABLE +extern const uint64_t t1ha_refval_ia32aes_a[81]; +extern const uint64_t t1ha_refval_ia32aes_b[81]; +#endif /* T1HA0_AESNI_AVAILABLE */ +#endif /* T1HA0_DISABLED */ diff --git a/src/Driver/DriveFilter.c b/src/Driver/DriveFilter.c index 3c7687f8..5fbacac4 100644 --- a/src/Driver/DriveFilter.c +++ b/src/Driver/DriveFilter.c @@ -94,9 +94,9 @@ NTSTATUS LoadBootArguments () bootArgsAddr.QuadPart = BootArgsRegions[bootLoaderArgsIndex] + TC_BOOT_LOADER_ARGS_OFFSET; Dump ("Checking BootArguments at 0x%x\n", bootArgsAddr.LowPart); - mappedBootArgs = MmMapIoSpace (bootArgsAddr, sizeof (BootArguments), MmCached); - if (!mappedBootArgs) - return STATUS_INSUFFICIENT_RESOURCES; + mappedBootArgs = MmMapIoSpace (bootArgsAddr, sizeof (BootArguments), MmCached); + if (!mappedBootArgs) + return STATUS_INSUFFICIENT_RESOURCES; if (TC_IS_BOOT_ARGUMENTS_SIGNATURE (mappedBootArgs)) { @@ -639,6 +639,12 @@ static NTSTATUS MountDrive (DriveFilterExtension *Extension, Password *password, } else Extension->Queue.MaxReadAheadOffset = BootDriveLength; + + /* encrypt keys */ +#ifdef _WIN64 + VcProtectKeys (Extension->HeaderCryptoInfo, VcGetEncryptionID (Extension->HeaderCryptoInfo)); + VcProtectKeys (Extension->Queue.CryptoInfo, VcGetEncryptionID (Extension->Queue.CryptoInfo)); +#endif status = EncryptedIoQueueStart (&Extension->Queue); if (!NT_SUCCESS (status)) @@ -710,8 +716,18 @@ static NTSTATUS SaveDriveVolumeHeader (DriveFilterExtension *Extension) uint32 headerCrc32; uint64 encryptedAreaLength = Extension->Queue.EncryptedAreaEnd + 1 - Extension->Queue.EncryptedAreaStart; byte *fieldPos = header + TC_HEADER_OFFSET_ENCRYPTED_AREA_LENGTH; + PCRYPTO_INFO pCryptoInfo = Extension->HeaderCryptoInfo; +#ifdef _WIN64 + CRYPTO_INFO tmpCI; + if (IsRamEncryptionEnabled()) + { + memcpy (&tmpCI, pCryptoInfo, sizeof (CRYPTO_INFO)); + VcUnprotectKeys (&tmpCI, VcGetEncryptionID (pCryptoInfo)); + pCryptoInfo = &tmpCI; + } +#endif - DecryptBuffer (header + HEADER_ENCRYPTED_DATA_OFFSET, HEADER_ENCRYPTED_DATA_SIZE, Extension->HeaderCryptoInfo); + DecryptBuffer (header + HEADER_ENCRYPTED_DATA_OFFSET, HEADER_ENCRYPTED_DATA_SIZE, pCryptoInfo); if (GetHeaderField32 (header, TC_HEADER_OFFSET_MAGIC) != 0x56455241) { @@ -726,7 +742,13 @@ static NTSTATUS SaveDriveVolumeHeader (DriveFilterExtension *Extension) fieldPos = header + TC_HEADER_OFFSET_HEADER_CRC; mputLong (fieldPos, headerCrc32); - EncryptBuffer (header + HEADER_ENCRYPTED_DATA_OFFSET, HEADER_ENCRYPTED_DATA_SIZE, Extension->HeaderCryptoInfo); + EncryptBuffer (header + HEADER_ENCRYPTED_DATA_OFFSET, HEADER_ENCRYPTED_DATA_SIZE, pCryptoInfo); +#ifdef _WIN64 + if (IsRamEncryptionEnabled()) + { + burn (&tmpCI, sizeof (CRYPTO_INFO)); + } +#endif } status = TCWriteDevice (Extension->LowerDeviceObject, header, offset, TC_BOOT_ENCRYPTION_VOLUME_HEADER_SIZE); @@ -961,6 +983,9 @@ static NTSTATUS DispatchPower (PDEVICE_OBJECT DeviceObject, PIRP Irp, DriveFilte && irpSp->Parameters.Power.Type == DevicePowerState) { DismountDrive (Extension, TRUE); +#ifdef _WIN64 + ClearSecurityParameters (); +#endif } PoStartNextPowerIrp (Irp); @@ -1087,6 +1112,10 @@ void EmergencyClearAllKeys (PIRP irp, PIO_STACK_LOCATION irpSp) if (BootDriveFound && BootDriveFilterExtension && BootDriveFilterExtension->DriveMounted) InvalidateDriveFilterKeys (BootDriveFilterExtension); +#ifdef _WIN64 + ClearSecurityParameters(); +#endif + irp->IoStatus.Status = STATUS_SUCCESS; } } @@ -1139,9 +1168,22 @@ void ReopenBootVolumeHeader (PIRP irp, PIO_STACK_LOCATION irpSp) goto ret; } +#ifdef _WIN64 + if (IsRamEncryptionEnabled()) + { + VcUnprotectKeys (BootDriveFilterExtension->HeaderCryptoInfo, VcGetEncryptionID (BootDriveFilterExtension->HeaderCryptoInfo)); + } +#endif + if (ReadVolumeHeader (!BootDriveFilterExtension->HiddenSystem, header, &request->VolumePassword, request->pkcs5_prf, request->pim, FALSE, NULL, BootDriveFilterExtension->HeaderCryptoInfo) == 0) { Dump ("Header reopened\n"); +#ifdef _WIN64 + if (IsRamEncryptionEnabled()) + { + VcProtectKeys (BootDriveFilterExtension->HeaderCryptoInfo, VcGetEncryptionID(BootDriveFilterExtension->HeaderCryptoInfo)); + } +#endif ComputeBootLoaderFingerprint (BootDriveFilterExtension->LowerDeviceObject, header); BootDriveFilterExtension->Queue.CryptoInfo->pkcs5 = BootDriveFilterExtension->HeaderCryptoInfo->pkcs5; @@ -1258,7 +1300,7 @@ static NTSTATUS HiberDriverWriteFunctionFilter (int filterNumber, PLARGE_INTEGER if (BootDriveFilterExtension->Queue.RemapEncryptedArea) dataUnit.Value += BootDriveFilterExtension->Queue.RemappedAreaDataUnitOffset; - EncryptDataUnitsCurrentThread (HibernationWriteBuffer + (intersectStart - offset), + EncryptDataUnitsCurrentThreadEx (HibernationWriteBuffer + (intersectStart - offset), &dataUnit, intersectLength / ENCRYPTION_DATA_UNIT_SIZE, BootDriveFilterExtension->Queue.CryptoInfo); diff --git a/src/Driver/Driver.vcxproj b/src/Driver/Driver.vcxproj index 5744934b..58a21d3d 100644 --- a/src/Driver/Driver.vcxproj +++ b/src/Driver/Driver.vcxproj @@ -201,6 +201,9 @@ BuildDriver.cmd -rebuild -debug -x64 "$(SolutionDir)\Common" "$(SolutionDir)\Cry + + + diff --git a/src/Driver/Driver.vcxproj.filters b/src/Driver/Driver.vcxproj.filters index a6f5da3c..f260fb56 100644 --- a/src/Driver/Driver.vcxproj.filters +++ b/src/Driver/Driver.vcxproj.filters @@ -126,6 +126,15 @@ Source Files\Crypto + + Source Files\Crypto + + + Source Files\Crypto + + + Source Files\Crypto + diff --git a/src/Driver/DumpFilter.c b/src/Driver/DumpFilter.c index 2b58d061..fc1c7d37 100644 --- a/src/Driver/DumpFilter.c +++ b/src/Driver/DumpFilter.c @@ -218,7 +218,7 @@ static NTSTATUS DumpFilterWrite (PFILTER_EXTENSION filterExtension, PLARGE_INTEG dataUnit.Value += BootDriveFilterExtension->Queue.RemappedAreaDataUnitOffset; } - EncryptDataUnitsCurrentThread (WriteFilterBuffer + (intersectStart - offset), + EncryptDataUnitsCurrentThreadEx (WriteFilterBuffer + (intersectStart - offset), &dataUnit, intersectLength / ENCRYPTION_DATA_UNIT_SIZE, BootDriveFilterExtension->Queue.CryptoInfo); diff --git a/src/Driver/Ntdriver.c b/src/Driver/Ntdriver.c index ba2de477..97fb1bf1 100644 --- a/src/Driver/Ntdriver.c +++ b/src/Driver/Ntdriver.c @@ -287,6 +287,24 @@ NTSTATUS DriverEntry (PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath) TCfree (startKeyValue); } +#ifdef _WIN64 + if ((OsMajorVersion > 6) || (OsMajorVersion == 6 && OsMinorVersion >= 1)) + { + // we enable RAM encryption only starting from Windows 7 + if (IsRamEncryptionEnabled()) + { + if (t1ha_selfcheck__t1ha2() != 0) + TC_BUG_CHECK (STATUS_INVALID_PARAMETER); + if (!InitializeSecurityParameters(GetDriverRandomSeed)) + TC_BUG_CHECK (STATUS_INVALID_PARAMETER); + } + } + else + { + EnableRamEncryption (FALSE); + } +#endif + for (i = 0; i <= IRP_MJ_MAXIMUM_FUNCTION; ++i) { DriverObject->MajorFunction[i] = TCDispatchQueueIRP; @@ -326,7 +344,7 @@ NTSTATUS DriverAddDevice (PDRIVER_OBJECT driverObject, PDEVICE_OBJECT pdo) return DriveFilterAddDevice (driverObject, pdo); } - +#if defined (DEBUG) || defined (DEBUG_TRACE) // Dumps a memory region to debug output void DumpMemory (void *mem, int size) { @@ -351,6 +369,7 @@ void DumpMemory (void *mem, int size) m+=8; } } +#endif BOOL IsAllZeroes (unsigned char* pbData, DWORD dwDataLen) { @@ -4453,6 +4472,8 @@ NTSTATUS ReadRegistryConfigFlags (BOOL driverEntry) flags ^= VC_DRIVER_CONFIG_CLEAR_KEYS_ON_NEW_DEVICE_INSERTION; WriteRegistryConfigFlags (flags); } + + EnableRamEncryption ((flags & VC_DRIVER_CONFIG_ENABLE_RAM_ENCRYPTION) ? TRUE : FALSE); } EnableHwEncryption ((flags & TC_DRIVER_CONFIG_DISABLE_HARDWARE_ENCRYPTION) ? FALSE : TRUE); diff --git a/src/Driver/Ntvol.c b/src/Driver/Ntvol.c index a317d8be..ab9370f7 100644 --- a/src/Driver/Ntvol.c +++ b/src/Driver/Ntvol.c @@ -591,6 +591,10 @@ NTSTATUS TCOpenVolume (PDEVICE_OBJECT DeviceObject, mount->VolumePim, mount->bTrueCryptMode, &Extension->cryptoInfo); +#ifdef _WIN64 + if (IsRamEncryptionEnabled()) + VcProtectKeys (Extension->cryptoInfo, VcGetEncryptionID (Extension->cryptoInfo)); +#endif } ReadVolumeHeaderRecoveryMode = FALSE; diff --git a/src/Mount/Mount.c b/src/Mount/Mount.c index 5f12baa5..cc3ae11e 100644 --- a/src/Mount/Mount.c +++ b/src/Mount/Mount.c @@ -11125,6 +11125,16 @@ static BOOL CALLBACK PerformanceSettingsDlgProc (HWND hwndDlg, UINT msg, WPARAM EnableWindow (GetDlgItem (hwndDlg, IDC_ENABLE_CPU_RNG), FALSE); } + if (IsOSAtLeast (WIN_7) && IsRamEncryptionSupported()) + { + CheckDlgButton (hwndDlg, IDC_ENABLE_RAM_ENCRYPTION, (driverConfig & VC_DRIVER_CONFIG_ENABLE_RAM_ENCRYPTION) ? BST_CHECKED : BST_UNCHECKED); + } + else + { + CheckDlgButton (hwndDlg, IDC_ENABLE_RAM_ENCRYPTION, BST_UNCHECKED); + EnableWindow (GetDlgItem (hwndDlg, IDC_ENABLE_RAM_ENCRYPTION), FALSE); + } + SYSTEM_INFO sysInfo; GetSystemInfo (&sysInfo); @@ -11181,6 +11191,7 @@ static BOOL CALLBACK PerformanceSettingsDlgProc (HWND hwndDlg, UINT msg, WPARAM BOOL disableHW = !IsDlgButtonChecked (hwndDlg, IDC_ENABLE_HARDWARE_ENCRYPTION); BOOL enableCpuRng = IsDlgButtonChecked (hwndDlg, IDC_ENABLE_CPU_RNG); + BOOL enableRamEncryption = IsDlgButtonChecked (hwndDlg, IDC_ENABLE_RAM_ENCRYPTION); BOOL enableExtendedIOCTL = IsDlgButtonChecked (hwndDlg, IDC_ENABLE_EXTENDED_IOCTL_SUPPORT); BOOL allowTrimCommand = IsDlgButtonChecked (hwndDlg, IDC_ALLOW_TRIM_NONSYS_SSD); BOOL allowWindowsDefrag = IsDlgButtonChecked (hwndDlg, IDC_ALLOW_WINDOWS_DEFRAG); @@ -11224,6 +11235,8 @@ static BOOL CALLBACK PerformanceSettingsDlgProc (HWND hwndDlg, UINT msg, WPARAM if (IsOSAtLeast (WIN_8_1)) SetDriverConfigurationFlag (VC_DRIVER_CONFIG_ALLOW_WINDOWS_DEFRAG, allowWindowsDefrag); SetDriverConfigurationFlag (VC_DRIVER_CONFIG_ENABLE_CPU_RNG, enableCpuRng); + if (IsOSAtLeast (WIN_7)) + SetDriverConfigurationFlag (VC_DRIVER_CONFIG_ENABLE_RAM_ENCRYPTION, enableRamEncryption); DWORD bytesReturned; if (!DeviceIoControl (hDriver, TC_IOCTL_REREAD_DRIVER_CONFIG, NULL, 0, NULL, 0, &bytesReturned, NULL)) diff --git a/src/Mount/Mount.rc b/src/Mount/Mount.rc index c5a35b46..6082d2cd 100644 --- a/src/Mount/Mount.rc +++ b/src/Mount/Mount.rc @@ -311,7 +311,7 @@ BEGIN "Button",BS_AUTOCHECKBOX | WS_TABSTOP,16,112,340,10 END -IDD_PERFORMANCE_SETTINGS DIALOGEX 0, 0, 371, 279 +IDD_PERFORMANCE_SETTINGS DIALOGEX 0, 0, 371, 293 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "VeraCrypt - Performance Options" FONT 8, "MS Shell Dlg", 400, 0, 0x1 @@ -329,17 +329,19 @@ BEGIN "Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,198,337,10 CONTROL "Allow TRIM command for non-system SSD partition/drive",IDC_ALLOW_TRIM_NONSYS_SSD, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,212,337,10 - PUSHBUTTON "&Benchmark",IDC_BENCHMARK,7,258,59,14 - DEFPUSHBUTTON "OK",IDOK,257,258,50,14 - PUSHBUTTON "Cancel",IDCANCEL,314,258,50,14 + PUSHBUTTON "&Benchmark",IDC_BENCHMARK,7,272,59,14 + DEFPUSHBUTTON "OK",IDOK,257,272,50,14 + PUSHBUTTON "Cancel",IDCANCEL,314,272,50,14 LTEXT "Processor (CPU) in this computer supports hardware acceleration for AES:",IDT_HW_AES_SUPPORTED_BY_CPU,18,23,273,9 GROUPBOX "Hardware Acceleration",IDT_ACCELERATION_OPTIONS,7,6,355,74 GROUPBOX "Thread-Based Parallelization",IDT_PARALLELIZATION_OPTIONS,7,84,355,93 - GROUPBOX "Driver Configuration",IDT_DRIVER_OPTIONS,7,183,357,72 + GROUPBOX "Driver Configuration",IDT_DRIVER_OPTIONS,7,183,357,86 CONTROL "Allow Windows Disk Defragmenter to defragment non-system partition/drive",IDC_ALLOW_WINDOWS_DEFRAG, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,226,337,10 CONTROL "Use CPU hardware random generator as an additional source of entropy",IDC_ENABLE_CPU_RNG, "Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,240,335,10 + CONTROL "Activate encryption of keys and passwords stored in RAM",IDC_ENABLE_RAM_ENCRYPTION, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,18,254,337,10 END IDD_FAVORITE_VOLUMES DIALOGEX 0, 0, 380, 368 @@ -508,7 +510,7 @@ BEGIN LEFTMARGIN, 7 RIGHTMARGIN, 364 TOPMARGIN, 7 - BOTTOMMARGIN, 272 + BOTTOMMARGIN, 286 END IDD_FAVORITE_VOLUMES, DIALOG diff --git a/src/Mount/Resource.h b/src/Mount/Resource.h index fdc9f890..f0903cab 100644 --- a/src/Mount/Resource.h +++ b/src/Mount/Resource.h @@ -193,6 +193,7 @@ #define IDC_LOWER_BOX 1170 #define IDC_CLEAR_KEYS_ON_NEW_DEVICE_INSERTION 1171 #define IDC_ENABLE_CPU_RNG 1172 +#define IDC_ENABLE_RAM_ENCRYPTION 1173 #define IDM_HELP 40001 #define IDM_ABOUT 40002 #define IDM_UNMOUNT_VOLUME 40003 @@ -269,7 +270,7 @@ #define _APS_NO_MFC 1 #define _APS_NEXT_RESOURCE_VALUE 120 #define _APS_NEXT_COMMAND_VALUE 40069 -#define _APS_NEXT_CONTROL_VALUE 1173 +#define _APS_NEXT_CONTROL_VALUE 1174 #define _APS_NEXT_SYMED_VALUE 101 #endif #endif -- cgit v1.2.3