diff options
Diffstat (limited to 'src/Crypto')
-rw-r--r-- | src/Crypto/Sha2Intel.c | 60 | ||||
-rw-r--r-- | src/Crypto/Whirlpool.c | 51 | ||||
-rw-r--r-- | src/Crypto/chacha-xmm.c | 39 | ||||
-rw-r--r-- | src/Crypto/config.h | 2 | ||||
-rw-r--r-- | src/Crypto/cpu.c | 57 | ||||
-rw-r--r-- | src/Crypto/wolfCrypt.c | 4 |
6 files changed, 93 insertions, 120 deletions
diff --git a/src/Crypto/Sha2Intel.c b/src/Crypto/Sha2Intel.c index c926f76a..943115bf 100644 --- a/src/Crypto/Sha2Intel.c +++ b/src/Crypto/Sha2Intel.c @@ -1,53 +1,113 @@ /* * Support for SHA-256 x86 instrinsic * Based on public domain code by Sean Gulley * (https://github.com/mitls/hacl-star/tree/master/experimental/hash) * * Botan is released under the Simplified BSD License (see license.txt) */ /* November 10th 2024: Modified for VeraCrypt */ #include "Sha2.h" #include "Common/Endian.h" #include "cpu.h" #include "misc.h" #if defined(_UEFI) || defined(CRYPTOPP_DISABLE_ASM) #define NO_OPTIMIZED_VERSIONS #endif #ifndef NO_OPTIMIZED_VERSIONS #if CRYPTOPP_SHANI_AVAILABLE +#ifndef _MSC_VER +#include <signal.h> +#include <setjmp.h> + +typedef void (*SigHandler)(int); + +static jmp_buf s_jmpNoSHA; +static void SigIllHandlerSHA(int p) +{ + longjmp(s_jmpNoSHA, 1); +} +#endif + +int TrySHA256() +{ + volatile int result = 0; +#ifdef _MSC_VER + __try +#else + SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA); + if (oldHandler == SIG_ERR) + return 0; + if (setjmp(s_jmpNoSHA)) + result = 0; + else +#endif + { + // Known input message block + __m128i msg0 = _mm_setr_epi32(0x12345678, 0x9ABCDEF0, 0x87654321, 0x0FEDCBA9); + __m128i msg1 = _mm_setr_epi32(0x11111111, 0x22222222, 0x33333333, 0x44444444); + + // SHA256 message schedule update + __m128i tmp = _mm_sha256msg1_epu32(msg0, msg1); + + // Verify result - these values were pre-computed for the given input +#ifdef _MSC_VER + if (tmp.m128i_u32[0] == 0xD8131B44 && + tmp.m128i_u32[1] == 0x9DE6E22B && + tmp.m128i_u32[2] == 0xA86D643A && + tmp.m128i_u32[3] == 0x74320FED) +#else + if (((uint32_t*)(&tmp))[0] == 0xD8131B44 && + ((uint32_t*)(&tmp))[1] == 0x9DE6E22B && + ((uint32_t*)(&tmp))[2] == 0xA86D643A && + ((uint32_t*)(&tmp))[3] == 0x74320FED) +#endif + result = 1; + } +#ifdef _MSC_VER + __except (EXCEPTION_EXECUTE_HANDLER) + { + // ignore error if SHA instructions not supported + } +#else + signal(SIGILL, oldHandler); +#endif + + return result; +} + // void sha256_intel(void *mp, uint_32t state[8], uint_64t num_blks) { // Constants table - align for better performance CRYPTOPP_ALIGN_DATA(64) static const uint_32t K[64] = { 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, }; const __m128i* K_mm = (const __m128i*)K; const __m128i* input_mm = (const __m128i*)mp; // Create byte shuffle mask for big-endian to little-endian conversion const __m128i MASK = _mm_set_epi64x(0x0c0d0e0f08090a0b, 0x0405060700010203); // Load initial values __m128i STATE0 = _mm_loadu_si128((__m128i*)&state[0]); __m128i STATE1 = _mm_loadu_si128((__m128i*)&state[4]); // Adjust byte ordering STATE0 = _mm_shuffle_epi32(STATE0, 0xB1); // CDAB STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); // EFGH diff --git a/src/Crypto/Whirlpool.c b/src/Crypto/Whirlpool.c index 6a1fe8b4..140c7c6f 100644 --- a/src/Crypto/Whirlpool.c +++ b/src/Crypto/Whirlpool.c @@ -930,82 +930,89 @@ void WHIRLPOOL_add(const unsigned char * input, uint64 len = sourceBytes; if ((ctx->countLo = oldCountLo + (uint64)len) < oldCountLo) ctx->countHi++; // carry from low to high if (ctx->countHi < oldCountHi) return; else { uint64* dataBuf = ctx->data; uint8* data = (uint8 *)dataBuf; num = oldCountLo & 63; if (num != 0) // process left over data { if (num+len >= 64) { memcpy(data+num, input, (size_t) (64-num)); HashMultipleBlocks(ctx, dataBuf, 64); input += (64-num); len -= (64-num); // drop through and do the rest } else { memcpy(data+num, input, (size_t) len); return; } } // now process the input data in blocks of 64 bytes and save the leftovers to ctx->data - if (len >= 64) - { - if (input == data) - { - HashMultipleBlocks(ctx, dataBuf, 64); - return; - } - else if (IsAligned16(input)) - { - uint64 leftOver = HashMultipleBlocks(ctx, (uint64 *)input, len); - input += (len - leftOver); - len = leftOver; - } - else - do - { // copy input first if it's not aligned correctly - memcpy(data, input, 64); - HashMultipleBlocks(ctx, dataBuf, 64); - input+=64; - len-=64; - } while (len >= 64); - } + if (len >= 64) + { + if (input == data) + { + HashMultipleBlocks(ctx, dataBuf, 64); + return; + } + else + { +#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS + if (IsAligned16(input)) +#endif + { + uint64 leftOver = HashMultipleBlocks(ctx, (uint64*)input, len); + input += (len - leftOver); + len = leftOver; + } +#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS + else + do + { // copy input first if it's not aligned correctly + memcpy(data, input, 64); + HashMultipleBlocks(ctx, dataBuf, 64); + input += 64; + len -= 64; + } while (len >= 64); +#endif + } + } if (len && data != input) memcpy(data, input, (size_t) len); } } /** * Get the hash value from the hashing state. * * This method uses the invariant: bufferBits < DIGESTBITS */ void WHIRLPOOL_finalize(WHIRLPOOL_CTX * const ctx, unsigned char * result) { unsigned int num = ctx->countLo & 63; uint64* dataBuf = ctx->data; uint64* stateBuf = ctx->state; uint8* data = (uint8 *)dataBuf; data[num++] = 0x80; if (num <= 32) memset(data+num, 0, 32-num); else { memset(data+num, 0, 64-num); HashMultipleBlocks(ctx, dataBuf, 64); memset(data, 0, 32); } #if BYTE_ORDER == LITTLE_ENDIAN CorrectEndianness(dataBuf, dataBuf, 32); diff --git a/src/Crypto/chacha-xmm.c b/src/Crypto/chacha-xmm.c index 54c3680c..980c2c81 100644 --- a/src/Crypto/chacha-xmm.c +++ b/src/Crypto/chacha-xmm.c @@ -54,99 +54,60 @@ __inline __m128i _mm_set1_epi64x(int64 a) #define XOR(v,w) ((v) ^ (w)) #define PLUS(v,w) (U32V((v) + (w))) #define PLUSONE(v) (PLUS((v),1)) #define QUARTERROUND(a,b,c,d) \ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); static void salsa20_wordtobyte(uint8 output[64],const uint32 input[16], unsigned int r) { uint32 x[16]; int i; for (i = 0;i < 16;++i) x[i] = input[i]; for (i = r;i > 0;--i) { QUARTERROUND( 0, 4, 8,12) QUARTERROUND( 1, 5, 9,13) QUARTERROUND( 2, 6,10,14) QUARTERROUND( 3, 7,11,15) QUARTERROUND( 0, 5,10,15) QUARTERROUND( 1, 6,11,12) QUARTERROUND( 2, 7, 8,13) QUARTERROUND( 3, 4, 9,14) } for (i = 0;i < 16;++i) x[i] = PLUS(x[i],input[i]); for (i = 0;i < 16;++i) U32TO8_LITTLE(output + 4 * i,x[i]); } -void chacha_ECRYPT_init(void) -{ - return; -} - -static const char sigma[17] = "expand 32-byte k"; -static const char tau[17] = "expand 16-byte k"; - -void chacha_ECRYPT_keysetup(uint32* input,const uint8 *k,uint32 kbits,uint32 ivbits) -{ - const char *constants; - - input[4] = U8TO32_LITTLE(k + 0); - input[5] = U8TO32_LITTLE(k + 4); - input[6] = U8TO32_LITTLE(k + 8); - input[7] = U8TO32_LITTLE(k + 12); - if (kbits == 256) { /* recommended */ - k += 16; - constants = sigma; - } else { /* kbits == 128 */ - constants = tau; - } - input[8] = U8TO32_LITTLE(k + 0); - input[9] = U8TO32_LITTLE(k + 4); - input[10] = U8TO32_LITTLE(k + 8); - input[11] = U8TO32_LITTLE(k + 12); - input[0] = U8TO32_LITTLE(constants + 0); - input[1] = U8TO32_LITTLE(constants + 4); - input[2] = U8TO32_LITTLE(constants + 8); - input[3] = U8TO32_LITTLE(constants + 12); -} - -void chacha_ECRYPT_ivsetup(uint32* input,const uint8 *iv) -{ - input[12] = 0; - input[13] = 0; - input[14] = U8TO32_LITTLE(iv + 0); - input[15] = U8TO32_LITTLE(iv + 4); -} void chacha_ECRYPT_encrypt_bytes(size_t bytes, uint32* x, const uint8* m, uint8* out, uint8* output, unsigned int r) { unsigned int i; #include "chacha_u4.h" #include "chacha_u1.h" #ifndef _M_X64 #ifdef _MSC_VER #if _MSC_VER < 1900 _mm_empty(); #endif #endif #endif if (!bytes) return; // bytes is now guaranteed to be between 1 and 63 salsa20_wordtobyte(output,x, r); x[12] = PLUSONE(x[12]); if (!x[12]) { x[13] = PLUSONE(x[13]); /* stopping at 2^70 bytes per nonce is user's responsibility */ } for (i = 0;i < bytes;++i) out[i] = m[i] ^ output[i]; } #endif diff --git a/src/Crypto/config.h b/src/Crypto/config.h index 774f7951..1c2aff72 100644 --- a/src/Crypto/config.h +++ b/src/Crypto/config.h @@ -24,61 +24,61 @@ #if defined(__clang__ ) && !defined(__apple_build_version__) && !defined(_DCSPKG_ANALYZE) #define CRYPTOPP_LLVM_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) #define CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER 1 #elif defined(__clang__ ) && defined(__apple_build_version__) && !defined(_DCSPKG_ANALYZE) #define CRYPTOPP_APPLE_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) #define CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER 1 #endif // Clang due to "Inline assembly operands don't work with .intel_syntax", http://llvm.org/bugs/show_bug.cgi?id=24232 // TODO: supply the upper version when LLVM fixes it. We set it to 20.0 for compilation purposes. #if (defined(CRYPTOPP_LLVM_CLANG_VERSION) && CRYPTOPP_LLVM_CLANG_VERSION <= 200000) || (defined(CRYPTOPP_APPLE_CLANG_VERSION) && CRYPTOPP_APPLE_CLANG_VERSION <= 200000) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER) #define CRYPTOPP_DISABLE_INTEL_ASM 1 #endif #ifndef CRYPTOPP_L1_CACHE_LINE_SIZE // This should be a lower bound on the L1 cache line size. It's used for defense against timing attacks. // Also see http://stackoverflow.com/questions/794632/programmatically-get-the-cache-line-size. #if defined(_M_X64) || defined(__x86_64__) || (__ILP32__ >= 1) #define CRYPTOPP_L1_CACHE_LINE_SIZE 64 #else // L1 cache line size is 32 on Pentium III and earlier #define CRYPTOPP_L1_CACHE_LINE_SIZE 32 #endif #endif #if defined(_MSC_VER) && (_MSC_VER > 1200) #define CRYPTOPP_MSVC6PP_OR_LATER #endif #ifndef CRYPTOPP_ALIGN_DATA - #if defined(_MSC_VER) + #if defined(_MSC_VER) && !defined(TC_WINDOWS_BOOT) #define CRYPTOPP_ALIGN_DATA(x) __declspec(align(x)) #elif defined(__GNUC__) #define CRYPTOPP_ALIGN_DATA(x) __attribute__((aligned(x))) #else #define CRYPTOPP_ALIGN_DATA(x) #endif #endif #ifndef CRYPTOPP_SECTION_ALIGN16 #if defined(__GNUC__) && !defined(__APPLE__) // the alignment attribute doesn't seem to work without this section attribute when -fdata-sections is turned on #define CRYPTOPP_SECTION_ALIGN16 __attribute__((section ("CryptoPP_Align16"))) #else #define CRYPTOPP_SECTION_ALIGN16 #endif #endif #if defined(_MSC_VER) || defined(__fastcall) #define CRYPTOPP_FASTCALL __fastcall #else #define CRYPTOPP_FASTCALL #endif #ifdef CRYPTOPP_DISABLE_X86ASM // for backwards compatibility: this macro had both meanings #define CRYPTOPP_DISABLE_ASM #define CRYPTOPP_DISABLE_SSE2 #endif // Apple's Clang prior to 5.0 cannot handle SSE2 (and Apple does not use LLVM Clang numbering...) #if defined(CRYPTOPP_APPLE_CLANG_VERSION) && (CRYPTOPP_APPLE_CLANG_VERSION < 50000) diff --git a/src/Crypto/cpu.c b/src/Crypto/cpu.c index c00d8409..30263361 100644 --- a/src/Crypto/cpu.c +++ b/src/Crypto/cpu.c @@ -28,70 +28,60 @@ int CpuId(uint32 input, uint32 output[4]) __cpuid((int *)output, input); return 1; } #else #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #if defined(__cplusplus) extern "C" { #endif typedef void (*SigHandler)(int); static jmp_buf s_jmpNoCPUID; static void SigIllHandlerCPUID(int p) { longjmp(s_jmpNoCPUID, 1); } #if !defined (_UEFI) && ((defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE) static jmp_buf s_jmpNoAESNI; static void SigIllHandlerAESNI(int p) { longjmp(s_jmpNoAESNI, 1); } #endif -#if !defined (_UEFI) && (defined(__SHA__) || defined(__INTEL_COMPILER) || CRYPTOPP_SHANI_AVAILABLE) - -static jmp_buf s_jmpNoSHA; -static void SigIllHandlerSHA(int p) -{ - longjmp(s_jmpNoSHA, 1); -} - -#endif - #if CRYPTOPP_BOOL_X64 == 0 static jmp_buf s_jmpNoSSE2; static void SigIllHandlerSSE2(int p) { longjmp(s_jmpNoSSE2, 1); } #endif #if defined(__cplusplus) } #endif #endif int CpuId(uint32 input, uint32 output[4]) { #ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY #ifndef _UEFI __try { #endif __asm { mov eax, input mov ecx, 0 cpuid mov edi, output mov [edi], eax mov [edi+4], ebx mov [edi+8], ecx mov [edi+12], edx @@ -295,106 +285,61 @@ static int Detect_MS_HyperV_AES () // when Hyper-V is enabled on older versions of Windows Server (i.e. 2008 R2), the AES-NI capability // gets masked out for all applications, even running on the host. // We try to detect Hyper-V virtual CPU and perform a dummy AES-NI operation to check its real presence uint32 cpuid[4] = {0}; char HvProductName[13]; CpuId(0x40000000, cpuid); memcpy (HvProductName, &cpuid[1], 12); HvProductName[12] = 0; if (_stricmp(HvProductName, "Microsoft Hv") == 0) { #if defined (TC_WINDOWS_DRIVER) && !defined (_WIN64) KFLOATING_SAVE floatingPointState; if (NT_SUCCESS (KeSaveFloatingPointState (&floatingPointState))) { #endif hasAesNI = TryAESNI (); #if defined (TC_WINDOWS_DRIVER) && !defined (_WIN64) KeRestoreFloatingPointState (&floatingPointState); } #endif } return hasAesNI; } #endif #if defined(__SHA__) || defined(__INTEL_COMPILER) || CRYPTOPP_SHANI_AVAILABLE -static int TrySHA256() -{ - volatile int result = 0; -#ifdef _MSC_VER - __try -#else - SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA); - if (oldHandler == SIG_ERR) - return 0; - if (setjmp(s_jmpNoSHA)) - result = 0; - else -#endif - { - // Known input message block - __m128i msg0 = _mm_setr_epi32(0x12345678, 0x9ABCDEF0, 0x87654321, 0x0FEDCBA9); - __m128i msg1 = _mm_setr_epi32(0x11111111, 0x22222222, 0x33333333, 0x44444444); - - // SHA256 message schedule update - __m128i tmp = _mm_sha256msg1_epu32(msg0, msg1); - - // Verify result - these values were pre-computed for the given input -#ifdef _MSC_VER - if (tmp.m128i_u32[0] == 0xD8131B44 && - tmp.m128i_u32[1] == 0x9DE6E22B && - tmp.m128i_u32[2] == 0xA86D643A && - tmp.m128i_u32[3] == 0x74320FED) -#else - if (((uint32_t*)(&tmp))[0] == 0xD8131B44 && - ((uint32_t*)(&tmp))[1] == 0x9DE6E22B && - ((uint32_t*)(&tmp))[2] == 0xA86D643A && - ((uint32_t*)(&tmp))[3] == 0x74320FED) -#endif - result = 1; - } -#ifdef _MSC_VER - __except (EXCEPTION_EXECUTE_HANDLER) - { - // ignore error if SHA instructions not supported - } -#else - signal(SIGILL, oldHandler); -#endif - - return result; -} +extern int TrySHA256(); #endif static BOOL CheckSHA256Support() { #if CRYPTOPP_BOOL_X64 && CRYPTOPP_SHANI_AVAILABLE #if defined(_MSC_VER) // Windows with MSVC int cpuInfo[4] = { 0 }; __cpuidex(cpuInfo, 7, 0); return (cpuInfo[1] & (1 << 29)) != 0? TRUE : FALSE; #elif defined(__GNUC__) || defined(__clang__) // Linux, FreeBSD, macOS with GCC/Clang unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; // First check if CPUID leaf 7 is supported if (__get_cpuid(0, &eax, &ebx, &ecx, &edx)) { if (eax >= 7) { // Now check SHA-256 support in leaf 7, sub-leaf 0 if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) { return (ebx & (1 << 29)) != 0? TRUE : FALSE; } } } return FALSE; #else #error "Unsupported compiler" #endif #else return FALSE; #endif } diff --git a/src/Crypto/wolfCrypt.c b/src/Crypto/wolfCrypt.c index 4a4946a6..da0fbe2f 100644 --- a/src/Crypto/wolfCrypt.c +++ b/src/Crypto/wolfCrypt.c @@ -205,39 +205,39 @@ void sha256(unsigned char * result, const unsigned char* source, uint_32t source wc_InitSha256(&sha256); wc_Sha256Update(&sha256, source, sourceLen); wc_Sha256Final(&sha256, result); wc_Sha256Free(&sha256); } void sha512_begin(sha512_ctx* ctx) { wc_InitSha512(ctx); } void sha512_hash(const unsigned char * source, uint_64t sourceLen, sha512_ctx *ctx) { wc_Sha512Update(ctx, source, sourceLen); } void sha512_end(unsigned char * result, sha512_ctx* ctx) { wc_Sha512Final(ctx, result); } void sha512(unsigned char * result, const unsigned char* source, uint_64t sourceLen) { wc_Sha512 sha512; wc_InitSha512(&sha512); wc_Sha512Update(&sha512, source, sourceLen); wc_Sha512Final(&sha512, result); wc_Sha512Free(&sha512); } -void derive_key_sha512 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 iterations, char *dk, int dklen) { +void derive_key_sha512 (unsigned char *pwd, int pwd_len, unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) { (void) iterations; wc_HKDF(WC_SHA512, (uint8*)pwd, (word32)pwd_len, (uint8*)salt, (word32)salt_len, NULL, 0, (uint8*)dk, (word32)dklen); } -void derive_key_sha256 (char *pwd, int pwd_len, char *salt, int salt_len, uint32 iterations, char *dk, int dklen) { +void derive_key_sha256 (unsigned char *pwd, int pwd_len, unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) { (void) iterations; wc_HKDF(WC_SHA256, (uint8*)pwd, (word32)pwd_len, (uint8*)salt, (word32)salt_len, NULL, 0, (uint8*)dk, (word32)dklen); } |