diff options
-rw-r--r-- | src/Crypto/Crypto.vcxproj | 4 | ||||
-rw-r--r-- | src/Crypto/Crypto.vcxproj.filters | 3 | ||||
-rw-r--r-- | src/Crypto/Sha2.c | 17 | ||||
-rw-r--r-- | src/Crypto/config.h | 13 | ||||
-rw-r--r-- | src/Crypto/cpu.c | 29 | ||||
-rw-r--r-- | src/Crypto/cpu.h | 2 | ||||
-rw-r--r-- | src/Crypto/sha256_armv8.c | 184 | ||||
-rw-r--r-- | src/Driver/Driver.vcxproj | 4 | ||||
-rw-r--r-- | src/Driver/Driver.vcxproj.filters | 3 | ||||
-rw-r--r-- | src/Release/Setup Files/veracrypt-arm64.sys | bin | 440472 -> 430592 bytes | |||
-rw-r--r-- | src/Release/Setup Files/veracrypt-x64.sys | bin | 639632 -> 629248 bytes | |||
-rw-r--r-- | src/Volume/Volume.make | 8 |
12 files changed, 267 insertions, 0 deletions
diff --git a/src/Crypto/Crypto.vcxproj b/src/Crypto/Crypto.vcxproj index c17bd607..cad50c06 100644 --- a/src/Crypto/Crypto.vcxproj +++ b/src/Crypto/Crypto.vcxproj @@ -238,40 +238,44 @@ <ClCompile Include="chacha-xmm.c" /> <ClCompile Include="chacha256.c" /> <ClCompile Include="chachaRng.c" /> <ClCompile Include="cpu.c" /> <ClCompile Include="jitterentropy-base.c"> <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Disabled</Optimization> <Optimization Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">Disabled</Optimization> </ClCompile> <ClCompile Include="kuznyechik.c" /> <ClCompile Include="kuznyechik_simd.c"> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> </ClCompile> <ClCompile Include="rdrand.c"> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> </ClCompile> <ClCompile Include="SerpentFast.c" /> <ClCompile Include="SerpentFast_simd.cpp" /> <ClCompile Include="Sha2.c" /> + <ClCompile Include="sha256_armv8.c"> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild> + </ClCompile> <ClCompile Include="Sha2Intel.c" /> <ClCompile Include="Streebog.c" /> <ClCompile Include="t1ha2.c" /> <ClCompile Include="t1ha2_selfcheck.c" /> <ClCompile Include="t1ha_selfcheck.c" /> <ClCompile Include="Twofish.c" /> <ClCompile Include="Whirlpool.c" /> </ItemGroup> <ItemGroup> <ClInclude Include="Aes.h" /> <ClInclude Include="Aes_hw_cpu.h" /> <ClInclude Include="Aesopt.h" /> <ClInclude Include="Aestab.h" /> <ClInclude Include="Camellia.h" /> <ClInclude Include="chacha256.h" /> <ClInclude Include="chachaRng.h" /> <ClInclude Include="chacha_u1.h" /> <ClInclude Include="chacha_u4.h" /> <ClInclude Include="config.h" /> <ClInclude Include="cpu.h" /> diff --git a/src/Crypto/Crypto.vcxproj.filters b/src/Crypto/Crypto.vcxproj.filters index f2b1b54d..099c3ce4 100644 --- a/src/Crypto/Crypto.vcxproj.filters +++ b/src/Crypto/Crypto.vcxproj.filters @@ -79,40 +79,43 @@ <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="blake2s_SSE2.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="blake2s_SSE41.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="blake2s_SSSE3.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="Sha2Intel.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="Aescrypt.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="Aes_hw_armv8.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="sha256_armv8.c"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="Aes.h"> <Filter>Header Files</Filter> </ClInclude> <ClInclude Include="Aes_hw_cpu.h"> <Filter>Header Files</Filter> </ClInclude> <ClInclude Include="Aesopt.h"> <Filter>Header Files</Filter> </ClInclude> <ClInclude Include="Aestab.h"> <Filter>Header Files</Filter> </ClInclude> <ClInclude Include="Camellia.h"> <Filter>Header Files</Filter> </ClInclude> <ClInclude Include="config.h"> <Filter>Header Files</Filter> </ClInclude> diff --git a/src/Crypto/Sha2.c b/src/Crypto/Sha2.c index 5ae9cae2..27e61c3d 100644 --- a/src/Crypto/Sha2.c +++ b/src/Crypto/Sha2.c @@ -298,40 +298,44 @@ void sha512(unsigned char * result, const unsigned char* source, uint_64t source #ifndef NO_OPTIMIZED_VERSIONS #if defined(__cplusplus) extern "C" { #endif #if CRYPTOPP_BOOL_X64 void sha256_sse4(void *input_data, uint_32t digest[8], uint_64t num_blks); void sha256_rorx(void *input_data, uint_32t digest[8], uint_64t num_blks); void sha256_avx(void *input_data, uint_32t digest[8], uint_64t num_blks); #if CRYPTOPP_SHANI_AVAILABLE void sha256_intel(void *input_data, uint_32t digest[8], uint_64t num_blks); #endif #endif #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 void VC_CDECL sha256_compress_nayuki(uint_32t state[8], const uint_8t block[64]); #endif +#if CRYPTOPP_ARM_SHA2_AVAILABLE + void sha256_compress_digest_armv8(const void* input_data, uint_32t digest[8], uint_64t num_blks); +#endif + #if defined(__cplusplus) } #endif #endif CRYPTOPP_ALIGN_DATA(16) static const uint_32t SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }; #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE)) #ifdef _MSC_VER @@ -740,40 +744,47 @@ void AvxSha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks) if (num_blks > 1) sha256_avx(mp, ctx->hash, num_blks); else sha256_sse4(mp, ctx->hash, num_blks); } void SSE4Sha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks) { sha256_sse4(mp, ctx->hash, num_blks); } #endif #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE)) void SSE2Sha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks) { X86_SHA256_HashBlocks(ctx->hash, (const uint_32t*)mp, (size_t)(num_blks * 64)); } #endif +#if CRYPTOPP_ARM_SHA2_AVAILABLE +void ArmSha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks) +{ + sha256_compress_digest_armv8(mp, ctx->hash, num_blks); +} +#endif + #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 void Sha256AsmTransform(sha256_ctx* ctx, void* mp, uint_64t num_blks) { uint_64t i; for (i = 0; i < num_blks; i++) sha256_compress_nayuki(ctx->hash, (uint_8t*)mp + i * 64); } #endif #endif void sha256_begin(sha256_ctx* ctx) { ctx->hash[0] = 0x6a09e667; ctx->hash[1] = 0xbb67ae85; ctx->hash[2] = 0x3c6ef372; ctx->hash[3] = 0xa54ff53a; ctx->hash[4] = 0x510e527f; ctx->hash[5] = 0x9b05688c; ctx->hash[6] = 0x1f83d9ab; @@ -788,40 +799,46 @@ void sha256_begin(sha256_ctx* ctx) #if CRYPTOPP_SHANI_AVAILABLE if (HasSHA256()) sha256transfunc = IntelSha256Transform; else #endif if (g_isIntel && HasSAVX2() && HasSBMI2()) sha256transfunc = Avx2Sha256Transform; else if (g_isIntel && HasSAVX()) sha256transfunc = AvxSha256Transform; else if (HasSSE41()) sha256transfunc = SSE4Sha256Transform; else #endif #if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE)) if (HasSSE2 ()) sha256transfunc = SSE2Sha256Transform; else #endif +#if CRYPTOPP_ARM_SHA2_AVAILABLE + if (HasSHA256()) + sha256transfunc = ArmSha256Transform; + else +#endif + #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 sha256transfunc = Sha256AsmTransform; #else sha256transfunc = StdSha256Transform; #endif #else sha256transfunc = StdSha256Transform; #endif } } void sha256_end(unsigned char * result, sha256_ctx* ctx) { int i; uint_64t mlen, pos = ctx->count[0]; uint_8t* m = (uint_8t*) ctx->wbuf; m[pos++] = 0x80; if (pos > 56) { memset(m + pos, 0, (size_t) (64 - pos)); diff --git a/src/Crypto/config.h b/src/Crypto/config.h index dd8e3f06..f485c07e 100644 --- a/src/Crypto/config.h +++ b/src/Crypto/config.h @@ -223,40 +223,53 @@ (CRYPTOPP_GCC_VERSION >= 40800) || (CRYPTOPP_LLVM_CLANG_VERSION >= 30300) || \ (CRYPTOPP_APPLE_CLANG_VERSION >= 40000) || (CRYPTOPP_MSC_VERSION >= 1916) # define CRYPTOPP_ARM_NEON_AVAILABLE 1 # define CRYPTOPP_ARM_ASIMD_AVAILABLE 1 # endif // Compilers # endif // Platforms #endif // ARMv8 and AES. -march=armv8-a+crypto or above must be present // Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 #if !defined(CRYPTOPP_ARM_AES_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ARM_AES) # if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) # if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || \ (CRYPTOPP_LLVM_CLANG_VERSION >= 30300) || (CRYPTOPP_APPLE_CLANG_VERSION >= 40300) || \ (CRYPTOPP_MSC_VERSION >= 1916) # define CRYPTOPP_ARM_AES_AVAILABLE 1 # endif // Compilers # endif // Platforms #endif +// ARMv8 and SHA-1, SHA-256. -march=armv8-a+crypto or above must be present +// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017 +#if !defined(CRYPTOPP_ARM_SHA_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ARM_SHA) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) +# if defined(__ARM_FEATURE_CRYPTO) || (CRYPTOPP_GCC_VERSION >= 40800) || \ + (CRYPTOPP_LLVM_CLANG_VERSION >= 30300) || (CRYPTOPP_APPLE_CLANG_VERSION >= 40300) || \ + (CRYPTOPP_MSC_VERSION >= 1916) +# define CRYPTOPP_ARM_SHA1_AVAILABLE 1 +# define CRYPTOPP_ARM_SHA2_AVAILABLE 1 +# endif // Compilers +# endif // Platforms +#endif + // Undo the ASM and Intrinsic related defines due to X32. #if CRYPTOPP_BOOL_X32 # undef CRYPTOPP_BOOL_X64 # undef CRYPTOPP_X64_ASM_AVAILABLE # undef CRYPTOPP_X64_MASM_AVAILABLE #endif #if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && !defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) #if (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || defined(__powerpc__) || (__ARM_FEATURE_UNALIGNED >= 1)) #define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS #endif #endif // this version of the macro is fastest on Pentium 3 and Pentium 4 with MSVC 6 SP5 w/ Processor Pack #define GETBYTE(x, y) (unsigned int)((unsigned char)((x)>>(8*(y)))) // these may be faster on other CPUs/compilers // #define GETBYTE(x, y) (unsigned int)(((x)>>(8*(y)))&255) // #define GETBYTE(x, y) (((uint8 *)&(x))[y]) #define CRYPTOPP_GET_BYTE_AS_BYTE(x, y) ((uint8)((x)>>(8*(y)))) diff --git a/src/Crypto/cpu.c b/src/Crypto/cpu.c index 0f1ba54d..a5b5bb19 100644 --- a/src/Crypto/cpu.c +++ b/src/Crypto/cpu.c @@ -458,54 +458,83 @@ void DisableCPUExtendedFeatures () g_hasMMX = 0; g_hasAVX = 0; g_hasAVX2 = 0; g_hasBMI2 = 0; g_hasSSE42 = 0; g_hasSSE41 = 0; g_hasSSSE3 = 0; g_hasAESNI = 0; g_hasCLMUL = 0; g_hasSHA256 = 0; } #endif #if CRYPTOPP_BOOL_ARMV8 #if defined(__linux__) && defined(__aarch64__) #include <sys/auxv.h> #ifndef HWCAP_AES # define HWCAP_AES (1 << 3) #endif +#ifndef HWCAP_SHA2 +# define HWCAP_SHA2 (1 << 6) +#endif #endif volatile int g_hasAESARM = 0; +volatile int g_hasSHA256ARM = 0; inline int CPU_QueryAES() { #if defined(CRYPTOPP_ARM_AES_AVAILABLE) #if defined(__linux__) && defined(__aarch64__) if ((getauxval(AT_HWCAP) & HWCAP_AES) != 0) return 1; #elif defined(__APPLE__) && defined(__aarch64__) // Apple Sillcon (M1) and later return 1; #elif defined(_WIN32) && defined(_M_ARM64) #ifdef TC_WINDOWS_DRIVER if (ExIsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0) return 1; #else if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0) return 1; #endif #endif return 0; #else return 0; #endif } +inline int CPU_QuerySHA2() +{ +#if defined(CRYPTOPP_ARM_SHA2_AVAILABLE) +#if defined(__linux__) && defined(__aarch64__) + if ((getauxval(AT_HWCAP) & HWCAP_SHA2) != 0) + return 1; +#elif defined(__APPLE__) && defined(__aarch64__) + // Apple Sillcon (M1) and later + return 1; +#elif defined(_WIN32) && defined(_M_ARM64) +#ifdef TC_WINDOWS_DRIVER + if (ExIsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0) + return 1; +#else + if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0) + return 1; +#endif +#endif + return 0; +#else + return 0; +#endif +} + void DetectArmFeatures() { g_hasAESARM = CPU_QueryAES(); + g_hasSHA256ARM = CPU_QuerySHA2(); } #endif
\ No newline at end of file diff --git a/src/Crypto/cpu.h b/src/Crypto/cpu.h index 761258f2..cb34ad1f 100644 --- a/src/Crypto/cpu.h +++ b/src/Crypto/cpu.h @@ -281,43 +281,45 @@ void DisableCPUExtendedFeatures (); #define HasRDSEED() g_hasRDSEED #define HasSHA256() g_hasSHA256 #define IsCpuIntel() g_isIntel #define IsCpuAMD() g_isAMD #define GetCacheLineSize() g_cacheLineSize #if defined(__cplusplus) } #endif #elif CRYPTOPP_BOOL_ARMV8 #if defined(__cplusplus) extern "C" { #endif #if !defined(CRYPTOPP_DISABLE_AESNI) && !defined(WOLFCRYPT_BACKEND) #define TC_AES_HW_CPU #endif extern volatile int g_hasAESARM; +extern volatile int g_hasSHA256ARM; void DetectArmFeatures(); #define HasAESNI() g_hasAESARM +#define HasSHA256() g_hasSHA256ARM #if defined(__cplusplus) } #endif #else #define HasSSE2() 0 #define HasISSE() 0 #define HasMMX() 0 #define HasSSE42() 0 #define HasSSE41() 0 #define HasSAVX() 0 #define HasSAVX2() 0 #define HasSBMI2() 0 #define HasSSSE3() 0 #define HasAESNI() 0 #define HasCLMUL() 0 #define IsP4() 0 diff --git a/src/Crypto/sha256_armv8.c b/src/Crypto/sha256_armv8.c new file mode 100644 index 00000000..1599350a --- /dev/null +++ b/src/Crypto/sha256_armv8.c @@ -0,0 +1,184 @@ +/* +* SHA-256 using CPU instructions in ARMv8 +* +* Contributed by Jeffrey Walton. Based on public domain code by +* Johannes Schneiders, Skip Hovsmith and Barry O'Rourke. +* +* Further changes (C) 2020 Jack Lloyd +* +* Botan is released under the Simplified BSD License (see license.txt) +*/ + +/* Modified and adapted for VeraCrypt */ + +#include "Common/Tcdefs.h" +#if !defined(_UEFI) +#include <memory.h> +#include <stdlib.h> +#endif +#include "cpu.h" +#include "misc.h" + +#if CRYPTOPP_ARM_SHA2_AVAILABLE + +#include <arm_neon.h> + +CRYPTOPP_ALIGN_DATA(64) static const uint32 K[] = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, +}; + +void sha256_compress_digest_armv8(void* input_data, uint32 digest[8], uint64 num_blks) { + + + // Load initial values + uint32x4_t STATE0 = vld1q_u32(&digest[0]); + uint32x4_t STATE1 = vld1q_u32(&digest[4]); + + // Intermediate void* cast due to https://llvm.org/bugs/show_bug.cgi?id=20670 + const uint32* input32 = (const uint32*)(const void*)input_data; + + while (num_blks > 0) { + // Save current state + const uint32x4_t ABCD_SAVE = STATE0; + const uint32x4_t EFGH_SAVE = STATE1; + + uint32x4_t MSG0 = vld1q_u32(input32 + 0); + uint32x4_t MSG1 = vld1q_u32(input32 + 4); + uint32x4_t MSG2 = vld1q_u32(input32 + 8); + uint32x4_t MSG3 = vld1q_u32(input32 + 12); + + MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0))); + MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1))); + MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2))); + MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3))); + + uint32x4_t MSG_K, TSTATE; + + // Rounds 0-3 + MSG_K = vaddq_u32(MSG0, vld1q_u32(&K[4 * 0])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG0 = vsha256su1q_u32(vsha256su0q_u32(MSG0, MSG1), MSG2, MSG3); + + // Rounds 4-7 + MSG_K = vaddq_u32(MSG1, vld1q_u32(&K[4 * 1])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG1 = vsha256su1q_u32(vsha256su0q_u32(MSG1, MSG2), MSG3, MSG0); + + // Rounds 8-11 + MSG_K = vaddq_u32(MSG2, vld1q_u32(&K[4 * 2])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG2 = vsha256su1q_u32(vsha256su0q_u32(MSG2, MSG3), MSG0, MSG1); + + // Rounds 12-15 + MSG_K = vaddq_u32(MSG3, vld1q_u32(&K[4 * 3])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG3 = vsha256su1q_u32(vsha256su0q_u32(MSG3, MSG0), MSG1, MSG2); + + // Rounds 16-19 + MSG_K = vaddq_u32(MSG0, vld1q_u32(&K[4 * 4])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG0 = vsha256su1q_u32(vsha256su0q_u32(MSG0, MSG1), MSG2, MSG3); + + // Rounds 20-23 + MSG_K = vaddq_u32(MSG1, vld1q_u32(&K[4 * 5])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG1 = vsha256su1q_u32(vsha256su0q_u32(MSG1, MSG2), MSG3, MSG0); + + // Rounds 24-27 + MSG_K = vaddq_u32(MSG2, vld1q_u32(&K[4 * 6])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG2 = vsha256su1q_u32(vsha256su0q_u32(MSG2, MSG3), MSG0, MSG1); + + // Rounds 28-31 + MSG_K = vaddq_u32(MSG3, vld1q_u32(&K[4 * 7])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG3 = vsha256su1q_u32(vsha256su0q_u32(MSG3, MSG0), MSG1, MSG2); + + // Rounds 32-35 + MSG_K = vaddq_u32(MSG0, vld1q_u32(&K[4 * 8])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG0 = vsha256su1q_u32(vsha256su0q_u32(MSG0, MSG1), MSG2, MSG3); + + // Rounds 36-39 + MSG_K = vaddq_u32(MSG1, vld1q_u32(&K[4 * 9])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG1 = vsha256su1q_u32(vsha256su0q_u32(MSG1, MSG2), MSG3, MSG0); + + // Rounds 40-43 + MSG_K = vaddq_u32(MSG2, vld1q_u32(&K[4 * 10])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG2 = vsha256su1q_u32(vsha256su0q_u32(MSG2, MSG3), MSG0, MSG1); + + // Rounds 44-47 + MSG_K = vaddq_u32(MSG3, vld1q_u32(&K[4 * 11])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + MSG3 = vsha256su1q_u32(vsha256su0q_u32(MSG3, MSG0), MSG1, MSG2); + + // Rounds 48-51 + MSG_K = vaddq_u32(MSG0, vld1q_u32(&K[4 * 12])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + + // Rounds 52-55 + MSG_K = vaddq_u32(MSG1, vld1q_u32(&K[4 * 13])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + + // Rounds 56-59 + MSG_K = vaddq_u32(MSG2, vld1q_u32(&K[4 * 14])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + + // Rounds 60-63 + MSG_K = vaddq_u32(MSG3, vld1q_u32(&K[4 * 15])); + TSTATE = vsha256hq_u32(STATE0, STATE1, MSG_K); + STATE1 = vsha256h2q_u32(STATE1, STATE0, MSG_K); + STATE0 = TSTATE; + + // Add back to state + STATE0 = vaddq_u32(STATE0, ABCD_SAVE); + STATE1 = vaddq_u32(STATE1, EFGH_SAVE); + + input32 += 64 / 4; + num_blks--; + } + + // Save state + vst1q_u32(&digest[0], STATE0); + vst1q_u32(&digest[4], STATE1); +} +#endif diff --git a/src/Driver/Driver.vcxproj b/src/Driver/Driver.vcxproj index f6aacf9a..76ca90ed 100644 --- a/src/Driver/Driver.vcxproj +++ b/src/Driver/Driver.vcxproj @@ -255,40 +255,44 @@ copy $(OutDir)veracrypt.inf "$(SolutionDir)Debug\Setup Files\veracrypt.inf"</Com <ClCompile Include="..\Crypto\kuznyechik_simd.c"> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild> </ClCompile> <ClCompile Include="..\Crypto\rdrand.c"> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild> </ClCompile> <ClCompile Include="..\Crypto\SerpentFast.c" /> <ClCompile Include="..\Crypto\SerpentFast_simd.cpp"> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild> </ClCompile> <ClCompile Include="..\Crypto\Sha2.c" /> + <ClCompile Include="..\Crypto\sha256_armv8.c"> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild> + </ClCompile> <ClCompile Include="..\Crypto\Sha2Intel.c" /> <ClCompile Include="..\Crypto\Streebog.c" /> <ClCompile Include="..\Crypto\t1ha2.c" /> <ClCompile Include="..\Crypto\t1ha2_selfcheck.c" /> <ClCompile Include="..\Crypto\t1ha_selfcheck.c" /> <ClCompile Include="..\Crypto\Twofish.c" /> <ClCompile Include="..\Crypto\Whirlpool.c" /> <ClCompile Include="..\Driver\DriveFilter.c" /> <ClCompile Include="..\Driver\DumpFilter.c" /> <ClCompile Include="..\Driver\EncryptedIoQueue.c" /> <ClCompile Include="..\Driver\Ntdriver.c" /> <ClCompile Include="..\Driver\Ntvol.c" /> <ClCompile Include="..\Driver\VolumeFilter.c" /> </ItemGroup> <ItemGroup> <ClInclude Include="..\Common\Tcdefs.h" /> <ClInclude Include="..\Crypto\Aes.h" /> <ClInclude Include="..\Crypto\Aesopt.h" /> <ClInclude Include="..\Crypto\AesSmall.h" /> <ClInclude Include="..\Crypto\Aestab.h" /> diff --git a/src/Driver/Driver.vcxproj.filters b/src/Driver/Driver.vcxproj.filters index 6f43b0e8..3c427dad 100644 --- a/src/Driver/Driver.vcxproj.filters +++ b/src/Driver/Driver.vcxproj.filters @@ -151,40 +151,43 @@ <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="..\Driver\DumpFilter.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="..\Driver\EncryptedIoQueue.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="..\Driver\Ntdriver.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="..\Driver\Ntvol.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="..\Driver\VolumeFilter.c"> <Filter>Source Files</Filter> </ClCompile> <ClCompile Include="..\Crypto\Aes_hw_armv8.c"> <Filter>Crypto\Source Files</Filter> </ClCompile> + <ClCompile Include="..\Crypto\sha256_armv8.c"> + <Filter>Crypto\Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="..\Common\Tcdefs.h"> <Filter>Common</Filter> </ClInclude> <ClInclude Include="..\Crypto\Aes.h"> <Filter>Crypto\Header Files</Filter> </ClInclude> <ClInclude Include="..\Crypto\Aesopt.h"> <Filter>Crypto\Header Files</Filter> </ClInclude> <ClInclude Include="..\Crypto\AesSmall.h"> <Filter>Crypto\Header Files</Filter> </ClInclude> <ClInclude Include="..\Crypto\Aestab.h"> <Filter>Crypto\Header Files</Filter> </ClInclude> <ClInclude Include="..\Crypto\Aes_hw_cpu.h"> <Filter>Crypto\Header Files</Filter> </ClInclude> diff --git a/src/Release/Setup Files/veracrypt-arm64.sys b/src/Release/Setup Files/veracrypt-arm64.sys Binary files differindex 8925f812..707fa0d6 100644 --- a/src/Release/Setup Files/veracrypt-arm64.sys +++ b/src/Release/Setup Files/veracrypt-arm64.sys diff --git a/src/Release/Setup Files/veracrypt-x64.sys b/src/Release/Setup Files/veracrypt-x64.sys Binary files differindex 42e677f7..ae31c516 100644 --- a/src/Release/Setup Files/veracrypt-x64.sys +++ b/src/Release/Setup Files/veracrypt-x64.sys diff --git a/src/Volume/Volume.make b/src/Volume/Volume.make index e38542bb..a4f62562 100644 --- a/src/Volume/Volume.make +++ b/src/Volume/Volume.make @@ -33,72 +33,74 @@ OBJS += VolumePassword.o OBJS += VolumePasswordCache.o ifeq "$(ENABLE_WOLFCRYPT)" "0" OBJS += EncryptionModeXTS.o else OBJS += EncryptionModeWolfCryptXTS.o endif ifeq "$(ENABLE_WOLFCRYPT)" "0" ifeq "$(PLATFORM)" "MacOSX" ifneq "$(COMPILE_ASM)" "false" OBJSEX += ../Crypto/Aes_asm.oo OBJS += ../Crypto/Aes_hw_cpu.o OBJSEX += ../Crypto/Aes_hw_armv8.oo OBJS += ../Crypto/Aescrypt.o OBJSEX += ../Crypto/Twofish_asm.oo OBJSEX += ../Crypto/Camellia_asm.oo OBJSEX += ../Crypto/Camellia_aesni_asm.oo OBJSEX += ../Crypto/sha256-nayuki.oo OBJSEX += ../Crypto/sha512-nayuki.oo + OBJSEX += ../Crypto/sha256_armv8.oo OBJSEX += ../Crypto/sha256_avx1.oo OBJSEX += ../Crypto/sha256_avx2.oo OBJSEX += ../Crypto/sha256_sse4.oo OBJSEX += ../Crypto/sha512_avx1.oo OBJSEX += ../Crypto/sha512_avx2.oo OBJSEX += ../Crypto/sha512_sse4.oo endif else ifeq "$(CPU_ARCH)" "x86" OBJS += ../Crypto/Aes_x86.o ifeq "$(DISABLE_AESNI)" "0" OBJS += ../Crypto/Aes_hw_cpu.o endif OBJS += ../Crypto/sha256-x86-nayuki.o OBJS += ../Crypto/sha512-x86-nayuki.o else ifeq "$(CPU_ARCH)" "x64" OBJS += ../Crypto/Aes_x64.o ifeq "$(DISABLE_AESNI)" "0" OBJS += ../Crypto/Aes_hw_cpu.o endif OBJS += ../Crypto/Twofish_x64.o OBJS += ../Crypto/Camellia_x64.o OBJS += ../Crypto/Camellia_aesni_x64.o OBJS += ../Crypto/sha512-x64-nayuki.o OBJS += ../Crypto/sha256_avx1_x64.o OBJS += ../Crypto/sha256_avx2_x64.o OBJS += ../Crypto/sha256_sse4_x64.o OBJS += ../Crypto/sha512_avx1_x64.o OBJS += ../Crypto/sha512_avx2_x64.o OBJS += ../Crypto/sha512_sse4_x64.o else ifeq "$(CPU_ARCH)" "arm64" OBJARMV8CRYPTO += ../Crypto/Aes_hw_armv8.oarmv8crypto OBJS += ../Crypto/Aescrypt.o + OBJARMV8CRYPTO += ../Crypto/sha256_armv8.oarmv8crypto else OBJS += ../Crypto/Aescrypt.o endif ifeq "$(GCC_GTEQ_430)" "1" OBJSSSE41 += ../Crypto/blake2s_SSE41.osse41 OBJSSSSE3 += ../Crypto/blake2s_SSSE3.ossse3 else OBJS += ../Crypto/blake2s_SSE41.o OBJS += ../Crypto/blake2s_SSSE3.o endif ifeq "$(GCC_GTEQ_500)" "1" OBJSHANI += ../Crypto/Sha2Intel.oshani else OBJS += ../Crypto/Sha2Intel.o endif else OBJS += ../Crypto/wolfCrypt.o endif @@ -133,40 +135,46 @@ OBJS += ../Common/SCardReader.o OBJS += ../Common/Token.o OBJS += ../Common/Crc.o OBJS += ../Common/TLVParser.o OBJS += ../Common/EMVCard.o OBJS += ../Common/EMVToken.o OBJS += ../Common/Endian.o OBJS += ../Common/GfMul.o OBJS += ../Common/SecurityToken.o VolumeLibrary: Volume.a ifeq "$(ENABLE_WOLFCRYPT)" "0" ifeq "$(PLATFORM)" "MacOSX" ifneq "$(COMPILE_ASM)" "false" ../Crypto/Aes_hw_armv8.oo: ../Crypto/Aes_hw_armv8.c @echo Compiling $(<F) $(CC) $(CFLAGS_ARM64) -c ../Crypto/Aes_hw_armv8.c -o ../Crypto/Aes_hw_armv8_arm64.o $(CC) $(CFLAGS_X64) -c ../Crypto/Aes_hw_armv8.c -o ../Crypto/Aes_hw_armv8_x64.o lipo -create ../Crypto/Aes_hw_armv8_arm64.o ../Crypto/Aes_hw_armv8_x64.o -output ../Crypto/Aes_hw_armv8.oo rm -fr ../Crypto/Aes_hw_armv8_arm64.o ../Crypto/Aes_hw_armv8_x64.o +../Crypto/sha256_armv8.oo: ../Crypto/sha256_armv8.c + @echo Compiling $(<F) + $(CC) $(CFLAGS_ARM64) -c ../Crypto/sha256_armv8.c -o ../Crypto/sha256_armv8_arm64.o + $(CC) $(CFLAGS_X64) -c ../Crypto/sha256_armv8.c -o ../Crypto/sha256_armv8_x64.o + lipo -create ../Crypto/sha256_armv8_arm64.o ../Crypto/sha256_armv8_x64.o -output ../Crypto/sha256_armv8.oo + rm -fr ../Crypto/sha256_armv8_arm64.o ../Crypto/sha256_armv8_x64.o ../Crypto/Aes_asm.oo: ../Crypto/Aes_x86.asm ../Crypto/Aes_x64.asm @echo Assembling $(<F) $(AS) $(ASFLAGS32) -o ../Crypto/Aes_x86.o ../Crypto/Aes_x86.asm $(AS) $(ASFLAGS64) -o ../Crypto/Aes_x64.o ../Crypto/Aes_x64.asm lipo -create ../Crypto/Aes_x86.o ../Crypto/Aes_x64.o -output ../Crypto/Aes_asm.oo rm -fr ../Crypto/Aes_x86.o ../Crypto/Aes_x64.o ../Crypto/Twofish_asm.oo: ../Crypto/Twofish_x64.S @echo Assembling $(<F) $(AS) $(ASFLAGS64) -p gas -o ../Crypto/Twofish_asm.oo ../Crypto/Twofish_x64.S ../Crypto/Camellia_asm.oo: ../Crypto/Camellia_x64.S @echo Assembling $(<F) $(AS) $(ASFLAGS64) -p gas -o ../Crypto/Camellia_asm.oo ../Crypto/Camellia_x64.S ../Crypto/Camellia_aesni_asm.oo: ../Crypto/Camellia_aesni_x64.S @echo Assembling $(<F) $(AS) $(ASFLAGS64) -p gas -o ../Crypto/Camellia_aesni_asm.oo ../Crypto/Camellia_aesni_x64.S ../Crypto/sha256-nayuki.oo: ../Crypto/sha256-x86-nayuki.S @echo Assembling $(<F) $(AS) $(ASFLAGS32) -p gas -o ../Crypto/sha256-x86-nayuki.o ../Crypto/sha256-x86-nayuki.S $(AS) $(ASFLAGS64) -p gas -o ../Crypto/sha256-x64-nayuki.o ../Crypto/sha256-x64-nayuki.S lipo -create ../Crypto/sha256-x86-nayuki.o ../Crypto/sha256-x64-nayuki.o -output ../Crypto/sha256-nayuki.oo |