// RIPEMD-160 written and placed in the public domain by Wei Dai /* * This code implements the MD4 message-digest algorithm. * The algorithm is due to Ron Rivest. This code was * written by Colin Plumb in 1993, no copyright is claimed. * This code is in the public domain; do with it what you wish. */ /* Adapted for TrueCrypt */ /* Adapted for VeraCrypt */ #if !defined(_UEFI) #include #endif // !defined(_UEFI) #include "Common/Tcdefs.h" #include "Common/Endian.h" #include "Rmd160.h" #define F(x, y, z) (x ^ y ^ z) #define G(x, y, z) (z ^ (x & (y^z))) #define H(x, y, z) (z ^ (x | ~y)) #define I(x, y, z) (y ^ (z & (x^y))) #define J(x, y, z) (x ^ (y | ~z)) #define PUT_64BIT_LE(cp, value) do { \ (cp)[7] = (byte) ((value) >> 56); \ (cp)[6] = (byte) ((value) >> 48); \ (cp)[5] = (byte) ((value) >> 40); \ (cp)[4] = (byte) ((value) >> 32); \ (cp)[3] = (byte) ((value) >> 24); \ (cp)[2] = (byte) ((value) >> 16); \ (cp)[1] = (byte) ((value) >> 8); \ (cp)[0] = (byte) (value); } while (0) #define PUT_32BIT_LE(cp, value) do { \ (cp)[3] = (byte) ((value) >> 24); \ (cp)[2] = (byte) ((value) >> 16); \ (cp)[1] = (byte) ((value) >> 8); \ (cp)[0] = (byte) (value); } while (0) #ifndef TC_MINIMIZE_CODE_SIZE static byte PADDING[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #else static byte PADDING[64]; #endif void RMD160Init (RMD160_CTX *ctx) { ctx->count = 0; ctx->state[0] = 0x67452301; ctx->state[1] = 0xefcdab89; ctx->state[2] = 0x98badcfe; ctx->state[3] = 0x10325476; ctx->state[4] = 0xc3d2e1f0; PADDING[0] = 0x80; } /* * Update context to reflect the concatenation of another buffer full * of bytes. 
*/ void RMD160Update (RMD160_CTX *ctx, const unsigned char *input, unsigned __int32 lenArg) { #ifndef TC_WINDOWS_BOOT uint64 len = lenArg; #else uint32 len = lenArg; #endif unsigned int have, need; /* Check how many bytes we already have and how many more we need. */ have = (unsigned int) ((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); need = RIPEMD160_BLOCK_LENGTH - have; /* Update bitcount */ ctx->count += len; if (len >= need) { if (have != 0) { memcpy (ctx->buffer + have, input, (size_t) need); RMD160Transform ((uint32 *) ctx->state, (const uint32 *) ctx->buffer); input += need; len -= need; have = 0; } /* Process data in RIPEMD160_BLOCK_LENGTH-byte chunks. */ while (len >= RIPEMD160_BLOCK_LENGTH) { RMD160Transform ((uint32 *) ctx->state, (const uint32 *) input); input += RIPEMD160_BLOCK_LENGTH; len -= RIPEMD160_BLOCK_LENGTH; } } /* Handle any remaining bytes of data. */ if (len != 0) memcpy (ctx->buffer + have, input, (size_t) len); } /* * Pad pad to 64-byte boundary with the bit pattern * 1 0* (64-bit count of bits processed, MSB-first) */ static void RMD160Pad(RMD160_CTX *ctx) { byte count[8]; uint32 padlen; /* Convert count to 8 bytes in little endian order. */ #ifndef TC_WINDOWS_BOOT uint64 bitcount = ctx->count << 3; PUT_64BIT_LE(count, bitcount); #else *(uint32 *) (count + 4) = 0; *(uint32 *) (count + 0) = ctx->count << 3; #endif /* Pad out to 56 mod 64. */ padlen = RIPEMD160_BLOCK_LENGTH - (uint32)((ctx->count) & (RIPEMD160_BLOCK_LENGTH - 1)); if (padlen < 1 + 8) padlen += RIPEMD160_BLOCK_LENGTH; RMD160Update(ctx, PADDING, padlen - 8); /* padlen - 8 <= 64 */ RMD160Update(ctx, count, 8); } /* * Final wrapup--call RMD160Pad, fill in digest and zero out ctx. 
*/ void RMD160Final(unsigned char *digest, RMD160_CTX *ctx) { int i; RMD160Pad(ctx); if (digest) { for (i = 0; i < 5; i++) PUT_32BIT_LE(digest + i * 4, ctx->state[i]); #ifndef TC_WINDOWS_BOOT burn (ctx, sizeof(*ctx)); #endif } } #ifndef TC_MINIMIZE_CODE_SIZE #define word32 unsigned __int32 #define k0 0 #define k1 0x5a827999UL #define k2 0x6ed9eba1UL #define k3 0x8f1bbcdcUL #define k4 0xa953fd4eUL #define k5 0x50a28be6UL #define k6 0x5c4dd124UL #define k7 0x6d703ef3UL #define k8 0x7a6d76e9UL #define k9 0 static word32 rotlFixed (word32 x, unsigned int y) { return (word32)((x<>(sizeof(word32)*8-y))); } #define Subround(f, a, b, c, d, e, x, s, k) \ a += f(b, c, d) + x + k;\ a = rotlFixed((word32)a, s) + e;\ c = rotlFixed((word32)c, 10U) void RMD160Transform (unsigned __int32 *digest, const unsigned __int32 *data) { #if BYTE_ORDER == LITTLE_ENDIAN const word32 *X = data; #else word32 X[16]; int i; #endif word32 a1, b1, c1, d1, e1, a2, b2, c2, d2, e2; a1 = a2 = digest[0]; b1 = b2 = digest[1]; c1 = c2 = digest[2]; d1 = d2 = digest[3]; e1 = e2 = digest[4]; #if BYTE_ORDER == BIG_ENDIAN for (i = 0; i < 16; i++) { X[i] = LE32 (data[i]); } #endif Subround(F, a1, b1, c1, d1, e1, X[ 0], 11, k0); Subround(F, e1, a1, b1, c1, d1, X[ 1], 14, k0); Subround(F, d1, e1, a1, b1, c1, X[ 2], 15, k0); Subround(F, c1, d1, e1, a1, b1, X[ 3], 12, k0); Subround(F, b1, c1, d1, e1, a1, X[ 4], 5, k0); Subround(F, a1, b1, c1, d1, e1, X[ 5], 8, k0); Subround(F, e1, a1, b1, c1, d1, X[ 6], 7, k0); Subround(F, d1, e1, a1, b1, c1, X[ 7], 9, k0); Subround(F, c1, d1, e1, a1, b1, X[ 8], 11, k0); Subround(F, b1, c1, d1, e1, a1, X[ 9], 13, k0); Subround(F, a1, b1, c1, d1, e1, X[10], 14, k0); Subround(F, e1, a1, b1, c1, d1, X[11], 15, k0); Subround(F, d1, e1, a1, b1, c1, X[12], 6, k0); Subround(F, c1, d1, e1, a1, b1, X[13], 7, k0); Subround(F, b1, c1, d1, e1, a1, X[14], 9, k0); Subround(F, a1, b1, c1, d1, e1, X[15], 8, k0); Subround(G, e1, a1, b1, c1, d1, X[ 7], 7, k1); Subround(G, d1, e1, a1, b1, c1, X[ 4], 
6, k1); Subround(G, c1, d1, e1, a1, b1, X[13], 8, k1); Subround(G, b1, c1, d1, e1, a1, X[ 1], 13, k1); Subround(G, a1, b1, c1, d1, e1, X[10], 11, k1); Subround(G, e1, a1, b1, c1, d1, X[ 6], 9, k1); Subround(G, d1, e1, a1, b1, c1, X[15], 7, k1); Subround(G, c1, d1, e1, a1, b1, X[ 3], 15, k1); Subround(G, b1, c1, d1, e1, a1, X[12], 7, k1); Subround(G, a1, b1, c1, d1, e1, X[ 0], 12, k1); Subround(G, e1, a1, b1, c1, d1, X[ 9], 15, k1); Subround(G, d1, e1, a1, b1, c1, X[ 5], 9, k1); Subround(G, c1, d1, e1, a1, b1, X[ 2], 11, k1); Subround(G, b1, c1, d1, e1, a1, X[14], 7, k1); Subround(G, a1, b1, c1, d1, e1, X[11], 13, k1); Subround(G, e1, a1, b1, c1, d1, X[ 8], 12, k1); Subround(H, d1, e1, a1, b1, c1, X[ 3], 11, k2); Subround(H, c1, d1, e1, a1, b1, X[10], 13, k2); Subround(H, b1, c1, d1, e1, a1, X[14], 6, k2); Subround(H, a1, b1, c1, d1, e1, X[ 4], 7, k2); Subround(H, e1, a1, b1, c1, d1, X[ 9], 14, k2); Subround(H, d1, e1, a1, b1, c1, X[15], 9, k2); Subround(H, c1, d1, e1, a1, b1, X[ 8], 13, k2); Subround(H, b1, c1, d1, e1, a1, X[ 1], 15, k2); Subround(H, a1, b1, c1, d1, e1, X[ 2], 14, k2); Subround(H, e1, a1, b1, c1, d1, X[ 7], 8, k2); Subround(H, d1, e1, a1, b1, c1, X[ 0], 13, k2); Subround(H, c1, d1, e1, a1, b1, X[ 6], 6, k2); Subround(H, b1, c1, d1, e1, a1, X[13], 5, k2); Subround(H, a1, b1, c1, d1, e1, X[11], 12, k2); Subround(H, e1, a1, b1, c1, d1, X[ 5], 7, k2); Subround(H, d1, e1, a1, b1, c1, X[12], 5, k2); Subround(I, c1, d1, e1, a1, b1, X[ 1], 11, k3); Subround(I, b1, c1, d1, e1, a1, X[ 9], 12, k3); Subround(I, a1, b1, c1, d1, e1, X[11], 14, k3); Subround(I, e1, a1, b1, c1, d1, X[10], 15, k3); Subround(I, d1, e1, a1, b1, c1, X[ 0], 14, k3); Subround(I, c1, d1, e1, a1, b1, X[ 8], 15, k3); Subround(I, b1, c1, d1, e1, a1, X[12], 9, k3); Subround(I, a1, b1, c1, d1, e1, X[ 4], 8, k3); Subround(I, e1, a1, b1, c1, d1, X[13], 9, k3); Subround(I, d1, e1, a1, b1, c1, X[ 3], 14, k3); Subround(I, c1, d1, e1, a1, b1, X[ 7], 5, k3); Subround(I, b1, c1, d1, e1, a1, X[15], 6, 
k3); Subround(I, a1, b1, c1, d1, e1, X[14], 8, k3); Subround(I, e1, a1, b1, c1, d1, X[ 5], 6, k3); Subround(I, d1, e1, a1, b1, c1, X[ 6], 5, k3); Subround(I, c1, d1, e1, a1, b1, X[ 2], 12, k3); Subround(J, b1, c1, d1, e1, a1, X[ 4], 9, k4); Subround(J, a1, b1, c1, d1, e1, X[ 0], 15, k4); Subround(J, e1, a1, b1, c1, d1, X[ 5], 5, k4); Subround(J, d1, e1, a1, b1, c1, X[ 9], 11, k4); Subround(J, c1, d1, e1, a1, b1, X[ 7], 6, k4); Subround(J, b1, c1, d1, e1, a1, X[12], 8, k4); Subround(J, a1, b1, c1, d1, e1, X[ 2], 13, k4); Subround(J, e1, a1, b1, c1, d1, X[10], 12, k4); Subround(J, d1, e1, a1, b1, c1, X[14], 5, k4); Subround(J, c1, d1, e1, a1, b1, X[ 1], 12, k4); Subround(J, b1, c1, d1, e1, a1, X[ 3], 13, k4); Subround(J, a1, b1, c1, d1, e1, X[ 8], 14, k4); Subround(J, e1, a1, b1, c1, d1, X[11], 11, k4); Subround(J, d1, e1, a1, b1, c1, X[ 6], 8, k4); Subround(J, c1, d1, e1, a1, b1, X[15], 5, k4); Subround(J, b1, c1, d1, e1, a1, X[13], 6, k4); Subround(J, a2, b2, c2, d2, e2, X[ 5], 8, k5); Subround(J, e2, a2, b2, c2, d2, X[14], 9, k5); Subround(J, d2, e2, a2, b2, c2, X[ 7], 9, k5); Subround(J, c2, d2, e2, a2, b2, X[ 0], 11, k5); Subround(J, b2, c2, d2, e2, a2, X[ 9], 13, k5); Subround(J, a2, b2, c2, d2, e2, X[ 2], 15, k5); Subround(J, e2, a2, b2, c2, d2, X[11], 15, k5); Subround(J, d2, e2, a2, b2, c2, X[ 4], 5, k5); Subround(J, c2, d2, e2, a2, b2, X[13], 7, k5); Subround(J, b2, c2, d2, e2, a2, X[ 6], 7, k5); Subround(J, a2, b2, c2, d2, e2, X[15], 8, k5); Subround(J, e2, a2, b2, c2, d2, X[ 8], 11, k5); Subround(J, d2, e2, a2, b2, c2, X[ 1], 14, k5); Subround(J, c2, d2, e2, a2, b2, X[10], 14, k5); Subround(J, b2, c2, d2, e2, a2, X[ 3], 12, k5); Subround(J, a2, b2, c2, d2, e2, X[12], 6, k5); Subround(I, e2, a2, b2, c2, d2, X[ 6], 9, k6); Subround(I, d2, e2, a2, b2, c2, X[11], 13, k6); Subround(I, c2, d2, e2, a2, b2, X[ 3], 15, k6); Subround(I, b2, c2, d2, e2, a2, X[ 7], 7, k6); Subround(I, a2, b2, c2, d2, e2, X[ 0], 12, k6); Subround(I, e2, a2, b2, c2, d2, X[13], 8, k6); 
Subround(I, d2, e2, a2, b2, c2, X[ 5], 9, k6); Subround(I, c2, d2, e2, a2, b2, X[10], 11, k6); Subround(I, b2, c2, d2, e2, a2, X[14], 7, k6); Subround(I, a2, b2, c2, d2, e2, X[15], 7, k6); Subround(I, e2, a2, b2, c2, d2, X[ 8], 12, k6); Subround(I, d2, e2, a2, b2, c2, X[12], 7, k6); Subround(I, c2, d2, e2, a2, b2, X[ 4], 6, k6); Subround(I, b2, c2, d2, e2, a2, X[ 9], 15, k6); Subround(I, a2, b2, c2, d2, e2, X[ 1], 13, k6); Subround(I, e2, a2, b2, c2, d2, X[ 2], 11, k6); Subround(H, d2, e2, a2, b2, c2, X[15], 9, k7); Subround(H, c2, d2, e2, a2, b2, X[ 5], 7, k7); Subround(H, b2, c2, d2, e2, a2, X[ 1], 15, k7); Subround(H, a2, b2, c2, d2, e2, X[ 3], 11, k7); Subround(H, e2, a2, b2, c2, d2, X[ 7], 8, k7); Subround(H, d2, e2, a2, b2, c2, X[14], 6, k7); Subround(H, c2, d2, e2, a2, b2, X[ 6], 6, k7); Subround(H, b2, c2, d2, e2, a2, X[ 9], 14, k7); Subround(H, a2, b2, c2, d2, e2, X[11], 12, k7); Subround(H, e2, a2, b2, c2, d2, X[ 8], 13, k7); Subround(H, d2, e2, a2, b2, c2, X[12], 5, k7); Subround(H, c2, d2, e2, a2, b2, X[ 2], 14, k7); Subround(H, b2, c2, d2, e2, a2, X[10], 13, k7); Subround(H, a2, b2, c2, d2, e2, X[ 0], 13, k7); Subround(H, e2, a2, b2, c2, d2, X[ 4], 7, k7); Subround(H, d2, e2, a2, b2, c2, X[13], 5, k7); Subround(G, c2, d2, e2, a2, b2, X[ 8], 15, k8); Subround(G, b2, c2, d2, e2, a2, X[ 6], 5, k8); Subround(G, a2, b2, c2, d2, e2, X[ 4], 8, k8); Subround(G, e2, a2, b2, c2, d2, X[ 1], 11, k8); Subround(G, d2, e2, a2, b2, c2, X[ 3], 14, k8); Subround(G, c2, d2, e2, a2, b2, X[11], 14, k8); Subround(G, b2, c2, d2, e2, a2, X[15], 6, k8); Subround(G, a2, b2, c2, d2, e2, X[ 0], 14, k8); Subround(G, e2, a2, b2, c2, d2, X[ 5], 6, k8); Subround(G, d2, e2, a2, b2, c2, X[12], 9, k8); Subround(G, c2, d2, e2, a2, b2, X[ 2], 12, k8); Subround(G, b2, c2, d2, e2, a2, X[13], 9, k8); Subround(G, a2, b2, c2, d2, e2, X[ 9], 12, k8); Subround(G, e2, a2, b2, c2, d2, X[ 7], 5, k8); Subround(G, d2, e2, a2, b2, c2, X[10], 15, k8); Subround(G, c2, d2, e2, a2, b2, X[14], 8, k8); 
Subround(F, b2, c2, d2, e2, a2, X[12], 8, k9); Subround(F, a2, b2, c2, d2, e2, X[15], 5, k9); Subround(F, e2, a2, b2, c2, d2, X[10], 12, k9); Subround(F, d2, e2, a2, b2, c2, X[ 4], 9, k9); Subround(F, c2, d2, e2, a2, b2, X[ 1], 12, k9); Subround(F, b2, c2, d2, e2, a2, X[ 5], 5, k9); Subround(F, a2, b2, c2, d2, e2, X[ 8], 14, k9); Subround(F, e2, a2, b2, c2, d2, X[ 7], 6, k9); Subround(F, d2, e2, a2, b2, c2, X[ 6], 8, k9); Subround(F, c2, d2, e2, a2, b2, X[ 2], 13, k9); Subround(F, b2, c2, d2, e2, a2, X[13], 6, k9); Subround(F, a2, b2, c2, d2, e2, X[14], 5, k9); Subround(F, e2, a2, b2, c2, d2, X[ 0], 15, k9); Subround(F, d2, e2, a2, b2, c2, X[ 3], 13, k9); Subround(F, c2, d2, e2, a2, b2, X[ 9], 11, k9); Subround(F, b2, c2, d2, e2, a2, X[11], 11, k9); c1 = digest[1] + c1 + d2; digest[1] = digest[2] + d1 + e2; digest[2] = digest[3] + e1 + a2; digest[3] = digest[4] + a1 + b2; digest[4] = digest[0] + b1 + c2; digest[0] = c1; } #else // TC_MINIMIZE_CODE_SIZE /* Derived from source code of TrueCrypt 7.1a, which is Copyright (c) 2008-2012 TrueCrypt Developers Association and which is governed by the TrueCrypt License 3.0. Modifications and additions to the original source code (contained in this file) and all other portions of this file are Copyright (c) 2013-2017 IDRIX and are governed by the Apache License 2.0 the full text of which is contained in the file License.txt included in VeraCrypt binary and source code distribution packages. 
*/ #pragma optimize ("tl", on) typedef unsigned __int32 uint32; typedef unsigned __int8 byte; #include #pragma intrinsic (_lrotl) static const byte OrderTab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8, 3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12, 1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2, 4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13, 5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2, 15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13, 8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14, 12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11 }; static const byte RolTab[] = { 11, 14, 15, 12, 5, 8, 7, 9, 11, 13, 14, 15, 6, 7, 9, 8, 7, 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12, 11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5, 11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12, 9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6, 8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6, 9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11, 9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5, 15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8, 8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11 }; static const uint32 KTab[] = { 0x00000000UL, 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xA953FD4EUL, 0x50A28BE6UL, 0x5C4DD124UL, 0x6D703EF3UL, 0x7A6D76E9UL, 0x00000000UL }; void RMD160Transform (unsigned __int32 *state, const unsigned __int32 *data) { uint32 a, b, c, d, e; uint32 a2, b2, c2, d2, e2; byte pos; uint32 tmp; a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; for (pos = 0; pos < 160; ++pos) { tmp = a + data[OrderTab[pos]] + KTab[pos >> 4]; switch (pos >> 4) { case 0: case 9: tmp += F (b, c, d); break
/* cpu.c - written and placed in the public domain by Wei Dai */

#include "cpu.h"
#include "misc.h"

#ifndef EXCEPTION_EXECUTE_HANDLER
#define EXCEPTION_EXECUTE_HANDLER 1
#endif

#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#include <signal.h>
#include <setjmp.h>
#endif

#ifdef CRYPTOPP_CPUID_AVAILABLE

#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64

/*
 * 64-bit MSVC build of CpuId: inline assembly is not used here, so the
 * __cpuid compiler intrinsic performs the query instead.
 *
 * input  - CPUID leaf to query.
 * output - receives the four values written by __cpuid.
 *
 * Always returns 1.
 */
int CpuId(uint32 input, uint32 output[4])
{
	int *regs = (int *) output;

	__cpuid(regs, (int) input);
	return 1;
}

#else

#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY

#if defined(__cplusplus)
extern "C" {
#endif

/* Signature of a handler as accepted and returned by signal(). */
typedef void (*SigHandler)(int);

/* Recovery point armed by CpuId() before it executes the CPUID instruction. */
static jmp_buf s_jmpNoCPUID;

/*
 * SIGILL handler installed while CpuId() probes the CPUID instruction.
 * It never returns normally: control is transferred back to the setjmp()
 * on s_jmpNoCPUID, which then observes the value 1.
 */
static void SigIllHandlerCPUID(int p)
{
	(void) p;	/* signal number is not needed */
	longjmp(s_jmpNoCPUID, 1);
}

#if !defined (_UEFI) && ((defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE)

/* Recovery point armed by TryAESNI() before it executes an AES-NI instruction. */
static jmp_buf s_jmpNoAESNI;

/*
 * SIGILL handler installed while TryAESNI() runs its probe.
 * It never returns normally: control is transferred back to the setjmp()
 * on s_jmpNoAESNI, which then observes the value 1.
 */
static void SigIllHandlerAESNI(int p)
{
	(void) p;	/* signal number is not needed */
	longjmp(s_jmpNoAESNI, 1);
}

#endif

#if CRYPTOPP_BOOL_X64 == 0
/* Recovery point armed by TrySSE2() before it executes an SSE2 instruction. */
static jmp_buf s_jmpNoSSE2;

/*
 * SIGILL handler installed while TrySSE2() runs its probe.
 * It never returns normally: control is transferred back to the setjmp()
 * on s_jmpNoSSE2, which then observes the value 1.
 */
static void SigIllHandlerSSE2(int p)
{
	(void) p;	/* signal number is not needed */
	longjmp(s_jmpNoSSE2, 1);
}
#endif

#if defined(__cplusplus)
}
#endif
#endif

/*
 * Executes the CPUID instruction for leaf `input` with sub-leaf (ECX) 0 and
 * stores the resulting EAX, EBX, ECX and EDX in output[0..3].
 *
 * Returns 1 on success and 0 when CPUID could not be executed:
 *   - MSVC inline-assembly build: a structured exception was raised (not
 *     guarded under _UEFI), or leaf 0 reported a highest basic leaf of 0.
 *   - other builds: the temporary SIGILL handler could not be installed, or
 *     SIGILL was raised while executing CPUID.
 * On failure the contents of `output` must not be relied upon.
 */
int CpuId(uint32 input, uint32 output[4])
{
#ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#ifndef _UEFI
	__try
	{
#endif
		__asm
		{
			mov eax, input
			mov ecx, 0
			cpuid
			mov edi, output
			mov [edi], eax
			mov [edi+4], ebx
			mov [edi+8], ecx
			mov [edi+12], edx
		}
#ifndef _UEFI
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		return 0;
	}
#endif

	// function 0 returns the highest basic function understood in EAX
	if (input == 0)
		return output[0] != 0 ? 1 : 0;

	return 1;
#else
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24
	// http://stackoverflow.com/q/7721854
	volatile int result = 1;

	SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
	if (oldHandler == SIG_ERR)
	{
		// Without a handler a faulting CPUID would kill the process, and
		// SIG_ERR is not a valid handler to restore afterwards: give up now.
		return 0;
	}

	if (setjmp(s_jmpNoCPUID))
		result = 0;	// reached via longjmp from SigIllHandlerCPUID
	else
	{
		asm volatile
		(
			// save ebx in case -fPIC is being used
			// TODO: this might need an early clobber on EDI.
#if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
			"pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
#else
			"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
#endif
			: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
			: "a" (input), "c" (0)
		);
	}

	// Put back whatever SIGILL disposition was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
#endif
}

#endif

/*
 * Probes whether an SSE2 instruction can actually be executed.
 *
 * Returns 1 when the probe instruction ran (or when no probe is needed:
 * x64 builds and _UEFI builds return 1 unconditionally), and 0 when the
 * instruction faulted, when the SIGILL handler could not be installed, or
 * (32-bit Windows driver) when the floating-point state could not be saved.
 */
static int TrySSE2()
{
#if CRYPTOPP_BOOL_X64
	return 1;
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) && !defined(_UEFI)
	volatile int result = 1;
#if defined (TC_WINDOWS_DRIVER) && !defined (_WIN64)
	// The probe touches XMM registers, so the driver build saves the
	// floating-point state first and restores it afterwards.
	KFLOATING_SAVE floatingPointState;
	if (NT_SUCCESS (KeSaveFloatingPointState (&floatingPointState)))
	{
#endif
	__try
	{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
		AS2(por xmm0, xmm0)        // executing SSE2 instruction
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
		__m128i x = _mm_setzero_si128();
		result = _mm_cvtsi128_si32(x) == 0 ? 1 : 0;
#endif
	}
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		result = 0;
	}
#if defined (TC_WINDOWS_DRIVER) && !defined (_WIN64)
	KeRestoreFloatingPointState (&floatingPointState);
	}
	else
		return 0;
#endif
	return result;
#elif !defined(_UEFI)
	// longjmp and clobber warnings. Volatile is required.
	// http://github.com/weidai11/cryptopp/issues/24
	// http://stackoverflow.com/q/7721854
	volatile int result = 1;

	SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
	if (oldHandler == SIG_ERR)
		return 0;

	if (setjmp(s_jmpNoSSE2))
	{
		// Reached via longjmp from SigIllHandlerSSE2: the probe instruction
		// raised SIGILL, so SSE2 is NOT usable.
		result = 0;
	}
	else
	{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
		__asm __volatile ("por %xmm0, %xmm0");
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
		__m128i x = _mm_setzero_si128();
		result = _mm_cvtsi128_si32(x) == 0? 1 : 0;
#endif
	}

	// Put back whatever SIGILL disposition was active before the probe.
	signal(SIGILL, oldHandler);
	return result;
#else
	return 1;
#endif
}

// Set to 1 by DetectX86Features() as its last step; stays 0 if that function
// returns early because CPUID leaf 0 or leaf 1 could not be queried.
int g_x86DetectionDone = 0;
// CPU feature flags. All start at 0 and are assigned by DetectX86Features().
int g_hasISSE = 0, g_hasSSE2 = 0, g_hasSSSE3 = 0, g_hasMMX = 0, g_hasAESNI = 0, g_hasCLMUL = 0, g_isP4 = 0;
int g_hasAVX = 0, g_hasAVX2 = 0, g_hasBMI2 = 0, g_hasSSE42 = 0, g_hasSSE41 = 0;
// Starts at the compile-time default; DetectX86Features() replaces it with a
// CPUID-derived value on Intel/AMD CPUs and falls back to the default if that
// value is 0.
uint32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;

/*
 * Tests whether a CPUID leaf 0 result carries the "GenuineIntel" vendor
 * string. `output` holds EAX, EBX, ECX, EDX in that order.
 * Returns 1 on a match, 0 otherwise.
 */
VC_INLINE int IsIntel(const uint32 output[4])
{
	if (output[1] /*EBX*/ != 0x756e6547)	// "Genu"
		return 0;
	if (output[2] /*ECX*/ != 0x6c65746e)	// "ntel"
		return 0;
	return output[3] /*EDX*/ == 0x49656e69;	// "ineI"
}

/*
 * Tests whether a CPUID leaf 0 result carries the "AuthenticAMD" vendor
 * string. `output` holds EAX, EBX, ECX, EDX in that order.
 *
 * The 12-character vendor string is laid out across EBX, EDX, ECX (in that
 * order), so ECX holds the LAST four characters and EDX the middle four.
 * Returns 1 on a match, 0 otherwise.
 */
VC_INLINE int IsAMD(const uint32 output[4])
{
	return (output[1] /*EBX*/ == 0x68747541) &&	// "Auth"
		(output[2] /*ECX*/ == 0x444D4163) &&	// "cAMD"
		(output[3] /*EDX*/ == 0x69746E65);	// "enti"
}

#if !defined (_UEFI) && ((defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE)

/*
 * Probes AES-NI by executing one AESENC round on fixed inputs and comparing
 * the output with the expected constants.
 *
 * Returns 1 only when the instruction executed and produced the expected
 * value. Returns 0 when the instruction faulted (structured exception under
 * MSVC, SIGILL elsewhere), when the result did not match, or when the SIGILL
 * handler could not be installed.
 */
static int TryAESNI ()
{
	// volatile: the value must survive the longjmp taken from the SIGILL handler
	volatile int result = 0;
#ifdef _MSC_VER
	__try
#else
	SigHandler oldHandler = signal(SIGILL, SigIllHandlerAESNI);
	if (oldHandler == SIG_ERR)
		return 0;

	// non-zero here means SigIllHandlerAESNI jumped back: the probe faulted
	if (setjmp(s_jmpNoAESNI))
		result = 0;
	else
#endif
	{
		// This compound statement is the body of __try (MSVC) or of the
		// else branch above (other compilers).
		__m128i block, subkey, ciphered;
		// perform AES round.
		block = _mm_setr_epi32(0x11223344,0x55667788,0x99AABBCC,0xDDEEFF00);
		subkey = _mm_setr_epi32(0xA5A5A5A5,0xA5A5A5A5,0x5A5A5A5A,0x5A5A5A5A);
		ciphered = _mm_aesenc_si128(block, subkey);
		// compare both 64-bit halves of the result with the expected output
#ifdef _MSC_VER
		if (ciphered.m128i_u64[0] == LL(0x2f4654b9485061fa) && ciphered.m128i_u64[1] == LL(0xc8b51f1fe1256f99))
#else
		if (((uint64_t*)(&ciphered))[0] == LL(0x2f4654b9485061fa) && ((uint64_t*)(&ciphered))[1] == LL(0xc8b51f1fe1256f99))
#endif
			result = 1;
	}
#ifdef _MSC_VER
	__except (EXCEPTION_EXECUTE_HANDLER)
	{
		// ignore error if AES-NI not supported
	}
#else
	// put back the SIGILL disposition that was active before the probe
	signal(SIGILL, oldHandler);
#endif
	
	return result;
}

/*
 * Works around Hyper-V hiding the AES-NI CPUID bit.
 *
 * Queries CPUID leaf 0x40000000 and, when the 12-byte string returned in
 * EBX..EDX equals "Microsoft Hv" (compared case-insensitively), runs the
 * TryAESNI() probe.
 *
 * Returns the TryAESNI() result in that case. Returns 0 when the leaf could
 * not be queried, when the string does not match, or (32-bit Windows driver)
 * when the floating-point state could not be saved.
 */
static int Detect_MS_HyperV_AES ()
{
	int hasAesNI = 0;
	// when Hyper-V is enabled on older versions of Windows Server (i.e. 2008 R2), the AES-NI capability 
	// gets masked out for all applications, even running on the host.
	// We try to detect Hyper-V virtual CPU and perform a dummy AES-NI operation to check its real presence
	uint32 cpuid[4] = {0};
	char HvProductName[13];

	// Do not inspect the registers if the query itself failed.
	if (!CpuId(0x40000000, cpuid))
		return 0;

	// cpuid[1..3] (EBX, ECX, EDX) are contiguous and hold the 12-byte string.
	memcpy (HvProductName, &cpuid[1], 12);
	HvProductName[12] = 0;
	if (_stricmp(HvProductName, "Microsoft Hv") == 0)
	{
#if defined (TC_WINDOWS_DRIVER) && !defined (_WIN64)
		// TryAESNI() uses XMM registers; save/restore the FP state around it.
		KFLOATING_SAVE floatingPointState;
		if (NT_SUCCESS (KeSaveFloatingPointState (&floatingPointState)))
		{
#endif
		hasAesNI = TryAESNI ();

#if defined (TC_WINDOWS_DRIVER) && !defined (_WIN64)
		KeRestoreFloatingPointState (&floatingPointState);
		}
#endif
	}

	return hasAesNI;
}

#endif

/*
 * Queries CPUID and fills in the g_has* feature flags, g_isP4 and
 * g_cacheLineSize, then sets g_x86DetectionDone to 1.
 *
 * Returns early, leaving every global untouched and g_x86DetectionDone at
 * its previous value, when CPUID leaf 0 or leaf 1 cannot be queried.
 *
 * Every flag other than MMX and ISSE is gated on g_hasSSE2, which is only
 * set when the CPUID bit is present AND the TrySSE2() probe succeeds.
 * NOTE(review): the AVX/AVX2 flags reflect CPUID bits only; this function
 * does not check that the OS has enabled the extended register state.
 */
void DetectX86Features()
{
	uint32 cpuid[4] = {0}, cpuid1[4] = {0}, cpuid7[4] = {0};
	if (!CpuId(0, cpuid))
		return;
	if (!CpuId(1, cpuid1))
		return;

	// Leaf 1: EDX is cpuid1[3], ECX is cpuid1[2].
	g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
	if ((cpuid1[3] & (1 << 26)) != 0)
		g_hasSSE2 = TrySSE2();
	g_hasAVX = g_hasSSE2 && (cpuid1[2] & (1 << 28));
	g_hasSSE42 = g_hasSSE2 && (cpuid1[2] & (1 << 20));
	g_hasSSE41 = g_hasSSE2 && (cpuid1[2] & (1 << 19));
	g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
	g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
	g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));

	// AVX2 and BMI2 are reported in EBX of leaf 7 (sub-leaf 0), not in leaf 1,
	// whose EBX carries brand index / CLFLUSH size / APIC id instead.
	// cpuid[0] is the highest basic leaf the CPU supports.
	g_hasAVX2 = 0;
	g_hasBMI2 = 0;
	if (g_hasSSE2 && cpuid[0] >= 7 && CpuId(7, cpuid7))
	{
		g_hasAVX2 = g_hasAVX && (cpuid7[1] & (1 << 5));
		g_hasBMI2 = (cpuid7[1] & (1 << 8)) != 0;
	}

#if !defined (_UEFI) && ((defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE)
	// Hypervisor = bit 31 of ECX of CPUID leaf 0x1
	// reference: http://artemonsecurity.com/vmde.pdf
	// (unsigned constant: shifting a signed 1 into bit 31 is undefined)
	if (!g_hasAESNI && (cpuid1[2] & (1u << 31)))
	{
		g_hasAESNI = Detect_MS_HyperV_AES ();
	}
#endif

	if ((cpuid1[3] & (1 << 25)) != 0)
		g_hasISSE = 1;
	else
	{
		// Fall back to extended leaf 0x80000001 EDX bit 22, provided the CPU
		// reports that extended leaf as available.
		uint32 cpuidExt[4] = {0};
		if (CpuId(0x80000000, cpuidExt) && cpuidExt[0] >= 0x80000001)
		{
			if (CpuId(0x80000001, cpuidExt))
				g_hasISSE = (cpuidExt[3] & (1 << 22)) != 0;
		}
	}

	if (IsIntel(cpuid))
	{
		// family field (EAX bits 11:8) == 0xF
		g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
		// EBX byte 1 is the CLFLUSH line size in 8-byte units
		g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
	}
	else if (IsAMD(cpuid))
	{
		// Only trust the registers when the query succeeded.
		if (CpuId(0x80000005, cpuid))
			g_cacheLineSize = GETBYTE(cpuid[2], 0);
	}

	if (!g_cacheLineSize)
		g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;

	*((volatile int*)&g_x86DetectionDone) = 1;
}

/*
 * Reports whether the CPU offers hardware AES (AES-NI).
 *
 * Returns 1 when CPUID leaf 1 sets ECX bit 25, or, in builds where the
 * AES-NI probe is compiled in, when the hypervisor bit (ECX bit 31) is set
 * and Detect_MS_HyperV_AES() reports AES-NI as usable.
 * Returns 0 otherwise, including when CPUID leaf 1 cannot be queried.
 */
int is_aes_hw_cpu_supported ()
{
	int bHasAESNI = 0;
	uint32 cpuid[4] = {0};

	if (CpuId(1, cpuid))
	{
		if (cpuid[2] & (1<<25))
			bHasAESNI = 1;
#if !defined (_UEFI) && ((defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) || CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE)
		// Hypervisor = bit 31 of ECX of CPUID leaf 0x1
		// reference: http://artemonsecurity.com/vmde.pdf
		// (unsigned constant: shifting a signed 1 into bit 31 is undefined)
		if (!bHasAESNI && (cpuid[2] & (1u << 31)))
		{
			bHasAESNI = Detect_MS_HyperV_AES ();
		}
#endif
	}

	return bHasAESNI;
}

#endif