/* cpu.c - written and placed in the public domain by Wei Dai */

#include "cpu.h"
#include "misc.h"

#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#include <signal.h>
#include <setjmp.h>
#endif

#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#include <emmintrin.h>
#endif

#ifdef CRYPTOPP_CPUID_AVAILABLE

#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64

int CpuId(uint32 input, uint32 *output)
{
	__cpuid((int *)output, input);
	return 1;
}

#else

#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY

#if defined(__cplusplus)
extern "C" {
#endif

typedef void (*SigHandler)(int);

static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int p)
{
	longjmp(s_jmpNoCPUID, 1);
}

#if CRYPTOPP_BOOL_X64 == 0
static jmp_buf s_jmpNoSSE2;
static void SigIllHandlerSSE2(int p)
{
	longjmp(s_jmpNoSSE2, 1);
}
#endif
#if defined(__cplusplus)
}
#endif
#endif

int CpuId(uint32 input, uint32 *output)
{
#ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
    __try
	{
		__asm
		{
			mov eax, input
			cpuid
			mov edi, output
			mov [edi], eax
			mov [edi+4], ebx
			mov [edi+8], ecx
			mov [edi+12], edx
		}
	}
    __except (1)
	{
		return 0;
    }
	return 1;
#else
	SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
	if (oldHandler == SIG_ERR)
		return 0;

	int result = 1;
	if (setjmp(s_jmpNoCPUID))
		result = 0;
	else
	{
		asm
		(
			// save ebx in case -fPIC is being used
#if CRYPTOPP_BOOL_X86
			"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
#else
			"pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
#endif
			: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
			: "a" (input)
		);
	}

	signal(SIGILL, oldHandler);
	return result;
#endif
}

#endif

static int TrySSE2()
{
#if CRYPTOPP_BOOL_X64
	return 1;
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    __try
	{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
        AS2(por xmm0, xmm0)        // executing SSE2 instruction
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
		__m128i x = _mm_setzero_si128();
		return _mm_cvtsi128_si32(x) == 0;
#endif
	}
    __except (1)
	{
		return 0;
    }
	return 1;
#else
	SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
	if (oldHandler == SIG_ERR)
		return 0;

	int result = 1;
	if (setjmp(s_jmpNoSSE2))
		result = 0;
	else
	{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
		__asm __volatile ("por %xmm0, %xmm0");
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
		__m128i x = _mm_setzero_si128();
		result = _mm_cvtsi128_si32(x) == 0;
#endif
	}

	signal(SIGILL, oldHandler);
	return result;
#endif
}

int g_x86DetectionDone = 0;
int g_hasISSE = 0, g_hasSSE2 = 0, g_hasSSSE3 = 0, g_hasMMX = 0, g_hasAESNI = 0, g_hasCLMUL = 0, g_isP4 = 0;
uint32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;

void DetectX86Features()
{
	uint32 cpuid[4], cpuid1[4], tmp;
	if (!CpuId(0, cpuid))
		return;
	if (!CpuId(1, cpuid1))
		return;

	g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
	if ((cpuid1[3] & (1 << 26)) != 0)
		g_hasSSE2 = TrySSE2();
	g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
	g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
	g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));

	if ((cpuid1[3] & (1 << 25)) != 0)
		g_hasISSE = 1;
	else
	{
		uint32 cpuid2[4];
		CpuId(0x080000000, cpuid2);
		if (cpuid2[0] >= 0x080000001)
		{
			CpuId(0x080000001, cpuid2);
			g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
		}
	}

	tmp = cpuid[2];
	cpuid[2] = cpuid[3];
	cpuid[3] = tmp;
	if (memcmp(cpuid+1, "GenuineIntel", 12) == 0)
	{
		g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
		g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
	}
	else if (memcmp(cpuid+1, "AuthenticAMD", 12) == 0)
	{
		CpuId(0x80000005, cpuid);
		g_cacheLineSize = GETBYTE(cpuid[2], 0);
	}

	if (!g_cacheLineSize)
		g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;

	g_x86DetectionDone = 1;
}

#endif