/*
This code was written by kerukuro for the cppcrypto library (http://cppcrypto.sourceforge.net/)
and released into the public domain.
*/

/* Modified for VeraCrypt with speed optimizations for the C implementation */

#include "Sha2.h"
#include "Common/Endian.h"
#include "Crypto/cpu.h"
#include "Crypto/misc.h"

#if defined(_UEFI) || defined(CRYPTOPP_DISABLE_ASM)
#define NO_OPTIMIZED_VERSIONS
#endif

#ifndef NO_OPTIMIZED_VERSIONS

#if defined(__cplusplus)
extern "C"
{
#endif
#if CRYPTOPP_BOOL_X64
	void sha512_rorx(const void* M, void* D, uint_64t l);
	void sha512_sse4(const void* M, uint_64t D[8], uint_64t l);
	void sha512_avx(const void* M, void* D, uint_64t l);
#endif
	
#if CRYPTOPP_BOOL_X64 || ((CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) && !defined (TC_MACOSX))
	void sha512_compress_nayuki(uint_64t state[8], const uint_8t block[128]);
#endif
#if defined(__cplusplus)
}
#endif

#endif

/* Block-compression routine selected at runtime in sha512_begin() */
typedef void (*transformFn)(sha512_ctx* ctx, void* m, uint_64t num_blks);

transformFn transfunc = NULL;

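/* SHA-512 round constants: the first 64 bits of the fractional parts of the
   cube roots of the first eighty prime numbers (FIPS 180-4) */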
static const uint_64t K[80] = {
	LL(0x428a2f98d728ae22), LL(0x7137449123ef65cd), LL(0xb5c0fbcfec4d3b2f), LL(0xe9b5dba58189dbbc),
	LL(0x3956c25bf348b538), LL(0x59f111f1b605d019), LL(0x923f82a4af194f9b), LL(0xab1c5ed5da6d8118),
	LL(0xd807aa98a3030242), LL(0x12835b0145706fbe), LL(0x243185be4ee4b28c), LL(0x550c7dc3d5ffb4e2),
	LL(0x72be5d74f27b896f), LL(0x80deb1fe3b1696b1), LL(0x9bdc06a725c71235), LL(0xc19bf174cf692694),
	LL(0xe49b69c19ef14ad2), LL(0xefbe4786384f25e3), LL(0x0fc19dc68b8cd5b5), LL(0x240ca1cc77ac9c65),
	LL(0x2de92c6f592b0275), LL(0x4a7484aa6ea6e483), LL(0x5cb0a9dcbd41fbd4), LL(0x76f988da831153b5),
	LL(0x983e5152ee66dfab), LL(0xa831c66d2db43210), LL(0xb00327c898fb213f), LL(0xbf597fc7beef0ee4),
	LL(0xc6e00bf33da88fc2), LL(0xd5a79147930aa725), LL(0x06ca6351e003826f), LL(0x142929670a0e6e70),
	LL(0x27b70a8546d22ffc), LL(0x2e1b21385c26c926), LL(0x4d2c6dfc5ac42aed), LL(0x53380d139d95b3df),
	LL(0x650a73548baf63de), LL(0x766a0abb3c77b2a8), LL(0x81c2c92e47edaee6), LL(0x92722c851482353b),
	LL(0xa2bfe8a14cf10364), LL(0xa81a664bbc423001), LL(0xc24b8b70d0f89791), LL(0xc76c51a30654be30),
	LL(0xd192e819d6ef5218), LL(0xd69906245565a910), LL(0xf40e35855771202a), LL(0x106aa07032bbd1b8),
	LL(0x19a4c116b8d2d0c8), LL(0x1e376c085141ab53), LL(0x2748774cdf8eeb99), LL(0x34b0bcb5e19b48a8),
	LL(0x391c0cb3c5c95a63), LL(0x4ed8aa4ae3418acb), LL(0x5b9cca4f7763e373), LL(0x682e6ff3d6b2b8a3),
	LL(0x748f82ee5defb2fc), LL(0x78a5636f43172f60), LL(0x84c87814a1f0ab72), LL(0x8cc702081a6439ec),
	LL(0x90befffa23631e28), LL(0xa4506cebde82bde9), LL(0xbef9a3f7b2c67915), LL(0xc67178f2e372532b),
	LL(0xca273eceea26619c), LL(0xd186b8c721c0c207), LL(0xeada7dd6cde0eb1e), LL(0xf57d4f7fee6ed178),
	LL(0x06f067aa72176fba), LL(0x0a637dc5a2c898a6), LL(0x113f9804bef90dae), LL(0x1b710b35131c471b),
	LL(0x28db77f523047d84), LL(0x32caab7b40c72493), LL(0x3c9ebe0a15c9bebc), LL(0x431d67c49c100d4c),
	LL(0x4cc5d4becb3e42b6), LL(0x597f299cfc657e2a), LL(0x5fcb6fab3ad6faec), LL(0x6c44198c4a475817)
};


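/* SHA-512 logical functions (FIPS 180-4 notation: sum0/sum1 are the "big sigma"
   functions, sigma0/sigma1 the "small sigma" ones). WU(j) updates the 16-word
   rolling message schedule in place. */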
#define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
#define Maj(x,y,z)      (((x) & (y)) | ((z) & ((x) ^ (y))))
#define sum0(x)			(rotr64((x), 28) ^ rotr64((x), 34) ^ rotr64((x), 39))
#define sum1(x)			(rotr64((x), 14) ^ rotr64((x), 18) ^ rotr64((x), 41))
#define sigma0(x)		(rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7))
#define sigma1(x)		(rotr64((x), 19) ^ rotr64((x), 61) ^ ((x) >> 6))

#define WU(j) (W[j & 15] += sigma1(W[(j + 14) & 15]) + W[(j + 9) & 15] + sigma0(W[(j + 1) & 15]))

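/* One compression round: T1 and T2 as in FIPS 180-4, with the working variables
   a..h rotated by renaming rather than by shifting an array. For the first 16
   rounds (i == 0) the message word W[j] is used directly; afterwards it is
   generated on the fly by WU(j). */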
#define COMPRESS_ROUND(i, j, K) \
			T1 = h + sum1(e) + Ch(e, f, g) + K[i + j] + (i ? WU(j) : W[j]); \
			T2 = sum0(a) + Maj(a, b, c); \
			h = g; \
			g = f; \
			f = e; \
			e = d + T1; \
			d = c; \
			c = b; \
			b = a; \
			a = T1 + T2;

void StdTransform(sha512_ctx* ctx, void* mp, uint_64t num_blks)
{
	uint_64t blk;
	for (blk = 0; blk < num_blks; blk++)
	{
		uint_64t W[16];
		uint_64t a,b,c,d,e,f,g,h;
		uint_64t T1, T2;
		int i;
#if defined (TC_WINDOWS_DRIVER) && defined (DEBUG)
		int	  j;
#endif

		for (i = 0; i < 128 / 8; i++)
		{
			W[i] = bswap_64((((const uint_64t*)(mp))[blk * 16 + i]));
		}

		a = ctx->hash[0];
		b = ctx->hash[1];
		c = ctx->hash[2];
		d = ctx->hash[3];
		e = ctx->hash[4];
		f = ctx->hash[5];
		g = ctx->hash[6];
		h = ctx->hash[7];

		for (i = 0; i <= 79; i+=16)
		{
#if defined (TC_WINDOWS_DRIVER) && defined (DEBUG)
			for (j = 0; j < 16; j++)
			{
				COMPRESS_ROUND(i, j, K);
			}
#else
			COMPRESS_ROUND(i, 0, K);
			COMPRESS_ROUND(i, 1, K);
			COMPRESS_ROUND(i, 2, K);
			COMPRESS_ROUND(i, 3, K);
			COMPRESS_ROUND(i, 4, K);
			COMPRESS_ROUND(i, 5, K);
			COMPRESS_ROUND(i, 6, K);
			COMPRESS_ROUND(i, 7, K);
			COMPRESS_ROUND(i, 8, K);
			COMPRESS_ROUND(i, 9, K);
			COMPRESS_ROUND(i, 10, K);
			COMPRESS_ROUND(i, 11, K);
			COMPRESS_ROUND(i, 12, K);
			COMPRESS_ROUND(i, 13, K);
			COMPRESS_ROUND(i, 14, K);
			COMPRESS_ROUND(i, 15, K);
#endif
		}
		ctx->hash[0] += a;
		ctx->hash[1] += b;
		ctx->hash[2] += c;
		ctx->hash[3] += d;
		ctx->hash[4] += e;
		ctx->hash[5] += f;
		ctx->hash[6] += g;
		ctx->hash[7] += h;
	}
}

#ifndef NO_OPTIMIZED_VERSIONS

#if CRYPTOPP_BOOL_X64
void Avx2Transform(sha512_ctx* ctx, void* mp, uint_64t num_blks)
{
	if (num_blks > 1)
		sha512_rorx(mp, ctx->hash, num_blks);
	else
		sha512_sse4(mp, ctx->hash, num_blks);
}

void AvxTransform(sha512_ctx* ctx, void* mp, uint_64t num_blks)
{
	if (num_blks > 1)
		sha512_avx(mp, ctx->hash, num_blks);
	else
		sha512_sse4(mp, ctx->hash, num_blks);
}

void SSE4Transform(sha512_ctx* ctx, void* mp, uint_64t num_blks)
{
	sha512_sse4(mp, ctx->hash, num_blks);
}
#endif

#if CRYPTOPP_BOOL_X64 || ((CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) && !defined (TC_MACOSX))

void SSE2Transform(sha512_ctx* ctx, void* mp, uint_64t num_blks)
{
	uint_64t i;
	for (i = 0; i < num_blks; i++)
		sha512_compress_nayuki(ctx->hash, (uint_8t*)mp + i * 128);
}

#endif

#endif // NO_OPTIMIZED_VERSIONS

void sha512_begin(sha512_ctx* ctx)
{
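	/* Initial hash value: the first 64 bits of the fractional parts of the
	   square roots of the first eight prime numbers (FIPS 180-4) */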
	ctx->hash[0] = LL(0x6a09e667f3bcc908);
	ctx->hash[1] = LL(0xbb67ae8584caa73b);
	ctx->hash[2] = LL(0x3c6ef372fe94f82b);
	ctx->hash[3] = LL(0xa54ff53a5f1d36f1);
	ctx->hash[4] = LL(0x510e527fade682d1);
	ctx->hash[5] = LL(0x9b05688c2b3e6c1f);
	ctx->hash[6] = LL(0x1f83d9abfb41bd6b);
	ctx->hash[7] = LL(0x5be0cd19137e2179);
	ctx->count[0] = 0;
	ctx->count[1] = 0;

	/* On first use, select the fastest compression routine supported by the CPU */
	if (!transfunc)
	{
#ifndef NO_OPTIMIZED_VERSIONS
#if CRYPTOPP_BOOL_X64
		if (g_isIntel && HasSAVX2() && HasSBMI2())
			transfunc = Avx2Transform;
		else if (g_isIntel && HasSAVX())
			transfunc = AvxTransform;
		else if (HasSSE41())
			transfunc = SSE4Transform;
		else
#endif

#if CRYPTOPP_BOOL_X64 || ((CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32) && !defined (TC_MACOSX))
#if CRYPTOPP_BOOL_X64
		if (HasSSE2())
#else
		if (HasSSSE3() && HasMMX())
#endif
				transfunc = SSE2Transform;
		else
#endif

#endif
			transfunc = StdTransform;
	}
}

void sha512_end(unsigned char * result, sha512_ctx* ctx)
{
	int i;
	uint_64t mlen, pos = ctx->count[0];
	uint_8t* m = (uint_8t*) ctx->wbuf;
	/* Append the mandatory 0x80 padding byte */
	m[pos++] = 0x80;
	/* If fewer than 16 bytes remain for the length field, pad and process this block first */
	if (pos > 112)
	{
		memset(m + pos, 0, (size_t) (128 - pos));
		transfunc(ctx, m, 1);
		pos = 0;
	}
	memset(m + pos, 0, (size_t) (128 - pos));
	/* Store the message length in bits as the last 64 bits of the block; the upper
	   64 bits of the 128-bit length field remain zero from the memset above */
	mlen = bswap_64(ctx->count[1]);
	memcpy(m + (128 - 8), &mlen, 64 / 8);
	transfunc(ctx, m, 1);
	for (i = 0; i < 8; i++)
	{
		ctx->hash[i] = bswap_64(ctx->hash[i]);
	}
	memcpy(result, ctx->hash, 64);
}

void sha512_hash(const unsigned char * data, uint_64t len, sha512_ctx *ctx)
{
	uint_64t pos = ctx->count[0];
	uint_64t total = ctx->count[1];
	uint_8t* m = (uint_8t*) ctx->wbuf;
	/* Complete and process any partially filled block left over from a previous call */
	if (pos && pos + len >= 128)
	{
		memcpy(m + pos, data, (size_t) (128 - pos));
		transfunc(ctx, m, 1);
		len -= 128 - pos;
		total += (128 - pos) * 8;
		data += 128 - pos;
		pos = 0;
	}
	/* Process as many whole 128-byte blocks as possible directly from the input */
	if (len >= 128)
	{
		uint_64t blocks = len / 128;
		uint_64t bytes = blocks * 128;
		transfunc(ctx, (void*)data, blocks);
		len -= bytes;
		total += bytes * 8;
		data += bytes;
	}
	/* Buffer the remaining tail for the next call or for final padding */
	memcpy(m + pos, data, (size_t) (len));
	pos += len;
	total += len * 8;
	ctx->count[0] = pos;
	ctx->count[1] = total;
}

void sha512(unsigned char * result, const unsigned char* source, uint_64t sourceLen)
{
	sha512_ctx  ctx;

	sha512_begin(&ctx);
	sha512_hash(source, sourceLen, &ctx);
	sha512_end(result, &ctx);
}
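
/*
   Illustrative usage sketch, kept out of the build with an #if 0 guard. It shows
   the one-shot sha512() wrapper and the incremental sha512_begin/sha512_hash/
   sha512_end API defined above producing the same 64-byte digest. The names
   below (sha512_usage_example, digest, msg) are placeholders, not part of the
   library.
*/
#if 0
static void sha512_usage_example(void)
{
	unsigned char digest[64];          /* SHA-512 produces a 512-bit (64-byte) digest */
	const unsigned char msg[] = "abc";

	/* One-shot hashing of a complete buffer */
	sha512(digest, msg, 3);

	/* Incremental hashing of data that arrives in pieces */
	{
		sha512_ctx ctx;
		sha512_begin(&ctx);
		sha512_hash(msg, 1, &ctx);
		sha512_hash(msg + 1, 2, &ctx);
		sha512_end(digest, &ctx);      /* same digest as the one-shot call above */
	}
}
#endif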

/////////////////////////////

#ifndef NO_OPTIMIZED_VERSIONS

#if defined(__cplusplus)
extern "C"
{
#endif

#if CRYPTOPP_BOOL_X64
	void sha256_sse4(void *input_data, uint_32t digest[8], uint_64t num_blks);
	void sha256_rorx(void *input_data, uint_32t digest[8], uint_64t num_blks);
	void sha256_avx(void *input_data, uint_32t digest[8], uint_64t num_blks);
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
	void sha256_compress_nayuki(uint_32t state[8], const uint_8t block[64]);
#endif

#if defined(__cplusplus)
}
#endif

#endif

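/* SHA-256 round constants: the first 32 bits of the fractional parts of the
   cube roots of the first sixty-four prime numbers (FIPS 180-4) */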
CRYPTOPP_ALIGN_DATA(16) static const uint_32t SHA256_K[64] CRYPTOPP_SECTION_ALIGN16 = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE))

#ifdef _MSC_VER
# pragma warning(disable: 4100 4731)
#endif

static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(uint_32t *state, const uint_32t *data, size_t len)
{
	#define LOCALS_SIZE	8*4 + 16*4 + 4*WORD_SZ
	#define H(i)		[BASE+ASM_MOD(1024+7-(i),8)*4]
	#define G(i)		H(i+1)
	#define F(i)		H(i+2)
	#define E(i)		H(i+3)
	#define D(i)		H(i+4)
	#define C(i)		H(i+5)
	#define B(i)		H(i+6)
	#define A(i)		H(i+7)
	#define Wt(i)		BASE+8*4+ASM_MOD(1024+15-(i),16)*4
	#define Wt_2(i)		Wt((i)-2)
	#define Wt_15(i)	Wt((i)-15)
	#define Wt_7(i)		Wt((i)-7)
	#define K_END		[BASE+8*4+16*4+0*WORD_SZ]
	#define STATE_SAVE	[BASE+8*4+16*4+1*WORD_SZ]
	#define DATA_SAVE	[BASE+8*4+16*4+2*WORD_SZ]
	#define DATA_END	[BASE+8*4+16*4+3*WORD_SZ]
	#define Kt(i)		WORD_REG(si)+(i)*4
#if CRYPTOPP_BOOL_X32
	#define BASE		esp+8
#elif CRYPTOPP_BOOL_X86
	#define BASE		esp+4
#elif defined(__GNUC__)
	#define BASE		r8
#else
	#define BASE		rsp
#endif

#define RA0(i, edx, edi)		\
	AS2(	add edx, [Kt(i)]	)\
	AS2(	add edx, [Wt(i)]	)\
	AS2(	add edx, H(i)		)\

#define RA1(i, edx, edi)

#define RB0(i, edx, edi)

#define RB1(i, edx, edi)	\
	AS2(	mov AS_REG_7d, [Wt_2(i)]	)\
	AS2(	mov edi, [Wt_15(i)])\
	AS2(	mov ebx, AS_REG_7d	)\
	AS2(	shr AS_REG_7d, 10		)\
	AS2(	ror ebx, 17		)\
	AS2(	xor AS_REG_7d, ebx	)\
	AS2(	ror ebx, 2		)\
	AS2(	xor ebx, AS_REG_7d	)/* s1(W_t-2) */\
	AS2(	add ebx, [Wt_7(i)])\
	AS2(	mov AS_REG_7d, edi	)\
	AS2(	shr AS_REG_7d, 3		)\
	AS2(	ror edi, 7		)\
	AS2(	add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
	AS2(	xor AS_REG_7d, edi	)\
	AS2(	add edx, [Kt(i)])\
	AS2(	ror edi, 11		)\
	AS2(	add edx, H(i)	)\
	AS2(	xor AS_REG_7d, edi	)/* s0(W_t-15) */\
	AS2(	add AS_REG_7d, ebx	)/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
	AS2(	mov [Wt(i)], AS_REG_7d)\
	AS2(	add edx, AS_REG_7d	)\

#define ROUND(i, r, eax, ecx, edi, edx)\
	/* in: edi = E	*/\
	/* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
	AS2(	mov edx, F(i)	)\
	AS2(	xor edx, G(i)	)\
	AS2(	and edx, edi	)\
	AS2(	xor edx, G(i)	)/* Ch(E,F,G) = (G^(E&(F^G))) */\
	AS2(	mov AS_REG_7d, edi	)\
	AS2(	ror edi, 6		)\
	AS2(	ror AS_REG_7d, 25		)\
	RA##r(i, edx, edi		)/* H + Wt + Kt + Ch(E,F,G) */\
	AS2(	xor AS_REG_7d, edi	)\
	AS2(	ror edi, 5		)\
	AS2(	xor AS_REG_7d, edi	)/* S1(E) */\
	AS2(	add edx, AS_REG_7d	)/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
	RB##r(i, edx, edi		)/* H + Wt + Kt + Ch(E,F,G) */\
	/* in: ecx = A, eax = B^C, edx = T1 */\
	/* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
	AS2(	mov ebx, ecx	)\
	AS2(	xor ecx, B(i)	)/* A^B */\
	AS2(	and eax, ecx	)\
	AS2(	xor eax, B(i)	)/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
	AS2(	mov AS_REG_7d, ebx	)\
	AS2(	ror ebx, 2		)\
	AS2(	add eax, edx	)/* T1 + Maj(A,B,C) */\
	AS2(	add edx, D(i)	)\
	AS2(	mov D(i), edx	)\
	AS2(	ror AS_REG_7d, 22		)\
	AS2(	xor AS_REG_7d, ebx	)\
	AS2(	ror ebx, 11		)\
	AS2(	xor AS_REG_7d, ebx	)\
	AS2(	add eax, AS_REG_7d	)/* T1 + S0(A) + Maj(A,B,C) */\
	AS2(	mov H(i), eax	)\

// Unroll the use of CRYPTOPP_BOOL_X64 in assembler math. The GAS assembler on X32 (version 2.25)
//   complains "Error: invalid operands (*ABS* and *UND* sections) for `*` and `-`"
#if CRYPTOPP_BOOL_X64
#define SWAP_COPY(i)		\
	AS2(	mov		WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
	AS1(	bswap	WORD_REG(bx))\
	AS2(	mov		[Wt(i*2+1)], WORD_REG(bx))
#else // X86 and X32
#define SWAP_COPY(i)		\
	AS2(	mov		WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
	AS1(	bswap	WORD_REG(bx))\
	AS2(	mov		[Wt(i)], WORD_REG(bx))
#endif

#if defined(__GNUC__)
	#if CRYPTOPP_BOOL_X64
		CRYPTOPP_ALIGN_DATA(16) byte workspace[LOCALS_SIZE];
	#endif
	__asm__ __volatile__
	(
	#if CRYPTOPP_BOOL_X64
		"lea %4, %%r8;"
	#endif
	INTEL_NOPREFIX
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
	#ifndef __GNUC__
		AS2(	mov		edi, [len])
		AS2(	lea		WORD_REG(si), [SHA256_K+48*4])
	#endif
	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
		AS_PUSH_IF86(bx)
	#endif

	AS_PUSH_IF86(bp)
	AS2(	mov		ebx, esp)
	AS2(	and		esp, -16)
	AS2(	sub		WORD_REG(sp), LOCALS_SIZE)
	AS_PUSH_IF86(bx)
#endif
	AS2(	mov		STATE_SAVE, WORD_REG(cx))
	AS2(	mov		DATA_SAVE, WORD_REG(dx))
	AS2(	lea		WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
	AS2(	mov		DATA_END, WORD_REG(ax))
	AS2(	mov		K_END, WORD_REG(si))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
	AS2(	test	edi, 1)
	ASJ(	jnz,	2, f)
	AS1(	dec		DWORD PTR K_END)
#endif
	AS2(	movdqu	xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
	AS2(	movdqu	xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASJ(	jmp,	0, f)
#endif
	ASL(2)	// non-SSE2
	AS2(	mov		esi, ecx)
	AS2(	lea		edi, A(0))
	AS2(	mov		ecx, 8)
ATT_NOPREFIX
	AS1(	rep movsd)
INTEL_NOPREFIX
	AS2(	mov		esi, K_END)
	ASJ(	jmp,	3, f)
#endif

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASL(0)
	AS2(	movdqu	E(0), xmm1)
	AS2(	movdqu	A(0), xmm0)
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
	ASL(3)
#endif
	AS2(	sub		WORD_REG(si), 48*4)
	SWAP_COPY(0)	SWAP_COPY(1)	SWAP_COPY(2)	SWAP_COPY(3)
	SWAP_COPY(4)	SWAP_COPY(5)	SWAP_COPY(6)	SWAP_COPY(7)
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
	SWAP_COPY(8)	SWAP_COPY(9)	SWAP_COPY(10)	SWAP_COPY(11)
	SWAP_COPY(12)	SWAP_COPY(13)	SWAP_COPY(14)	SWAP_COPY(15)
#endif
	AS2(	mov		edi, E(0))	// E
	AS2(	mov		eax, B(0))	// B
	AS2(	xor		eax, C(0))	// B^C
	AS2(	mov		ecx, A(0))	// A

	ROUND(0, 0, eax, ecx, edi, edx)
	ROUND(1, 0, ecx, eax, edx, edi)
	ROUND(2, 0, eax, ecx, edi, edx)
	ROUND(3, 0, ecx, eax, edx, edi)
	ROUND(4, 0, eax, ecx, edi, edx)
	ROUND(5, 0, ecx, eax, edx, edi)
	ROUND(6, 0, eax, ecx, edi, edx)
	ROUND(7, 0, ecx, eax, edx, edi)
	ROUND(8, 0, eax, ecx, edi, edx)
	ROUND(9, 0, ecx, eax, edx, edi)
	ROUND(10, 0, eax, ecx, edi, edx)
	ROUND(11, 0, ecx, eax, edx, edi)
	ROUND(12, 0, eax, ecx, edi, edx)
	ROUND(13, 0, ecx, eax, edx, edi)
	ROUND(14, 0, eax, ecx, edi, edx)
	ROUND(15, 0, ecx, eax, edx, edi)

	ASL(1)
	AS2(add WORD_REG(si), 4*16)
	ROUND(0, 1, eax, ecx, edi, edx)
	ROUND(1, 1, ecx, eax, edx, edi)
	ROUND(2, 1, eax, ecx, edi, edx)
	ROUND(3, 1, ecx, eax, edx, edi)
	ROUND(4, 1, eax, ecx, edi, edx)
	ROUND(5, 1, ecx, eax, edx, edi)
	ROUND(6, 1, eax, ecx, edi, edx)
	ROUND(7, 1, ecx, eax, edx, edi)
	ROUND(8, 1, eax, ecx, edi, edx)
	ROUND(9, 1, ecx, eax, edx, edi)
	ROUND(10, 1, eax, ecx, edi, edx)
	ROUND(11, 1, ecx, eax, edx, edi)
	ROUND(12, 1, eax, ecx, edi, edx)
	ROUND(13, 1, ecx, eax, edx, edi)
	ROUND(14, 1, eax, ecx, edi, edx)
	ROUND(15, 1, ecx, eax, edx, edi)
	AS2(	cmp		WORD_REG(si), K_END)
	ATT_NOPREFIX
	ASJ(	jb,		1, b)
	INTEL_NOPREFIX

	AS2(	mov		WORD_REG(dx), DATA_SAVE)
	AS2(	add		WORD_REG(dx), 64)
	AS2(	mov		AS_REG_7, STATE_SAVE)
	AS2(	mov		DATA_SAVE, WORD_REG(dx))

#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
	AS2(	test	DWORD PTR K_END, 1)
	ASJ(	jz,		4, f)
#endif
	AS2(	movdqu	xmm1, XMMWORD_PTR [AS_REG_7+1*16])
	AS2(	movdqu	xmm0, XMMWORD_PTR [AS_REG_7+0*16])
	AS2(	paddd	xmm1, E(0))
	AS2(	paddd	xmm0, A(0))
	AS2(	movdqu	[AS_REG_7+1*16], xmm1)
	AS2(	movdqu	[AS_REG_7+0*16], xmm0)
	AS2(	cmp		WORD_REG(dx), DATA_END)
	ATT_NOPREFIX
	ASJ(	jb,		0, b)
	INTEL_NOPREFIX
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASJ(	jmp,	5, f)
	ASL(4)	// non-SSE2
#endif
	AS2(	add		[AS_REG_7+0*4], ecx)	// A
	AS2(	add		[AS_REG_7+4*4], edi)	// E
	AS2(	mov		eax, B(0))
	AS2(	mov		ebx, C(0))
	AS2(	mov		ecx, D(0))
	AS2(	add		[AS_REG_7+1*4], eax)
	AS2(	add		[AS_REG_7+2*4], ebx)
	AS2(	add		[AS_REG_7+3*4], ecx)
	AS2(	mov		eax, F(0))
	AS2(	mov		ebx, G(0))
	AS2(	mov		ecx, H(0))
	AS2(	add		[AS_REG_7+5*4], eax)
	AS2(	add		[AS_REG_7+6*4], ebx)
	AS2(	add		[AS_REG_7+7*4], ecx)
	AS2(	mov		ecx, AS_REG_7d)
	AS2(	cmp		WORD_REG(dx), DATA_END)
	ASJ(	jb,		2, b)
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
	ASL(5)
#endif
#endif

	AS_POP_IF86(sp)
	AS_POP_IF86(bp)
	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
		AS_POP_IF86(bx)
	#endif

#ifdef __GNUC__
	ATT_PREFIX
	:
	: "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
	#if CRYPTOPP_BOOL_X64
		, "m" (workspace[0])
	#endif
	: "memory", "cc", "%eax"
	#if CRYPTOPP_BOOL_X64
		, "%rbx", "%r8", "%r10"
	#endif
	);
#endif
}

#endif	// (defined(CRYPTOPP_X86_ASM_AVAILABLE))

#undef sum0
#undef sum1
#undef sigma0
#undef sigma1

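/* Redefine the sigma functions with the SHA-256 rotation/shift amounts so the
   shared COMPRESS_ROUND macro can be reused for the 32-bit variant */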
#define sum0(x)		(rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22))
#define sum1(x)		(rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25))
#define sigma0(x)	(rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3))
#define sigma1(x)	(rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10))


typedef void (*sha256transformFn)(sha256_ctx* ctx, void* m, uint_64t num_blks);

sha256transformFn sha256transfunc = NULL;

void StdSha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks)
{
	uint_64t blk;
	for (blk = 0; blk < num_blks; blk++)
	{
		uint_32t W[16];
		uint_32t a,b,c,d,e,f,g,h;
		uint_32t T1, T2;
		int i;
#if defined (TC_WINDOWS_DRIVER) && defined (DEBUG)
		int	  j;
#endif

		for (i = 0; i < 64 / 4; i++)
		{
			W[i] = bswap_32((((const uint_32t*)(mp))[blk * 16 + i]));
		}

		a = ctx->hash[0];
		b = ctx->hash[1];
		c = ctx->hash[2];
		d = ctx->hash[3];
		e = ctx->hash[4];
		f = ctx->hash[5];
		g = ctx->hash[6];
		h = ctx->hash[7];

		for (i = 0; i <= 63; i+=16)
		{
#if defined (TC_WINDOWS_DRIVER) && defined (DEBUG)
			for (j = 0; j < 16; j++)
			{
				COMPRESS_ROUND(i, j, SHA256_K);
			}
#else
			COMPRESS_ROUND(i, 0, SHA256_K);
			COMPRESS_ROUND(i, 1, SHA256_K);
			COMPRESS_ROUND(i, 2, SHA256_K);
			COMPRESS_ROUND(i, 3, SHA256_K);
			COMPRESS_ROUND(i, 4, SHA256_K);
			COMPRESS_ROUND(i, 5, SHA256_K);
			COMPRESS_ROUND(i, 6, SHA256_K);
			COMPRESS_ROUND(i, 7, SHA256_K);
			COMPRESS_ROUND(i, 8, SHA256_K);
			COMPRESS_ROUND(i, 9, SHA256_K);
			COMPRESS_ROUND(i, 10, SHA256_K);
			COMPRESS_ROUND(i, 11, SHA256_K);
			COMPRESS_ROUND(i, 12, SHA256_K);
			COMPRESS_ROUND(i, 13, SHA256_K);
			COMPRESS_ROUND(i, 14, SHA256_K);
			COMPRESS_ROUND(i, 15, SHA256_K);
#endif
		}
		ctx->hash[0] += a;
		ctx->hash[1] += b;
		ctx->hash[2] += c;
		ctx->hash[3] += d;
		ctx->hash[4] += e;
		ctx->hash[5] += f;
		ctx->hash[6] += g;
		ctx->hash[7] += h;
	}
}

#ifndef NO_OPTIMIZED_VERSIONS

#if CRYPTOPP_BOOL_X64
void Avx2Sha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks)
{
	if (num_blks > 1)
		sha256_rorx(mp, ctx->hash, num_blks);
	else
		sha256_sse4(mp, ctx->hash, num_blks);
}

void AvxSha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks)
{
	if (num_blks > 1)
		sha256_avx(mp, ctx->hash, num_blks);
	else
		sha256_sse4(mp, ctx->hash, num_blks);
}

void SSE4Sha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks)
{
	sha256_sse4(mp, ctx->hash, num_blks);
}

#endif

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE))
void SSE2Sha256Transform(sha256_ctx* ctx, void* mp, uint_64t num_blks)
{
	X86_SHA256_HashBlocks(ctx->hash, (const uint_32t*)mp, (size_t)(num_blks * 64));
}
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
void Sha256AsmTransform(sha256_ctx* ctx, void* mp, uint_64t num_blks)
{
	uint_64t i;
	for (i = 0; i < num_blks; i++)
		sha256_compress_nayuki(ctx->hash, (uint_8t*)mp + i * 64);
}
#endif

#endif

void sha256_begin(sha256_ctx* ctx)
{
	ctx->hash[0] = 0x6a09e667;
	ctx->hash[1] = 0xbb67ae85;
	ctx->hash[2] = 0x3c6ef372;
	ctx->hash[3] = 0xa54ff53a;
	ctx->hash[4] = 0x510e527f;
	ctx->hash[5] = 0x9b05688c;
	ctx->hash[6] = 0x1f83d9ab;
	ctx->hash[7] = 0x5be0cd19;
	ctx->count[0] = 0;
	ctx->count[1] = 0;

	if (!sha256transfunc)
	{
#ifndef NO_OPTIMIZED_VERSIONS
#if CRYPTOPP_BOOL_X64
		if (g_isIntel && HasSAVX2() && HasSBMI2())
			sha256transfunc = Avx2Sha256Transform;
		else if (g_isIntel && HasSAVX())
			sha256transfunc = AvxSha256Transform;
		else if (HasSSE41())
			sha256transfunc = SSE4Sha256Transform;
		else
#endif

#if (defined(CRYPTOPP_X86_ASM_AVAILABLE) || defined(CRYPTOPP_X32_ASM_AVAILABLE))
		if (HasSSE2())
			sha256transfunc = SSE2Sha256Transform;
		else
#endif

#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
			sha256transfunc = Sha256AsmTransform;
#else
			sha256transfunc = StdSha256Transform;
#endif
#else
		sha256transfunc = StdSha256Transform;
#endif
	}
}

void sha256_end(unsigned char * result, sha256_ctx* ctx)
{
	int i;
	uint_64t mlen, pos = ctx->count[0];
	uint_8t* m = (uint_8t*) ctx->wbuf;
	m[pos++] = 0x80;
	if (pos > 56)
	{
		memset(m + pos, 0, (size_t) (64 - pos));
		sha256transfunc(ctx, m, 1);
		pos = 0;
	}
	memset(m + pos, 0, (size_t) (56 - pos));
	mlen = bswap_64((uint_64t) ctx->count[1]);
	memcpy(m + (64 - 8), &mlen, 64 / 8);
	sha256transfunc(ctx, m, 1);
	for (i = 0; i < 8; i++)
	{
		ctx->hash[i] = bswap_32(ctx->hash[i]);
	}
	memcpy(result, ctx->hash, 32);
}

void sha256_hash(const unsigned char * data, uint_32t len, sha256_ctx *ctx)
{
	uint_32t pos = ctx->count[0];
	uint_32t total = ctx->count[1];
	uint_8t* m = (uint_8t*) ctx->wbuf;
	if (pos && pos + len >= 64)
	{
		memcpy(m + pos, data, 64 - pos);
		sha256transfunc(ctx, m, 1);
		len -= 64 - pos;
		total += (64 - pos) * 8;
		data += 64 - pos;
		pos = 0;
	}
	if (len >= 64)
	{
		uint_32t blocks = len / 64;
		uint_32t bytes = blocks * 64;
		sha256transfunc(ctx, (void*)data, blocks);
		len -= bytes;
		total += bytes * 8;
		data += bytes;
	}
	memcpy(m + pos, data, len);
	pos += len;
	total += len * 8;
	ctx->count[0] = pos;
	ctx->count[1] = total;
}

void sha256(unsigned char * result, const unsigned char* source, uint_32t sourceLen)
{
	sha256_ctx  ctx;

	sha256_begin(&ctx);
	sha256_hash(source, sourceLen, &ctx);
	sha256_end(result, &ctx);
}
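
/*
   Illustrative usage sketch, kept out of the build with an #if 0 guard. The
   SHA-256 API mirrors the SHA-512 one above, but produces a 32-byte digest and
   takes a 32-bit length. The names below (sha256_usage_example, digest, msg)
   are placeholders, not part of the library.
*/
#if 0
static void sha256_usage_example(void)
{
	unsigned char digest[32];          /* SHA-256 produces a 256-bit (32-byte) digest */
	const unsigned char msg[] = "abc";

	/* One-shot hashing */
	sha256(digest, msg, 3);

	/* Incremental hashing yields the same digest */
	{
		sha256_ctx ctx;
		sha256_begin(&ctx);
		sha256_hash(msg, 3, &ctx);
		sha256_end(digest, &ctx);
	}
}
#endif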