VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Boot/Windows/Makefile
blob: 737fbe5f5e6291e110cec0ecf8cabc0e78cc7a89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#
# Copyright (c) 2008-2010 TrueCrypt Developers Association. All rights reserved.
#
# Governed by the TrueCrypt License 3.0 the full text of which is contained in
# the file License.txt included in TrueCrypt binary and source code distribution
# packages.
#

# ---------------------------------------------------------------------------
# Toolchain location and the flags shared by every build configuration.
# ---------------------------------------------------------------------------

# Stop right away when the location of the 16-bit compiler is unknown.
!if !DEFINED (MSVC16_ROOT)
!error Environment variable MSVC16_ROOT must point to the installation directory of MS Visual C++ 1.5
!endif

# Do not echo commands while they run.
.SILENT:

# Base name of the final boot loader image and of the floppy image.
PROJ = BootLoader

# Copy of the caller's PATH. The compile rules set PATH to "." while the
# compiler runs and put this value back afterwards.
ENVPATH = $(PATH)

# 16-bit linker and compiler taken from the MSVC16_ROOT installation.
LD = $(MSVC16_ROOT)\bin\link.exe
CC = $(MSVC16_ROOT)\bin\cl.exe

# Assembler and linker options common to all targets.
AFLAGS = /nologo /omf
LFLAGS = /NOLOGO /ONERROR:NOEXE /NOI /BATCH

# Compiler options: include paths first, then type and constant replacements,
# then the boot-loader feature defines. Later sections append to this value.
CFLAGS = /nologo /W3 /Fc /I "$(MSVC16_ROOT)\Include" /I"..\..\.." /I"..\..\..\Common" /I"..\..\..\Crypto" \
	/D __int8=char /D __int16=int /D __int32=long /D BOOL=char /D FALSE=0 /D TRUE=1 \
	/D LITTLE_ENDIAN=1234 /D BYTE_ORDER=1234 /D TC_WINDOWS_BOOT /D TC_MINIMIZE_CODE_SIZE /D TC_NO_COMPILER_INT64 \
	/D malloc=malloc_NA

# Output directory selection. Rescue-disk builds go to "Rescue" and get an
# extra define; every other build goes to "Release".
!if DEFINED (RESCUE_DISK)
OBJDIR = Rescue
CFLAGS = $(CFLAGS) /D TC_WINDOWS_BOOT_RESCUE_DISK_MODE
!else
OBJDIR = Release
!endif

# Single-cipher builds use a directory suffixed with the cipher name and
# define both the generic and the cipher-specific single-cipher symbols.
!if DEFINED (SINGLE_CIPHER)
OBJDIR = $(OBJDIR)_$(SINGLE_CIPHER)
CFLAGS = $(CFLAGS) /D TC_WINDOWS_BOOT_SINGLE_CIPHER_MODE /D TC_WINDOWS_BOOT_$(SINGLE_CIPHER)
!endif

# Settings for the .com image: extra compiler and linker switches, the
# runtime library and the target file extension.
CFLAGS = $(CFLAGS) /AT /Zl /f- /G3 /Oe /Os /Ob1 /OV0 /Gs /Gf /Gy /D NDEBUG
LFLAGS = $(LFLAGS) /NOD /NOE /TINY
LIBS = slibce
TARGETEXT = com

# Products are written next to the object files. BootCrt.obj starts the
# object list; the helper targets are built before the boot loader itself.
OUTDIR = $(OBJDIR)
OBJS = $(OUTDIR)\BootCrt.obj
TARGETS = $(OUTDIR)\BootDefs.i \
	$(OUTDIR)\BootSector.bin \
	$(OUTDIR)\Decompressor.com

# Where the sources are, as seen from inside $(OBJDIR) (the rules change into
# that directory before running their tools). Change the 0 to 1 to address the
# sources through $(MAKEDIR) instead of the relative path.
!if 0
SRCDIR = $(MAKEDIR)
!else
SRCDIR = ..
!endif

# The boot loader image is appended after the helper targets.
TARGETS = $(TARGETS) \
	$(OUTDIR)\$(PROJ).$(TARGETEXT)

# Boot loader modules (appended after the object already in OBJS).
OBJS = $(OBJS) \
	$(OUTDIR)\BootConfig.obj \
	$(OUTDIR)\BootConsoleIo.obj \
	$(OUTDIR)\BootDebug.obj \
	$(OUTDIR)\BootDiskIo.obj \
	$(OUTDIR)\BootEncryptedIo.obj \
	$(OUTDIR)\BootMain.obj \
	$(OUTDIR)\BootMemory.obj \
	$(OUTDIR)\IntFilter.obj \
	$(OUTDIR)\Platform.obj

# Additional modules that are linked in every configuration.
OBJS = $(OBJS) \
	$(OUTDIR)\Crc.obj \
	$(OUTDIR)\Crypto.obj \
	$(OUTDIR)\Endian.obj \
	$(OUTDIR)\Pkcs5.obj \
	$(OUTDIR)\Volumes.obj \
	$(OUTDIR)\Xts.obj \
	$(OUTDIR)\Rmd160.obj

# Cipher modules. Without SINGLE_CIPHER all three ciphers are linked (AES in
# its small variant); with SINGLE_CIPHER only the named cipher is linked, and
# any other value adds no cipher object at all.
!ifndef SINGLE_CIPHER
OBJS = $(OBJS) \
	$(OUTDIR)\AesSmall.obj \
	$(OUTDIR)\Serpent.obj \
	$(OUTDIR)\Twofish.obj
!else if "$(SINGLE_CIPHER)" == "AES"
OBJS = $(OBJS) \
	$(OUTDIR)\Aes_hw_cpu.obj \
	$(OUTDIR)\AesSmall_x86.obj \
	$(OUTDIR)\Aestab.obj
!else if "$(SINGLE_CIPHER)" == "SERPENT"
OBJS = $(OBJS) $(OUTDIR)\Serpent.obj
!else if "$(SINGLE_CIPHER)" == "TWOFISH"
OBJS = $(OBJS) $(OUTDIR)\Twofish.obj
!endif


# Default target: prepare the environment, then build every entry of TARGETS.
all: env $(TARGETS)

# Prepare the build environment.
# INCLUDE, LIB and LIBPATH are pointed at the current directory so that only
# the paths given explicitly in CFLAGS and on the link command lines are used.
# The output directory is created when it does not exist yet: every build rule
# starts with "cd $(OBJDIR)" and would otherwise fail on a configuration that
# has never been built before.
env:
	set INCLUDE=.
	set LIB=.
	set LIBPATH=.
	if not exist "$(OBJDIR)" mkdir "$(OBJDIR)"

# Delete the build products of the selected configuration. The leading "-"
# lets the build continue when there is nothing to delete.
clean:
	-del /q /s $(OBJDIR) >NUL:


# Inference rule: assemble an .asm file from the current directory into
# $(OUTDIR). The assembler is run from inside $(OBJDIR), which is why the
# source is addressed through $(SRCDIR). $(AS) is not assigned in the visible
# part of this file -- presumably NMAKE's predefined assembler macro; confirm.
.asm{$(OUTDIR)}.obj:
	cd $(OBJDIR)
	$(AS) $(AFLAGS) /c "$(SRCDIR)\$<"
	cd ..

# Inference rule: assemble an .asm file from ..\..\Crypto with nasm.exe. The
# source file name is echoed first; the object file and a listing file named
# after the source base name are written into $(OBJDIR).
{..\..\Crypto}.asm{$(OUTDIR)}.obj:
	cd $(OBJDIR)
	echo $(<F)
	nasm.exe -Xvc -f obj -Ox -o "$(<B).obj" -l "$(<B).lst" "$(SRCDIR)\$<"
	cd ..

# Inference rules for C and C++ sources. All four recipes are identical:
# change into $(OBJDIR), set PATH to "." while the compiler runs, compile the
# source (addressed through $(SRCDIR)) with $(CFLAGS), then restore PATH from
# $(ENVPATH) and return to the makefile directory.

# C sources from ..\..\Crypto.
{..\..\Crypto}.c{$(OUTDIR)}.obj:
	cd $(OBJDIR)
	set PATH=.
	$(CC) $(CFLAGS) /c "$(SRCDIR)\$<"
	set PATH=$(ENVPATH)
	cd ..
	
# C sources from ..\..\Common.
{..\..\Common}.c{$(OUTDIR)}.obj:
	cd $(OBJDIR)
	set PATH=.
	$(CC) $(CFLAGS) /c "$(SRCDIR)\$<"
	set PATH=$(ENVPATH)
	cd ..

# C sources from the current directory.
.c{$(OUTDIR)}.obj:
	cd $(OBJDIR)
	set PATH=.
	$(CC) $(CFLAGS) /c "$(SRCDIR)\$<"
	set PATH=$(ENVPATH)
	cd ..
			
# C++ sources from the current directory.
.cpp{$(OUTDIR)}.obj:
	cd $(OBJDIR)
	set PATH=.
	$(CC) $(CFLAGS) /c "$(SRCDIR)\$<"
	set PATH=$(ENVPATH)
	cd ..

# Run the compiler over BootDefs.h with TC_ASM_PREPROCESS defined and the
# /P /EP switches, from inside $(OBJDIR). The result is expected to appear
# there as BootDefs.i (the name is chosen by the compiler, not by this rule).
$(OUTDIR)\BootDefs.i: BootDefs.h
	cd $(OBJDIR)
	set PATH=.
	$(CC) $(CFLAGS) /D TC_ASM_PREPROCESS /P /EP "$(SRCDIR)\BootDefs.h"
	set PATH=$(ENVPATH)
	cd ..
	
# Link BootSector.obj into BootSector.bin (linker output discarded) and copy
# it to the start of the floppy image $(PROJ).flp with dd. A failing dd is
# ignored ("-" prefix) and its error output is discarded.
$(OUTDIR)\BootSector.bin: $(OUTDIR)\BootSector.obj
	cd $(OBJDIR)
	$(LD) $(LFLAGS) BootSector.obj,BootSector.bin,,,, >NUL:
	-dd.exe conv=notrunc bs=512 if=BootSector.bin of=$(PROJ).flp 2>NUL:
	cd ..

# Link BootCrt.obj and Decompressor.obj against slibce into Decompressor.com
# (with a map file) and copy it into the floppy image starting at 512-byte
# block 1 (seek=1). A failing dd is ignored here as well.
$(OUTDIR)\Decompressor.com: $(OUTDIR)\BootCrt.obj $(OUTDIR)\Decompressor.obj
	cd $(OBJDIR)
	$(LD) $(LFLAGS) BootCrt.obj Decompressor.obj,Decompressor.com,Decompressor.map,$(MSVC16_ROOT)\lib\+slibce,,
	-dd.exe conv=notrunc,sync bs=512 seek=1 if=Decompressor.com of=$(PROJ).flp 2>NUL:
	cd ..

# Link the boot loader image from all objects in OBJS, compress it with gzip
# and copy the compressed image into the floppy image $(PROJ).flp starting at
# 512-byte block 5 (seek=5); a failing dd is ignored.
#
# The linker is driven by a response file, $(PROJ).crf, built in two parts:
#   1. one "<object file> +" line per prerequisite (the for loop over $(**F));
#   2. the inline file $(PROJ).crf2 written by the echo line: an empty line,
#      the output name, the map name, the library directory, the libraries
#      and a closing ";".
# NOTE(review): the empty first line of the inline file presumably ends the
# "+"-continued object list -- confirm before reformatting it. Text between
# the "<<" markers is file content, so no comments may be placed there.
# Both response files are deleted after the link step.
$(OUTDIR)\$(PROJ).$(TARGETEXT): $(OBJS)
	@echo Linking...
	cd $(OBJDIR)
	
	echo >NUL: @<<$(PROJ).crf2

$(PROJ).$(TARGETEXT)
$(PROJ).map
$(MSVC16_ROOT)\lib\+
$(LIBS)
;
<<
	del $(PROJ).crf >NUL: 2>NUL:
	for %F in ($(**F)) do @echo %F + >>$(PROJ).crf
	type $(PROJ).crf2 >>$(PROJ).crf
	
	$(LD) $(LFLAGS) @$(PROJ).crf
	del $(PROJ).crf $(PROJ).crf2

	gzip.exe -c -n --best $(PROJ).$(TARGETEXT) >$(PROJ).$(TARGETEXT).gz
	-dd.exe conv=notrunc,sync bs=512 seek=5 if=$(PROJ).$(TARGETEXT).gz of=$(PROJ).flp 2>NUL:
	cd ..
ong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* 
Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
; ---------------------------------------------------------------------------
; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
;
; LICENSE TERMS
;
; The free distribution and use of this software is allowed (with or without
; changes) provided that:
;
;  1. source code distributions include the above copyright notice, this
;     list of conditions and the following disclaimer;
;
;  2. binary distributions include the above copyright notice, this list
;     of conditions and the following disclaimer in their documentation;
;
;  3. the name of the copyright holder is not used to endorse products
;     built using this software without specific written permission.
;
; DISCLAIMER
;
; This software is provided 'as is' with no explicit or implied warranties
; in respect of its properties, including, but not limited to, correctness
; and/or fitness for purpose.
; ---------------------------------------------------------------------------
; Issue 20/12/2007
;
; I am grateful to Dag Arne Osvik for many discussions of the techniques that
; can be used to optimise AES assembler code on AMD64/EM64T architectures.
; Some of the techniques used in this implementation are the result of
; suggestions made by him for which I am most grateful.

;
; Adapted for TrueCrypt:
; - Compatibility with NASM
;

; An AES implementation for AMD64 processors using the YASM assembler.  This
; implementation provides only encryption, decryption and hence requires key
; scheduling support in C. It uses 8k bytes of tables but its encryption and
; decryption performance is very close to that obtained using large tables.
; It can use either Windows or Gnu/Linux calling conventions, which are as
; follows:
;               windows  gnu/linux
;
;   in_blk          rcx     rdi
;   out_blk         rdx     rsi
;   context (cx)     r8     rdx
;
;   preserved       rsi      -    + rbx, rbp, rsp, r12, r13, r14 & r15
;   registers       rdi      -      on both
;
;   destroyed        -      rsi   + rax, rcx, rdx, r8, r9, r10 & r11
;   registers        -      rdi     on both
;
; The default convention is that for windows, the gnu/linux convention being
; used if __GNUC__ is defined.
;
; Define _SEH_ to include support for Win64 structured exception handling
; (this requires YASM version 0.6 or later).
;
; This code provides the standard AES block size (128 bits, 16 bytes) and the
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
; interface as my C implementation.  It uses the Microsoft C AMD64 calling
; conventions in which the three parameters are placed in  rcx, rdx and r8
; respectively.  The rbx, rsi, rdi, rbp and r12..r15 registers are preserved.
;
;     AES_RETURN aes_encrypt(const unsigned char in_blk[],
;                   unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
;
;     AES_RETURN aes_decrypt(const unsigned char in_blk[],
;                   unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
;
;     AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
;                                            const aes_encrypt_ctx cx[1]);
;
;     AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
;                                            const aes_decrypt_ctx cx[1]);
;
;     AES_RETURN aes_encrypt_key(const unsigned char key[],
;                           unsigned int len, const aes_encrypt_ctx cx[1]);
;
;     AES_RETURN aes_decrypt_key(const unsigned char key[],
;                           unsigned int len, const aes_decrypt_ctx cx[1]);
;
; where <NNN> is 128, 192 or 256.  In the last two calls the length can be in
; either bits or bytes.
;
; Comment in/out the following lines to obtain the desired subroutines. These
; selections MUST match those in the C header file aes.h

; Build-time feature switches. As checked in, AES_256, ENCRYPTION, DECRYPTION,
; AES_REV_DKS and LAST_ROUND_TABLES are enabled; AES_128, AES_192 and AES_VAR
; are commented out. The key-size switches only select KS_LENGTH further down.
; %define AES_128                 ; define if AES with 128 bit keys is needed
; %define AES_192                 ; define if AES with 192 bit keys is needed
%define AES_256                 ; define if AES with 256 bit keys is needed
; %define AES_VAR                 ; define if a variable key size is needed
%define ENCRYPTION              ; define if encryption is needed
%define DECRYPTION              ; define if decryption is needed
%define AES_REV_DKS             ; define if key decryption schedule is reversed
%define LAST_ROUND_TABLES       ; define for the faster version using extra tables

; The encryption key schedule has the following in memory layout where N is the
; number of rounds (10, 12 or 14):
;
; lo: | input key (round 0)  |  ; each round is four 32-bit words
;     | encryption round 1   |
;     | encryption round 2   |
;     ....
;     | encryption round N-1 |
; hi: | encryption round N   |
;
; The decryption key schedule is normally set up so that it has the same
; layout as above by actually reversing the order of the encryption key
; schedule in memory (this happens when AES_REV_DKS is set):
;
; lo: | decryption round 0   | =              | encryption round N   |
;     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
;     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
;     ....                       ....
;     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
; hi: | decryption round N   | =              | input key (round 0)  |
;
; with rounds except the first and last modified using inv_mix_column()
; But if AES_REV_DKS is NOT set the order of keys is left as it is for
; encryption so that it has to be accessed in reverse when used for
; decryption (although the inverse mix column modifications are done)
;
; lo: | decryption round 0   | =              | input key (round 0)  |
;     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
;     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
;     ....                       ....
;     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
; hi: | decryption round N   | =              | encryption round N   |
;
; This layout is faster when the assembler key scheduling provided here
; is used.
;
; The DLL interface must use the _stdcall convention in which the number
; of bytes of parameter space is added after an @ to the routine's name.
; We must also remove our parameters from the stack before return (see
; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.

;%define DLL_EXPORT

; End of user defines

; A variable key size build needs all three fixed sizes; switch on whichever
; of them the user has not already defined.
%ifdef AES_VAR
    %ifndef AES_256
        %define AES_256
    %endif
    %ifndef AES_192
        %define AES_192
    %endif
    %ifndef AES_128
        %define AES_128
    %endif
%endif

; KS_LENGTH is the number of 32-bit words in the key schedule, sized for the
; largest enabled key length. AES_VAR needs no branch of its own here because
; it has just forced AES_256 on.
%ifdef AES_256
    %define KS_LENGTH 60
%elifdef AES_192
    %define KS_LENGTH 52
%else
    %define KS_LENGTH 44
%endif

; Register aliases, grouped per physical register:
;   rN          numbered name of the 64-bit register
;   rNd / rNh   32-bit form / high byte of the low word (first four only)
;   <reg>d      32-bit form of the named 64-bit register
;   <reg>b      low byte of the named 64-bit register

; rax
%define     r0      rax
%define     r0d     eax
%define     r0h     ah
%define     raxd    eax
%define     raxb    al

; rdx
%define     r1      rdx
%define     r1d     edx
%define     r1h     dh
%define     rdxd    edx
%define     rdxb    dl

; rcx
%define     r2      rcx
%define     r2d     ecx
%define     r2h     ch
%define     rcxd    ecx
%define     rcxb    cl

; rbx
%define     r3      rbx
%define     r3d     ebx
%define     r3h     bh
%define     rbxd    ebx
%define     rbxb    bl

; rsi
%define     r4      rsi
%define     rsid    esi
%define     rsib    sil

; rdi
%define     r5      rdi
%define     rdid    edi
%define     rdib    dil

; rbp
%define     r6      rbp
%define     rbpd    ebp
%define     rbpb    bpl

; rsp
%define     r7      rsp
%define     rspd    esp
%define     rspb    spl

; finite field multiplies by {02}, {04} and {08}
;
; Each macro shifts x left and then XORs in 0x11b once for every bit that the
; shift pushed above bit 7, scaled to that bit's position (the &1, &2, &4
; terms). The arguments are used without extra parentheses, so they must be
; simple constants, as they are in enc_vals and dec_vals.

%define f2(x)   ((x<<1)^(((x>>7)&1)*0x11b))
%define f4(x)   ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
%define f8(x)   ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))

; finite field multiplies required in table generation
; (multiplication by {03}, {09}, {0b}, {0d} and {0e}, composed by XORing the
; power-of-two multiples above)

%define f3(x)   (f2(x) ^ x)
%define f9(x)   (f8(x) ^ x)
%define fb(x)   (f8(x) ^ f2(x) ^ x)
%define fd(x)   (f8(x) ^ f4(x) ^ x)
%define fe(x)   (f8(x) ^ f4(x) ^ f2(x))

; macro for expanding S-box data

; enc_vals <m>
;   Emits one "db" list of 256 items, applying the single-line macro named by
;   %1 to each of the 256 byte constants below (8 per source line, 32 lines).
;   The constants are the forward S-box values in index order. With the u8 or
;   w8 expanders, which each produce 8 bytes per value, one invocation emits
;   2048 bytes of table.
%macro enc_vals 1
    db  %1(0x63),%1(0x7c),%1(0x77),%1(0x7b),%1(0xf2),%1(0x6b),%1(0x6f),%1(0xc5)
    db  %1(0x30),%1(0x01),%1(0x67),%1(0x2b),%1(0xfe),%1(0xd7),%1(0xab),%1(0x76)
    db  %1(0xca),%1(0x82),%1(0xc9),%1(0x7d),%1(0xfa),%1(0x59),%1(0x47),%1(0xf0)
    db  %1(0xad),%1(0xd4),%1(0xa2),%1(0xaf),%1(0x9c),%1(0xa4),%1(0x72),%1(0xc0)
    db  %1(0xb7),%1(0xfd),%1(0x93),%1(0x26),%1(0x36),%1(0x3f),%1(0xf7),%1(0xcc)
    db  %1(0x34),%1(0xa5),%1(0xe5),%1(0xf1),%1(0x71),%1(0xd8),%1(0x31),%1(0x15)
    db  %1(0x04),%1(0xc7),%1(0x23),%1(0xc3),%1(0x18),%1(0x96),%1(0x05),%1(0x9a)
    db  %1(0x07),%1(0x12),%1(0x80),%1(0xe2),%1(0xeb),%1(0x27),%1(0xb2),%1(0x75)
    db  %1(0x09),%1(0x83),%1(0x2c),%1(0x1a),%1(0x1b),%1(0x6e),%1(0x5a),%1(0xa0)
    db  %1(0x52),%1(0x3b),%1(0xd6),%1(0xb3),%1(0x29),%1(0xe3),%1(0x2f),%1(0x84)
    db  %1(0x53),%1(0xd1),%1(0x00),%1(0xed),%1(0x20),%1(0xfc),%1(0xb1),%1(0x5b)
    db  %1(0x6a),%1(0xcb),%1(0xbe),%1(0x39),%1(0x4a),%1(0x4c),%1(0x58),%1(0xcf)
    db  %1(0xd0),%1(0xef),%1(0xaa),%1(0xfb),%1(0x43),%1(0x4d),%1(0x33),%1(0x85)
    db  %1(0x45),%1(0xf9),%1(0x02),%1(0x7f),%1(0x50),%1(0x3c),%1(0x9f),%1(0xa8)
    db  %1(0x51),%1(0xa3),%1(0x40),%1(0x8f),%1(0x92),%1(0x9d),%1(0x38),%1(0xf5)
    db  %1(0xbc),%1(0xb6),%1(0xda),%1(0x21),%1(0x10),%1(0xff),%1(0xf3),%1(0xd2)
    db  %1(0xcd),%1(0x0c),%1(0x13),%1(0xec),%1(0x5f),%1(0x97),%1(0x44),%1(0x17)
    db  %1(0xc4),%1(0xa7),%1(0x7e),%1(0x3d),%1(0x64),%1(0x5d),%1(0x19),%1(0x73)
    db  %1(0x60),%1(0x81),%1(0x4f),%1(0xdc),%1(0x22),%1(0x2a),%1(0x90),%1(0x88)
    db  %1(0x46),%1(0xee),%1(0xb8),%1(0x14),%1(0xde),%1(0x5e),%1(0x0b),%1(0xdb)
    db  %1(0xe0),%1(0x32),%1(0x3a),%1(0x0a),%1(0x49),%1(0x06),%1(0x24),%1(0x5c)
    db  %1(0xc2),%1(0xd3),%1(0xac),%1(0x62),%1(0x91),%1(0x95),%1(0xe4),%1(0x79)
    db  %1(0xe7),%1(0xc8),%1(0x37),%1(0x6d),%1(0x8d),%1(0xd5),%1(0x4e),%1(0xa9)
    db  %1(0x6c),%1(0x56),%1(0xf4),%1(0xea),%1(0x65),%1(0x7a),%1(0xae),%1(0x08)
    db  %1(0xba),%1(0x78),%1(0x25),%1(0x2e),%1(0x1c),%1(0xa6),%1(0xb4),%1(0xc6)
    db  %1(0xe8),%1(0xdd),%1(0x74),%1(0x1f),%1(0x4b),%1(0xbd),%1(0x8b),%1(0x8a)
    db  %1(0x70),%1(0x3e),%1(0xb5),%1(0x66),%1(0x48),%1(0x03),%1(0xf6),%1(0x0e)
    db  %1(0x61),%1(0x35),%1(0x57),%1(0xb9),%1(0x86),%1(0xc1),%1(0x1d),%1(0x9e)
    db  %1(0xe1),%1(0xf8),%1(0x98),%1(0x11),%1(0x69),%1(0xd9),%1(0x8e),%1(0x94)
    db  %1(0x9b),%1(0x1e),%1(0x87),%1(0xe9),%1(0xce),%1(0x55),%1(0x28),%1(0xdf)
    db  %1(0x8c),%1(0xa1),%1(0x89),%1(0x0d),%1(0xbf),%1(0xe6),%1(0x42),%1(0x68)
    db  %1(0x41),%1(0x99),%1(0x2d),%1(0x0f),%1(0xb0),%1(0x54),%1(0xbb),%1(0x16)
%endmacro

; dec_vals <m>
;   Counterpart of enc_vals for decryption: emits one "db" list of 256 items,
;   applying the single-line macro named by %1 to each of the 256 byte
;   constants below (the inverse S-box values in index order). With the v8 or
;   w8 expanders one invocation emits 2048 bytes of table.
%macro dec_vals 1
    db  %1(0x52),%1(0x09),%1(0x6a),%1(0xd5),%1(0x30),%1(0x36),%1(0xa5),%1(0x38)
    db  %1(0xbf),%1(0x40),%1(0xa3),%1(0x9e),%1(0x81),%1(0xf3),%1(0xd7),%1(0xfb)
    db  %1(0x7c),%1(0xe3),%1(0x39),%1(0x82),%1(0x9b),%1(0x2f),%1(0xff),%1(0x87)
    db  %1(0x34),%1(0x8e),%1(0x43),%1(0x44),%1(0xc4),%1(0xde),%1(0xe9),%1(0xcb)
    db  %1(0x54),%1(0x7b),%1(0x94),%1(0x32),%1(0xa6),%1(0xc2),%1(0x23),%1(0x3d)
    db  %1(0xee),%1(0x4c),%1(0x95),%1(0x0b),%1(0x42),%1(0xfa),%1(0xc3),%1(0x4e)
    db  %1(0x08),%1(0x2e),%1(0xa1),%1(0x66),%1(0x28),%1(0xd9),%1(0x24),%1(0xb2)
    db  %1(0x76),%1(0x5b),%1(0xa2),%1(0x49),%1(0x6d),%1(0x8b),%1(0xd1),%1(0x25)
    db  %1(0x72),%1(0xf8),%1(0xf6),%1(0x64),%1(0x86),%1(0x68),%1(0x98),%1(0x16)
    db  %1(0xd4),%1(0xa4),%1(0x5c),%1(0xcc),%1(0x5d),%1(0x65),%1(0xb6),%1(0x92)
    db  %1(0x6c),%1(0x70),%1(0x48),%1(0x50),%1(0xfd),%1(0xed),%1(0xb9),%1(0xda)
    db  %1(0x5e),%1(0x15),%1(0x46),%1(0x57),%1(0xa7),%1(0x8d),%1(0x9d),%1(0x84)
    db  %1(0x90),%1(0xd8),%1(0xab),%1(0x00),%1(0x8c),%1(0xbc),%1(0xd3),%1(0x0a)
    db  %1(0xf7),%1(0xe4),%1(0x58),%1(0x05),%1(0xb8),%1(0xb3),%1(0x45),%1(0x06)
    db  %1(0xd0),%1(0x2c),%1(0x1e),%1(0x8f),%1(0xca),%1(0x3f),%1(0x0f),%1(0x02)
    db  %1(0xc1),%1(0xaf),%1(0xbd),%1(0x03),%1(0x01),%1(0x13),%1(0x8a),%1(0x6b)
    db  %1(0x3a),%1(0x91),%1(0x11),%1(0x41),%1(0x4f),%1(0x67),%1(0xdc),%1(0xea)
    db  %1(0x97),%1(0xf2),%1(0xcf),%1(0xce),%1(0xf0),%1(0xb4),%1(0xe6),%1(0x73)
    db  %1(0x96),%1(0xac),%1(0x74),%1(0x22),%1(0xe7),%1(0xad),%1(0x35),%1(0x85)
    db  %1(0xe2),%1(0xf9),%1(0x37),%1(0xe8),%1(0x1c),%1(0x75),%1(0xdf),%1(0x6e)
    db  %1(0x47),%1(0xf1),%1(0x1a),%1(0x71),%1(0x1d),%1(0x29),%1(0xc5),%1(0x89)
    db  %1(0x6f),%1(0xb7),%1(0x62),%1(0x0e),%1(0xaa),%1(0x18),%1(0xbe),%1(0x1b)
    db  %1(0xfc),%1(0x56),%1(0x3e),%1(0x4b),%1(0xc6),%1(0xd2),%1(0x79),%1(0x20)
    db  %1(0x9a),%1(0xdb),%1(0xc0),%1(0xfe),%1(0x78),%1(0xcd),%1(0x5a),%1(0xf4)
    db  %1(0x1f),%1(0xdd),%1(0xa8),%1(0x33),%1(0x88),%1(0x07),%1(0xc7),%1(0x31)
    db  %1(0xb1),%1(0x12),%1(0x10),%1(0x59),%1(0x27),%1(0x80),%1(0xec),%1(0x5f)
    db  %1(0x60),%1(0x51),%1(0x7f),%1(0xa9),%1(0x19),%1(0xb5),%1(0x4a),%1(0x0d)
    db  %1(0x2d),%1(0xe5),%1(0x7a),%1(0x9f),%1(0x93),%1(0xc9),%1(0x9c),%1(0xef)
    db  %1(0xa0),%1(0xe0),%1(0x3b),%1(0x4d),%1(0xae),%1(0x2a),%1(0xf5),%1(0xb0)
    db  %1(0xc8),%1(0xeb),%1(0xbb),%1(0x3c),%1(0x83),%1(0x53),%1(0x99),%1(0x61)
    db  %1(0x17),%1(0x2b),%1(0x04),%1(0x7e),%1(0xba),%1(0x77),%1(0xd6),%1(0x26)
    db  %1(0xe1),%1(0x69),%1(0x14),%1(0x63),%1(0x55),%1(0x21),%1(0x0c),%1(0x7d)
%endmacro

; Per-value expanders passed to enc_vals / dec_vals. Each produces 8 bytes: a
; four-byte pattern written twice, so that a dword read at byte offset 0, 3, 2
; or 1 (tab_0..tab_3 below) yields the pattern or one of its byte rotations.
;   u8  forward round table:   {02}x, x, x, {03}x
;   v8  inverse round table:   {0e}x, {09}x, {0d}x, {0b}x; the final byte is
;       the plain value x, which is the byte tab_i reads (offset 7)
;   w8  last round table:      x, 0, 0, 0
%define u8(x)   f2(x), x, x, f3(x), f2(x), x, x, f3(x)
%define v8(x)   fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
%define w8(x)   x, 0, 0, 0, x, 0, 0, 0

; Key schedule addressing. x is a round selector and y a word index (0..3);
; both references step 16 bytes per round and 4 bytes per word. The forward
; form and the AES_REV_DKS inverse form count rounds downwards from kptr, the
; non-reversed inverse form counts upwards.
%define tptr    rbp     ; table pointer
%define kptr    r8      ; key schedule pointer
%define fofs    128     ; adjust offset in key schedule to keep |disp| < 128
%define fk_ref(x,y) [kptr-16*x+fofs+4*y]
%ifdef  AES_REV_DKS
%define rofs    128
%define ik_ref(x,y) [kptr-16*x+rofs+4*y]
%else
%define rofs    -128
%define ik_ref(x,y) [kptr+16*x+rofs+4*y]
%endif

; Table addressing: entries are 8 bytes wide and x is the register holding the
; entry index. tab_f and tab_i are byte reads used only by the variants of the
; last-round macros that are assembled without LAST_ROUND_TABLES. t_ref pastes
; its first argument onto "tab_" to pick one of these views.
%define tab_0(x)   [tptr+8*x]
%define tab_1(x)   [tptr+8*x+3]
%define tab_2(x)   [tptr+8*x+2]
%define tab_3(x)   [tptr+8*x+1]
%define tab_f(x)   byte [tptr+8*x+1]
%define tab_i(x)   byte [tptr+8*x+7]
%define t_ref(x,r) tab_ %+ x(r)

; ff_rnd a, b, c, d, n
;   %1..%4  64-bit register names; their 32-bit forms (name with a "d"
;           suffix) accumulate the four output words
;   %5      round selector passed to fk_ref for the round key words
; On entry the state is in eax, ebx, ecx, edx. The accumulators are loaded
; with the round key; then every byte of each state word is extracted through
; esi/edi, used as an index into one of the four table views at tptr and XORed
; into an accumulator. The state registers are shifted right by 16 on the way,
; esi and edi are overwritten, and the result is copied back into eax..edx.
%macro ff_rnd 5                 ; normal forward round
    mov     %1d, fk_ref(%5,0)
    mov     %2d, fk_ref(%5,1)
    mov     %3d, fk_ref(%5,2)
    mov     %4d, fk_ref(%5,3)

    ; bytes of eax
    movzx   esi, al
    movzx   edi, ah
    shr     eax, 16
    xor     %1d, t_ref(0,rsi)
    xor     %4d, t_ref(1,rdi)
    movzx   esi, al
    movzx   edi, ah
    xor     %3d, t_ref(2,rsi)
    xor     %2d, t_ref(3,rdi)

    ; bytes of ebx
    movzx   esi, bl
    movzx   edi, bh
    shr     ebx, 16
    xor     %2d, t_ref(0,rsi)
    xor     %1d, t_ref(1,rdi)
    movzx   esi, bl
    movzx   edi, bh
    xor     %4d, t_ref(2,rsi)
    xor     %3d, t_ref(3,rdi)

    ; bytes of ecx
    movzx   esi, cl
    movzx   edi, ch
    shr     ecx, 16
    xor     %3d, t_ref(0,rsi)
    xor     %2d, t_ref(1,rdi)
    movzx   esi, cl
    movzx   edi, ch
    xor     %1d, t_ref(2,rsi)
    xor     %4d, t_ref(3,rdi)

    ; bytes of edx
    movzx   esi, dl
    movzx   edi, dh
    shr     edx, 16
    xor     %4d, t_ref(0,rsi)
    xor     %3d, t_ref(1,rdi)
    movzx   esi, dl
    movzx   edi, dh
    xor     %2d, t_ref(2,rsi)
    xor     %1d, t_ref(3,rdi)

    ; hand the new state back in the registers the next round reads
    mov     eax,%1d
    mov     ebx,%2d
    mov     ecx,%3d
    mov     edx,%4d
%endmacro

; fl_rnd a, b, c, d, n  -- last forward round, in two alternative forms.
; Parameters are as for ff_rnd. Unlike ff_rnd the result is left in %1d..%4d
; and is not copied back to eax..edx; eax..edx, esi and edi are overwritten.
%ifdef LAST_ROUND_TABLES

; Table form: same byte-to-accumulator pattern as ff_rnd, but tptr is first
; advanced by 2048 bytes, i.e. past one 256-entry table of 8-byte entries, so
; the lookups use the table that follows the normal round table.
; Note that tptr is left modified.
%macro fl_rnd 5                 ; last forward round
    add     tptr, 2048
    mov     %1d, fk_ref(%5,0)
    mov     %2d, fk_ref(%5,1)
    mov     %3d, fk_ref(%5,2)
    mov     %4d, fk_ref(%5,3)

    movzx   esi, al
    movzx   edi, ah
    shr     eax, 16
    xor     %1d, t_ref(0,rsi)
    xor     %4d, t_ref(1,rdi)
    movzx   esi, al
    movzx   edi, ah
    xor     %3d, t_ref(2,rsi)
    xor     %2d, t_ref(3,rdi)

    movzx   esi, bl
    movzx   edi, bh
    shr     ebx, 16
    xor     %2d, t_ref(0,rsi)
    xor     %1d, t_ref(1,rdi)
    movzx   esi, bl
    movzx   edi, bh
    xor     %4d, t_ref(2,rsi)
    xor     %3d, t_ref(3,rdi)

    movzx   esi, cl
    movzx   edi, ch
    shr     ecx, 16
    xor     %3d, t_ref(0,rsi)
    xor     %2d, t_ref(1,rdi)
    movzx   esi, cl
    movzx   edi, ch
    xor     %1d, t_ref(2,rsi)
    xor     %4d, t_ref(3,rdi)

    movzx   esi, dl
    movzx   edi, dh
    shr     edx, 16
    xor     %4d, t_ref(0,rsi)
    xor     %3d, t_ref(1,rdi)
    movzx   esi, dl
    movzx   edi, dh
    xor     %2d, t_ref(2,rsi)
    xor     %1d, t_ref(3,rdi)
%endmacro

%else

; Table-free form: each state byte is replaced by the single byte read through
; tab_f from the normal round table, and that byte is rotated into byte
; position 0, 1, 2 or 3 (no rotate, rol 8, rol 16, rol 24) before it is XORed
; into the accumulator.
%macro fl_rnd 5                 ; last forward round
    mov     %1d, fk_ref(%5,0)
    mov     %2d, fk_ref(%5,1)
    mov     %3d, fk_ref(%5,2)
    mov     %4d, fk_ref(%5,3)

    movzx   esi, al
    movzx   edi, ah
    shr     eax, 16
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    xor     %1d, esi
    rol     edi, 8
    xor     %4d, edi
    movzx   esi, al
    movzx   edi, ah
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %3d, esi
    xor     %2d, edi

    movzx   esi, bl
    movzx   edi, bh
    shr     ebx, 16
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    xor     %2d, esi
    rol     edi, 8
    xor     %1d, edi
    movzx   esi, bl
    movzx   edi, bh
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %4d, esi
    xor     %3d, edi

    movzx   esi, cl
    movzx   edi, ch
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    shr     ecx, 16
    xor     %3d, esi
    rol     edi, 8
    xor     %2d, edi
    movzx   esi, cl
    movzx   edi, ch
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %1d, esi
    xor     %4d, edi

    movzx   esi, dl
    movzx   edi, dh
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    shr     edx, 16
    xor     %4d, esi
    rol     edi, 8
    xor     %3d, edi
    movzx   esi, dl
    movzx   edi, dh
    movzx   esi, t_ref(f,rsi)
    movzx   edi, t_ref(f,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %2d, esi
    xor     %1d, edi
%endmacro

%endif

; ii_rnd a, b, c, d, n
;   %1..%4  64-bit register names; their 32-bit forms (name with a "d"
;           suffix) accumulate the four output words
;   %5      round selector passed to ik_ref for the round key words
; Mirror image of ff_rnd for decryption: the state is read from eax..edx, the
; accumulators start from the inverse round key, and the table results are
; spread over the accumulators in the opposite rotation (byte 1 of a word goes
; to the next accumulator here, to the previous one in ff_rnd). esi and edi
; are overwritten and the result is copied back into eax..edx.
%macro ii_rnd 5                 ; normal inverse round
    mov     %1d, ik_ref(%5,0)
    mov     %2d, ik_ref(%5,1)
    mov     %3d, ik_ref(%5,2)
    mov     %4d, ik_ref(%5,3)

    ; bytes of eax
    movzx   esi, al
    movzx   edi, ah
    shr     eax, 16
    xor     %1d, t_ref(0,rsi)
    xor     %2d, t_ref(1,rdi)
    movzx   esi, al
    movzx   edi, ah
    xor     %3d, t_ref(2,rsi)
    xor     %4d, t_ref(3,rdi)

    ; bytes of ebx
    movzx   esi, bl
    movzx   edi, bh
    shr     ebx, 16
    xor     %2d, t_ref(0,rsi)
    xor     %3d, t_ref(1,rdi)
    movzx   esi, bl
    movzx   edi, bh
    xor     %4d, t_ref(2,rsi)
    xor     %1d, t_ref(3,rdi)

    ; bytes of ecx
    movzx   esi, cl
    movzx   edi, ch
    shr     ecx, 16
    xor     %3d, t_ref(0,rsi)
    xor     %4d, t_ref(1,rdi)
    movzx   esi, cl
    movzx   edi, ch
    xor     %1d, t_ref(2,rsi)
    xor     %2d, t_ref(3,rdi)

    ; bytes of edx
    movzx   esi, dl
    movzx   edi, dh
    shr     edx, 16
    xor     %4d, t_ref(0,rsi)
    xor     %1d, t_ref(1,rdi)
    movzx   esi, dl
    movzx   edi, dh
    xor     %2d, t_ref(2,rsi)
    xor     %3d, t_ref(3,rdi)

    ; hand the new state back in the registers the next round reads
    mov     eax,%1d
    mov     ebx,%2d
    mov     ecx,%3d
    mov     edx,%4d
%endmacro

; il_rnd a, b, c, d, n  -- last inverse round, in two alternative forms.
; Parameters are as for ii_rnd. The result is left in %1d..%4d and is not
; copied back to eax..edx; eax..edx, esi and edi are overwritten.
%ifdef LAST_ROUND_TABLES

; Table form: same pattern as ii_rnd, but tptr is first advanced by 2048
; bytes, i.e. past one 256-entry table of 8-byte entries, so the lookups use
; the table that follows the normal inverse round table.
; Note that tptr is left modified.
%macro il_rnd 5                 ; last inverse round
    add     tptr, 2048
    mov     %1d, ik_ref(%5,0)
    mov     %2d, ik_ref(%5,1)
    mov     %3d, ik_ref(%5,2)
    mov     %4d, ik_ref(%5,3)

    movzx   esi, al
    movzx   edi, ah
    shr     eax, 16
    xor     %1d, t_ref(0,rsi)
    xor     %2d, t_ref(1,rdi)
    movzx   esi, al
    movzx   edi, ah
    xor     %3d, t_ref(2,rsi)
    xor     %4d, t_ref(3,rdi)

    movzx   esi, bl
    movzx   edi, bh
    shr     ebx, 16
    xor     %2d, t_ref(0,rsi)
    xor     %3d, t_ref(1,rdi)
    movzx   esi, bl
    movzx   edi, bh
    xor     %4d, t_ref(2,rsi)
    xor     %1d, t_ref(3,rdi)

    movzx   esi, cl
    movzx   edi, ch
    shr     ecx, 16
    xor     %3d, t_ref(0,rsi)
    xor     %4d, t_ref(1,rdi)
    movzx   esi, cl
    movzx   edi, ch
    xor     %1d, t_ref(2,rsi)
    xor     %2d, t_ref(3,rdi)

    movzx   esi, dl
    movzx   edi, dh
    shr     edx, 16
    xor     %4d, t_ref(0,rsi)
    xor     %1d, t_ref(1,rdi)
    movzx   esi, dl
    movzx   edi, dh
    xor     %2d, t_ref(2,rsi)
    xor     %3d, t_ref(3,rdi)
%endmacro

%else

; Table-free form: each state byte is replaced by the single byte read through
; tab_i from the normal inverse round table, and that byte is rotated into
; byte position 0, 1, 2 or 3 (no rotate, rol 8, rol 16, rol 24) before it is
; XORed into the accumulator.
%macro il_rnd 5                 ; last inverse round
    mov     %1d, ik_ref(%5,0)
    mov     %2d, ik_ref(%5,1)
    mov     %3d, ik_ref(%5,2)
    mov     %4d, ik_ref(%5,3)

    movzx   esi, al
    movzx   edi, ah
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    shr     eax, 16
    xor     %1d, esi
    rol     edi, 8
    xor     %2d, edi
    movzx   esi, al
    movzx   edi, ah
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %3d, esi
    xor     %4d, edi

    movzx   esi, bl
    movzx   edi, bh
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    shr     ebx, 16
    xor     %2d, esi
    rol     edi, 8
    xor     %3d, edi
    movzx   esi, bl
    movzx   edi, bh
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %4d, esi
    xor     %1d, edi

    movzx   esi, cl
    movzx   edi, ch
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    shr     ecx, 16
    xor     %3d, esi
    rol     edi, 8
    xor     %4d, edi
    movzx   esi, cl
    movzx   edi, ch
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %1d, esi
    xor     %2d, edi

    movzx   esi, dl
    movzx   edi, dh
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    shr     edx, 16
    xor     %4d, esi
    rol     edi, 8
    xor     %1d, edi
    movzx   esi, dl
    movzx   edi, dh
    movzx   esi, t_ref(i,rsi)
    movzx   edi, t_ref(i,rdi)
    rol     esi, 16
    rol     edi, 24
    xor     %2d, esi
    xor     %3d, edi
%endmacro

%endif

%ifdef ENCRYPTION

; aes_encrypt(in_blk, out_blk, cx)
;   Encrypts one 16-byte block. Arguments arrive in rcx, rdx, r8 by default,
;   or in rdi, rsi, rdx when __GNUC__ is defined.
;   Returns 0 in rax on success, or -1 (without writing the output block) when
;   the length byte stored after the key schedule is not 10*16, 12*16 or
;   14*16.
;   rbx, rbp and r12 are saved and restored on the stack in every variant;
;   rsi and rdi only when __GNUC__ is not defined.

    global  aes_encrypt
%ifdef DLL_EXPORT
    export  aes_encrypt
%endif

    ; Forward tables: the normal round table, followed by the last round
    ; table when LAST_ROUND_TABLES is defined (fl_rnd reaches it by adding
    ; 2048 to tptr).
    section .data align=64
    align   64
enc_tab:
    enc_vals u8
%ifdef LAST_ROUND_TABLES
    enc_vals w8
%endif

    section .text align=16
    align   16

; Prologue. Afterwards, in every variant: rdi = input pointer,
; [rsp] = output pointer, r8 (kptr) = context.
%ifdef _SEH_
proc_frame aes_encrypt
	alloc_stack	7*8			; 7 to align stack to 16 bytes
	save_reg	rsi,4*8
	save_reg	rdi,5*8
	save_reg	rbx,1*8
	save_reg	rbp,2*8
	save_reg	r12,3*8
end_prologue
    mov     rdi, rcx        ; input pointer
    mov     [rsp+0*8], rdx  ; output pointer
%else
	aes_encrypt:
	%ifdef __GNUC__
		sub     rsp, 4*8        ; gnu/linux binary interface
		mov     [rsp+0*8], rsi  ; output pointer
		mov     r8, rdx         ; context
	%else
		sub     rsp, 6*8        ; windows binary interface
		mov     [rsp+4*8], rsi
		mov     [rsp+5*8], rdi
		mov     rdi, rcx        ; input pointer
		mov     [rsp+0*8], rdx  ; output pointer
	%endif
		mov     [rsp+1*8], rbx  ; input pointer in rdi
		mov     [rsp+2*8], rbp  ; output pointer in [rsp]
		mov     [rsp+3*8], r12  ; context in r8
%endif

    ; esi = byte stored directly after the KS_LENGTH key schedule words; it
    ; is compared against rounds*16 below and used as a byte offset.
    movzx   esi, byte [kptr+4*KS_LENGTH]
    lea     tptr, [rel enc_tab]
    sub     kptr, fofs      ; bias kptr so that fk_ref displacements stay small

    ; load the input block into the state registers
    mov     eax, [rdi+0*4]
    mov     ebx, [rdi+1*4]
    mov     ecx, [rdi+2*4]
    mov     edx, [rdi+3*4]

    ; XOR in the first four key schedule words (round 0)
    xor     eax, [kptr+fofs]
    xor     ebx, [kptr+fofs+4]
    xor     ecx, [kptr+fofs+8]
    xor     edx, [kptr+fofs+12]

    ; Move kptr forward by the length byte so that fk_ref(n,..) addresses the
    ; key n rounds before the last one, then enter the unrolled round
    ; sequence at the point matching the number of rounds.
    lea     kptr,[kptr+rsi]
    cmp     esi, 10*16
    je      .3
    cmp     esi, 12*16
    je      .2
    cmp     esi, 14*16
    je      .1
    mov     rax, -1         ; unsupported length: return -1
    jmp     .4

.1: ff_rnd  r9, r10, r11, r12, 13
    ff_rnd  r9, r10, r11, r12, 12
.2: ff_rnd  r9, r10, r11, r12, 11
    ff_rnd  r9, r10, r11, r12, 10
.3: ff_rnd  r9, r10, r11, r12, 9
    ff_rnd  r9, r10, r11, r12, 8
    ff_rnd  r9, r10, r11, r12, 7
    ff_rnd  r9, r10, r11, r12, 6
    ff_rnd  r9, r10, r11, r12, 5
    ff_rnd  r9, r10, r11, r12, 4
    ff_rnd  r9, r10, r11, r12, 3
    ff_rnd  r9, r10, r11, r12, 2
    ff_rnd  r9, r10, r11, r12, 1
    fl_rnd  r9, r10, r11, r12, 0

    ; store the result (left in r9d..r12d by fl_rnd) and return 0
    mov     rbx, [rsp]
    mov     [rbx], r9d
    mov     [rbx+4], r10d
    mov     [rbx+8], r11d
    mov     [rbx+12], r12d
    xor     rax, rax
.4:
    ; common exit: restore saved registers and release the stack frame
    mov     rbx, [rsp+1*8]
    mov     rbp, [rsp+2*8]
    mov     r12, [rsp+3*8]
%ifdef __GNUC__
    add     rsp, 4*8
    ret
%else
		mov     rsi, [rsp+4*8]
		mov     rdi, [rsp+5*8]
	%ifdef _SEH_
		add     rsp, 7*8
		ret
	endproc_frame
	%else
		add     rsp, 6*8
		ret
	%endif
%endif

%endif

%ifdef DECRYPTION

; aes_decrypt(in_blk, out_blk, cx)
;   Decrypts one 16-byte block. Arguments arrive in rcx, rdx, r8 by default,
;   or in rdi, rsi, rdx when __GNUC__ is defined.
;   Returns 0 in rax on success, or -1 (without writing the output block) when
;   the length byte stored after the key schedule is not 10*16, 12*16 or
;   14*16.
;   rbx, rbp and r12 are saved and restored on the stack in every variant;
;   rsi and rdi only when __GNUC__ is not defined.

    global  aes_decrypt
%ifdef DLL_EXPORT
    export  aes_decrypt
%endif

    ; Inverse tables: the normal inverse round table, followed by the last
    ; round table when LAST_ROUND_TABLES is defined (il_rnd reaches it by
    ; adding 2048 to tptr).
    section .data
    align   64
dec_tab:
    dec_vals v8
%ifdef LAST_ROUND_TABLES
    dec_vals w8
%endif

    section .text
    align   16

; Prologue. Afterwards, in every variant: rdi = input pointer,
; [rsp] = output pointer, r8 (kptr) = context.
%ifdef _SEH_
proc_frame aes_decrypt
	alloc_stack	7*8			; 7 to align stack to 16 bytes
	save_reg	rsi,4*8
	save_reg	rdi,5*8
	save_reg	rbx,1*8
	save_reg	rbp,2*8
	save_reg	r12,3*8
end_prologue
    mov     rdi, rcx        ; input pointer
    mov     [rsp+0*8], rdx  ; output pointer
%else
	aes_decrypt:
	%ifdef __GNUC__
		sub     rsp, 4*8        ; gnu/linux binary interface
		mov     [rsp+0*8], rsi  ; output pointer
		mov     r8, rdx         ; context
	%else
		sub     rsp, 6*8        ; windows binary interface
		mov     [rsp+4*8], rsi
		mov     [rsp+5*8], rdi
		mov     rdi, rcx        ; input pointer
		mov     [rsp+0*8], rdx  ; output pointer
	%endif
		mov     [rsp+1*8], rbx  ; input pointer in rdi
		mov     [rsp+2*8], rbp  ; output pointer in [rsp]
		mov     [rsp+3*8], r12  ; context in r8
%endif

    ; esi = byte stored directly after the KS_LENGTH key schedule words; it
    ; is compared against rounds*16 below and used as a byte offset.
    movzx   esi,byte[kptr+4*KS_LENGTH]
    lea     tptr, [rel dec_tab]
    sub     kptr, rofs      ; bias kptr to match the ik_ref displacements

    ; load the input block into the state registers
    mov     eax, [rdi+0*4]
    mov     ebx, [rdi+1*4]
    mov     ecx, [rdi+2*4]
    mov     edx, [rdi+3*4]

    ; rdi is free now and is reused to address the first key to apply:
    ; the start of the schedule when AES_REV_DKS is set (kptr then moves
    ; forward by the length byte), otherwise the schedule start plus the
    ; length byte (kptr stays at the start).
%ifdef      AES_REV_DKS
    mov     rdi, kptr
    lea     kptr,[kptr+rsi]
%else
    lea     rdi,[kptr+rsi]
%endif

    ; XOR in the four words of that first key
    xor     eax, [rdi+rofs]
    xor     ebx, [rdi+rofs+4]
    xor     ecx, [rdi+rofs+8]
    xor     edx, [rdi+rofs+12]

    ; enter the unrolled round sequence at the point matching the number of
    ; rounds
    cmp     esi, 10*16
    je      .3
    cmp     esi, 12*16
    je      .2
    cmp     esi, 14*16
    je      .1
    mov     rax, -1         ; unsupported length: return -1
    jmp     .4

.1: ii_rnd  r9, r10, r11, r12, 13
    ii_rnd  r9, r10, r11, r12, 12
.2: ii_rnd  r9, r10, r11, r12, 11
    ii_rnd  r9, r10, r11, r12, 10
.3: ii_rnd  r9, r10, r11, r12, 9
    ii_rnd  r9, r10, r11, r12, 8
    ii_rnd  r9, r10, r11, r12, 7
    ii_rnd  r9, r10, r11, r12, 6
    ii_rnd  r9, r10, r11, r12, 5
    ii_rnd  r9, r10, r11, r12, 4
    ii_rnd  r9, r10, r11, r12, 3
    ii_rnd  r9, r10, r11, r12, 2
    ii_rnd  r9, r10, r11, r12, 1
    il_rnd  r9, r10, r11, r12, 0

    ; store the result (left in r9d..r12d by il_rnd) and return 0
    mov     rbx, [rsp]
    mov     [rbx], r9d
    mov     [rbx+4], r10d
    mov     [rbx+8], r11d
    mov     [rbx+12], r12d
    xor     rax, rax
    ; common exit: restore saved registers and release the stack frame
.4: mov     rbx, [rsp+1*8]
    mov     rbp, [rsp+2*8]
    mov     r12, [rsp+3*8]
%ifdef __GNUC__
    add     rsp, 4*8
    ret
%else
		mov     rsi, [rsp+4*8]
		mov     rdi, [rsp+5*8]
	%ifdef _SEH_
		add     rsp, 7*8
		ret
	endproc_frame
	%else
		add     rsp, 6*8
		ret
	%endif
%endif

%endif