VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Crypto/AesSmall_x86.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/Crypto/AesSmall_x86.asm')
-rw-r--r--src/Crypto/AesSmall_x86.asm2888
1 files changed, 1444 insertions, 1444 deletions
diff --git a/src/Crypto/AesSmall_x86.asm b/src/Crypto/AesSmall_x86.asm
index fe7dc47b..872aa013 100644
--- a/src/Crypto/AesSmall_x86.asm
+++ b/src/Crypto/AesSmall_x86.asm
@@ -1,1444 +1,1444 @@
-
-; ---------------------------------------------------------------------------
-; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
-;
-; LICENSE TERMS
-;
-; The free distribution and use of this software is allowed (with or without
-; changes) provided that:
-;
-; 1. source code distributions include the above copyright notice, this
-; list of conditions and the following disclaimer;
-;
-; 2. binary distributions include the above copyright notice, this list
-; of conditions and the following disclaimer in their documentation;
-;
-; 3. the name of the copyright holder is not used to endorse products
-; built using this software without specific written permission.
-;
-; DISCLAIMER
-;
-; This software is provided 'as is' with no explicit or implied warranties
-; in respect of its properties, including, but not limited to, correctness
-; and/or fitness for purpose.
-; ---------------------------------------------------------------------------
-; Issue 20/12/2007
-;
-; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
-; and the same define to be set here as well. If AES_V2C is set this file
-; requires the C files aeskey.c and aestab.c for support.
-
-; An AES implementation for x86 processors using the YASM (or NASM) assembler.
-; This is a full assembler implementation covering encryption, decryption and
-; key scheduling. It uses 2k bytes of tables but its encryption and decryption
-; performance is very close to that obtained using large tables. Key schedule
-; expansion is slower for both encryption and decryption but this is likely to
-; be offset by the much smaller load that this version places on the processor
-; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
-; the design of the AES round function used here.
-;
-; This code provides the standard AES block size (128 bits, 16 bytes) and the
-; three standard AES key sizes (128, 192 and 256 bits). It has the same call
-; interface as my C implementation. The ebx, esi, edi and ebp registers are
-; preserved across calls but eax, ecx and edx and the artihmetic status flags
-; are not. Although this is a full assembler implementation, it can be used
-; in conjunction with my C code which provides faster key scheduling using
-; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
-; defined. It is also important that the defines below match those used in the
-; C code. This code uses the VC++ register saving conentions; if it is used
-; with another compiler, conventions for using and saving registers may need
-; to be checked (and calling conventions). The YASM command line for the VC++
-; custom build step is:
-;
-; yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
-;
-; For the cryptlib build this is (pcg):
-;
-; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm
-;
-; where <Z> is ASM_X86_V2 or ASM_X86_V2C. The calling intefaces are:
-;
-; AES_RETURN aes_encrypt(const unsigned char in_blk[],
-; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
-;
-; AES_RETURN aes_decrypt(const unsigned char in_blk[],
-; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
-;
-; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
-; const aes_encrypt_ctx cx[1]);
-;
-; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
-; const aes_decrypt_ctx cx[1]);
-;
-; AES_RETURN aes_encrypt_key(const unsigned char key[],
-; unsigned int len, const aes_decrypt_ctx cx[1]);
-;
-; AES_RETURN aes_decrypt_key(const unsigned char key[],
-; unsigned int len, const aes_decrypt_ctx cx[1]);
-;
-; where <NNN> is 128, 102 or 256. In the last two calls the length can be in
-; either bits or bytes.
-
-; The DLL interface must use the _stdcall convention in which the number
-; of bytes of parameter space is added after an @ to the sutine's name.
-; We must also remove our parameters from the stack before return (see
-; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
-
-;
-; Adapted for TrueCrypt:
-; - All tables generated at run-time
-; - Adapted for 16-bit environment
-;
-
-CPU 386
-USE16
-SEGMENT _TEXT PUBLIC CLASS=CODE USE16
-SEGMENT _DATA PUBLIC CLASS=DATA USE16
-
-GROUP DGROUP _TEXT _DATA
-
-extern _aes_dec_tab ; Aestab.c
-extern _aes_enc_tab
-
-; %define DLL_EXPORT
-
-; The size of the code can be reduced by using functions for the encryption
-; and decryption rounds in place of macro expansion
-
-%define REDUCE_CODE_SIZE
-
-; Comment in/out the following lines to obtain the desired subroutines. These
-; selections MUST match those in the C header file aes.h
-
-; %define AES_128 ; define if AES with 128 bit keys is needed
-; %define AES_192 ; define if AES with 192 bit keys is needed
-%define AES_256 ; define if AES with 256 bit keys is needed
-; %define AES_VAR ; define if a variable key size is needed
-%define ENCRYPTION ; define if encryption is needed
-%define DECRYPTION ; define if decryption is needed
-; %define AES_REV_DKS ; define if key decryption schedule is reversed
-
-%ifndef ASM_X86_V2C
-%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed
-%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed
-%endif
-
-; The encryption key schedule has the following in memory layout where N is the
-; number of rounds (10, 12 or 14):
-;
-; lo: | input key (round 0) | ; each round is four 32-bit words
-; | encryption round 1 |
-; | encryption round 2 |
-; ....
-; | encryption round N-1 |
-; hi: | encryption round N |
-;
-; The decryption key schedule is normally set up so that it has the same
-; layout as above by actually reversing the order of the encryption key
-; schedule in memory (this happens when AES_REV_DKS is set):
-;
-; lo: | decryption round 0 | = | encryption round N |
-; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
-; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
-; .... ....
-; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
-; hi: | decryption round N | = | input key (round 0) |
-;
-; with rounds except the first and last modified using inv_mix_column()
-; But if AES_REV_DKS is NOT set the order of keys is left as it is for
-; encryption so that it has to be accessed in reverse when used for
-; decryption (although the inverse mix column modifications are done)
-;
-; lo: | decryption round 0 | = | input key (round 0) |
-; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
-; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
-; .... ....
-; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
-; hi: | decryption round N | = | encryption round N |
-;
-; This layout is faster when the assembler key scheduling provided here
-; is used.
-;
-; End of user defines
-
-%ifdef AES_VAR
-%ifndef AES_128
-%define AES_128
-%endif
-%ifndef AES_192
-%define AES_192
-%endif
-%ifndef AES_256
-%define AES_256
-%endif
-%endif
-
-%ifdef AES_VAR
-%define KS_LENGTH 60
-%elifdef AES_256
-%define KS_LENGTH 60
-%elifdef AES_192
-%define KS_LENGTH 52
-%else
-%define KS_LENGTH 44
-%endif
-
-; These macros implement stack based local variables
-
-%macro save 2
- mov [esp+4*%1],%2
-%endmacro
-
-%macro restore 2
- mov %1,[esp+4*%2]
-%endmacro
-
-%ifdef REDUCE_CODE_SIZE
- %macro mf_call 1
- call %1
- %endmacro
-%else
- %macro mf_call 1
- %1
- %endmacro
-%endif
-
-; the DLL has to implement the _stdcall calling interface on return
-; In this case we have to take our parameters (3 4-byte pointers)
-; off the stack
-
-%define parms 12
-
-%macro do_name 1-2 parms
-%ifndef DLL_EXPORT
- global %1
-%1:
-%else
- global %1@%2
- export %1@%2
-%1@%2:
-%endif
-%endmacro
-
-%macro do_call 1-2 parms
-%ifndef DLL_EXPORT
- call %1
- add esp,%2
-%else
- call %1@%2
-%endif
-%endmacro
-
-%macro do_exit 0-1 parms
-%ifdef DLL_EXPORT
- ret %1
-%else
- ret
-%endif
-%endmacro
-
-; finite field multiplies by {02}, {04} and {08}
-
-%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
-%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
-%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
-
-; finite field multiplies required in table generation
-
-%define f3(x) (f2(x) ^ x)
-%define f9(x) (f8(x) ^ x)
-%define fb(x) (f8(x) ^ f2(x) ^ x)
-%define fd(x) (f8(x) ^ f4(x) ^ x)
-%define fe(x) (f8(x) ^ f4(x) ^ f2(x))
-
-%define etab_0(x) [_aes_enc_tab+4+8*x]
-%define etab_1(x) [_aes_enc_tab+3+8*x]
-%define etab_2(x) [_aes_enc_tab+2+8*x]
-%define etab_3(x) [_aes_enc_tab+1+8*x]
-%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx
-%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00
-
-%define btab_0(x) [_aes_enc_tab+6+8*x]
-%define btab_1(x) [_aes_enc_tab+5+8*x]
-%define btab_2(x) [_aes_enc_tab+4+8*x]
-%define btab_3(x) [_aes_enc_tab+3+8*x]
-
-; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
-; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
-;
-; Input:
-;
-; EAX column[0]
-; EBX column[1]
-; ECX column[2]
-; EDX column[3]
-; ESI column key[round][2]
-; EDI column key[round][3]
-; EBP scratch
-;
-; Output:
-;
-; EBP column[0] unkeyed
-; EBX column[1] unkeyed
-; ESI column[2] keyed
-; EDI column[3] keyed
-; EAX scratch
-; ECX scratch
-; EDX scratch
-
-%macro rnd_fun 2
-
- rol ebx,16
- %1 esi, cl, 0, ebp
- %1 esi, dh, 1, ebp
- %1 esi, bh, 3, ebp
- %1 edi, dl, 0, ebp
- %1 edi, ah, 1, ebp
- %1 edi, bl, 2, ebp
- %2 ebp, al, 0, ebp
- shr ebx,16
- and eax,0xffff0000
- or eax,ebx
- shr edx,16
- %1 ebp, ah, 1, ebx
- %1 ebp, dh, 3, ebx
- %2 ebx, dl, 2, ebx
- %1 ebx, ch, 1, edx
- %1 ebx, al, 0, edx
- shr eax,16
- shr ecx,16
- %1 ebp, cl, 2, edx
- %1 edi, ch, 3, edx
- %1 esi, al, 2, edx
- %1 ebx, ah, 3, edx
-
-%endmacro
-
-; Basic MOV and XOR Operations for normal rounds
-
-%macro nr_xor 4
- movzx %4,%2
- xor %1,etab_%3(%4)
-%endmacro
-
-%macro nr_mov 4
- movzx %4,%2
- mov %1,etab_%3(%4)
-%endmacro
-
-; Basic MOV and XOR Operations for last round
-
-%if 1
-
- %macro lr_xor 4
- movzx %4,%2
- movzx %4,etab_b(%4)
- %if %3 != 0
- shl %4,8*%3
- %endif
- xor %1,%4
- %endmacro
-
- %macro lr_mov 4
- movzx %4,%2
- movzx %1,etab_b(%4)
- %if %3 != 0
- shl %1,8*%3
- %endif
- %endmacro
-
-%else ; less effective but worth leaving as an option
-
- %macro lr_xor 4
- movzx %4,%2
- mov %4,btab_%3(%4)
- and %4,0x000000ff << 8 * %3
- xor %1,%4
- %endmacro
-
- %macro lr_mov 4
- movzx %4,%2
- mov %1,btab_%3(%4)
- and %1,0x000000ff << 8 * %3
- %endmacro
-
-%endif
-
-; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions
-
-%ifdef REDUCE_CODE_SIZE
-
-l3s_col:
- movzx ecx,al ; in eax
- movzx ecx, etab_b(ecx) ; out eax
- xor edx,ecx ; scratch ecx,edx
- movzx ecx,ah
- movzx ecx, etab_b(ecx)
- shl ecx,8
- xor edx,ecx
- shr eax,16
- movzx ecx,al
- movzx ecx, etab_b(ecx)
- shl ecx,16
- xor edx,ecx
- movzx ecx,ah
- movzx ecx, etab_b(ecx)
- shl ecx,24
- xor edx,ecx
- mov eax,edx
- ret
-
-%else
-
-%macro l3s_col 0
-
- movzx ecx,al ; in eax
- movzx ecx, etab_b(ecx) ; out eax
- xor edx,ecx ; scratch ecx,edx
- movzx ecx,ah
- movzx ecx, etab_b(ecx)
- shl ecx,8
- xor edx,ecx
- shr eax,16
- movzx ecx,al
- movzx ecx, etab_b(ecx)
- shl ecx,16
- xor edx,ecx
- movzx ecx,ah
- movzx ecx, etab_b(ecx)
- shl ecx,24
- xor edx,ecx
- mov eax,edx
-
-%endmacro
-
-%endif
-
-; offsets to parameters
-
-in_blk equ 2 ; input byte array address parameter
-out_blk equ 4 ; output byte array address parameter
-ctx equ 6 ; AES context structure
-stk_spc equ 20 ; stack space
-
-%ifdef ENCRYPTION
-
-; %define ENCRYPTION_TABLE
-
-%ifdef REDUCE_CODE_SIZE
-
-enc_round:
- sub sp, 2
- add ebp,16
- save 1,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- rnd_fun nr_xor, nr_mov
-
- mov eax,ebp
- mov ecx,esi
- mov edx,edi
- restore ebp,1
- xor eax,[ebp]
- xor ebx,[ebp+4]
- add sp, 2
- ret
-
-%else
-
-%macro enc_round 0
-
- add ebp,16
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- rnd_fun nr_xor, nr_mov
-
- mov eax,ebp
- mov ecx,esi
- mov edx,edi
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
-%endif
-
-%macro enc_last_round 0
-
- add ebp,16
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- rnd_fun lr_xor, lr_mov
-
- mov eax,ebp
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
- section _TEXT
-
-; AES Encryption Subroutine
-
- do_name _aes_encrypt,12
-
- mov ax, sp
- movzx esp, ax
-
- sub esp,stk_spc
- mov [esp+16],ebp
- mov [esp+12],ebx
- mov [esp+ 8],esi
- mov [esp+ 4],edi
-
- movzx esi,word [esp+in_blk+stk_spc] ; input pointer
- mov eax,[esi ]
- mov ebx,[esi+ 4]
- mov ecx,[esi+ 8]
- mov edx,[esi+12]
-
- movzx ebp,word [esp+ctx+stk_spc] ; key pointer
- movzx edi,byte [ebp+4*KS_LENGTH]
- xor eax,[ebp ]
- xor ebx,[ebp+ 4]
- xor ecx,[ebp+ 8]
- xor edx,[ebp+12]
-
-; determine the number of rounds
-
-%ifndef AES_256
- cmp edi,10*16
- je .3
- cmp edi,12*16
- je .2
- cmp edi,14*16
- je .1
- mov eax,-1
- jmp .5
-%endif
-
-.1: mf_call enc_round
- mf_call enc_round
-.2: mf_call enc_round
- mf_call enc_round
-.3: mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- mf_call enc_round
- enc_last_round
-
- movzx edx,word [esp+out_blk+stk_spc]
- mov [edx],eax
- mov [edx+4],ebx
- mov [edx+8],esi
- mov [edx+12],edi
- xor eax,eax
-
-.5: mov ebp,[esp+16]
- mov ebx,[esp+12]
- mov esi,[esp+ 8]
- mov edi,[esp+ 4]
- add esp,stk_spc
- do_exit 12
-
-%endif
-
-%macro f_key 2
-
- push ecx
- push edx
- mov edx,esi
- ror eax,8
- mf_call l3s_col
- mov esi,eax
- pop edx
- pop ecx
- xor esi,rc_val
-
- mov [ebp+%1*%2],esi
- xor edi,esi
- mov [ebp+%1*%2+4],edi
- xor ecx,edi
- mov [ebp+%1*%2+8],ecx
- xor edx,ecx
- mov [ebp+%1*%2+12],edx
- mov eax,edx
-
-%if %2 == 24
-
-%if %1 < 7
- xor eax,[ebp+%1*%2+16-%2]
- mov [ebp+%1*%2+16],eax
- xor eax,[ebp+%1*%2+20-%2]
- mov [ebp+%1*%2+20],eax
-%endif
-
-%elif %2 == 32
-
-%if %1 < 6
- push ecx
- push edx
- mov edx,[ebp+%1*%2+16-%2]
- mf_call l3s_col
- pop edx
- pop ecx
- mov [ebp+%1*%2+16],eax
- xor eax,[ebp+%1*%2+20-%2]
- mov [ebp+%1*%2+20],eax
- xor eax,[ebp+%1*%2+24-%2]
- mov [ebp+%1*%2+24],eax
- xor eax,[ebp+%1*%2+28-%2]
- mov [ebp+%1*%2+28],eax
-%endif
-
-%endif
-
-%assign rc_val f2(rc_val)
-
-%endmacro
-
-%ifdef ENCRYPTION_KEY_SCHEDULE
-
-%ifdef AES_128
-
-%ifndef ENCRYPTION_TABLE
-; %define ENCRYPTION_TABLE
-%endif
-
-%assign rc_val 1
-
- do_name _aes_encrypt_key128,8
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov ebp,[esp+24]
- mov [ebp+4*KS_LENGTH],dword 10*16
- mov ebx,[esp+20]
-
- mov esi,[ebx]
- mov [ebp],esi
- mov edi,[ebx+4]
- mov [ebp+4],edi
- mov ecx,[ebx+8]
- mov [ebp+8],ecx
- mov edx,[ebx+12]
- mov [ebp+12],edx
- add ebp,16
- mov eax,edx
-
- f_key 0,16 ; 11 * 4 = 44 unsigned longs
- f_key 1,16 ; 4 + 4 * 10 generated = 44
- f_key 2,16
- f_key 3,16
- f_key 4,16
- f_key 5,16
- f_key 6,16
- f_key 7,16
- f_key 8,16
- f_key 9,16
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- xor eax,eax
- do_exit 8
-
-%endif
-
-%ifdef AES_192
-
-%ifndef ENCRYPTION_TABLE
-; %define ENCRYPTION_TABLE
-%endif
-
-%assign rc_val 1
-
- do_name _aes_encrypt_key192,8
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov ebp,[esp+24]
- mov [ebp+4*KS_LENGTH],dword 12 * 16
- mov ebx,[esp+20]
-
- mov esi,[ebx]
- mov [ebp],esi
- mov edi,[ebx+4]
- mov [ebp+4],edi
- mov ecx,[ebx+8]
- mov [ebp+8],ecx
- mov edx,[ebx+12]
- mov [ebp+12],edx
- mov eax,[ebx+16]
- mov [ebp+16],eax
- mov eax,[ebx+20]
- mov [ebp+20],eax
- add ebp,24
-
- f_key 0,24 ; 13 * 4 = 52 unsigned longs
- f_key 1,24 ; 6 + 6 * 8 generated = 54
- f_key 2,24
- f_key 3,24
- f_key 4,24
- f_key 5,24
- f_key 6,24
- f_key 7,24
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- xor eax,eax
- do_exit 8
-
-%endif
-
-%ifdef AES_256
-
-%ifndef ENCRYPTION_TABLE
-; %define ENCRYPTION_TABLE
-%endif
-
-%assign rc_val 1
-
- do_name _aes_encrypt_key256,8
-
- mov ax, sp
- movzx esp, ax
-
- push ebp
- push ebx
- push esi
- push edi
-
- movzx ebp, word [esp+20] ; ks
- mov [ebp+4*KS_LENGTH],dword 14 * 16
- movzx ebx, word [esp+18] ; key
-
- mov esi,[ebx]
- mov [ebp],esi
- mov edi,[ebx+4]
- mov [ebp+4],edi
- mov ecx,[ebx+8]
- mov [ebp+8],ecx
- mov edx,[ebx+12]
- mov [ebp+12],edx
- mov eax,[ebx+16]
- mov [ebp+16],eax
- mov eax,[ebx+20]
- mov [ebp+20],eax
- mov eax,[ebx+24]
- mov [ebp+24],eax
- mov eax,[ebx+28]
- mov [ebp+28],eax
- add ebp,32
-
- f_key 0,32 ; 15 * 4 = 60 unsigned longs
- f_key 1,32 ; 8 + 8 * 7 generated = 64
- f_key 2,32
- f_key 3,32
- f_key 4,32
- f_key 5,32
- f_key 6,32
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- xor eax,eax
- do_exit 8
-
-%endif
-
-%ifdef AES_VAR
-
-%ifndef ENCRYPTION_TABLE
-; %define ENCRYPTION_TABLE
-%endif
-
- do_name _aes_encrypt_key,12
-
- mov ecx,[esp+4]
- mov eax,[esp+8]
- mov edx,[esp+12]
- push edx
- push ecx
-
- cmp eax,16
- je .1
- cmp eax,128
- je .1
-
- cmp eax,24
- je .2
- cmp eax,192
- je .2
-
- cmp eax,32
- je .3
- cmp eax,256
- je .3
- mov eax,-1
- add esp,8
- do_exit 12
-
-.1: do_call _aes_encrypt_key128,8
- do_exit 12
-.2: do_call _aes_encrypt_key192,8
- do_exit 12
-.3: do_call _aes_encrypt_key256,8
- do_exit 12
-
-%endif
-
-%endif
-
-%ifdef ENCRYPTION_TABLE
-
-; S-box data - 256 entries
-
- section _DATA
-
-%define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x)
-
-_aes_enc_tab:
- db u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
- db u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
- db u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
- db u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
- db u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
- db u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
- db u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
- db u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
- db u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
- db u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
- db u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
- db u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
- db u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
- db u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
- db u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
- db u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
- db u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
- db u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
- db u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
- db u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
- db u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
- db u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
- db u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
- db u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
- db u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
- db u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
- db u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
- db u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
- db u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
- db u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
- db u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
- db u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
-
-%endif
-
-%ifdef DECRYPTION
-
-; %define DECRYPTION_TABLE
-
-%define dtab_0(x) [_aes_dec_tab+ 8*x]
-%define dtab_1(x) [_aes_dec_tab+3+8*x]
-%define dtab_2(x) [_aes_dec_tab+2+8*x]
-%define dtab_3(x) [_aes_dec_tab+1+8*x]
-%define dtab_x(x) byte [_aes_dec_tab+7+8*x]
-
-%macro irn_fun 2
-
- rol eax,16
- %1 esi, cl, 0, ebp
- %1 esi, bh, 1, ebp
- %1 esi, al, 2, ebp
- %1 edi, dl, 0, ebp
- %1 edi, ch, 1, ebp
- %1 edi, ah, 3, ebp
- %2 ebp, bl, 0, ebp
- shr eax,16
- and ebx,0xffff0000
- or ebx,eax
- shr ecx,16
- %1 ebp, bh, 1, eax
- %1 ebp, ch, 3, eax
- %2 eax, cl, 2, ecx
- %1 eax, bl, 0, ecx
- %1 eax, dh, 1, ecx
- shr ebx,16
- shr edx,16
- %1 esi, dh, 3, ecx
- %1 ebp, dl, 2, ecx
- %1 eax, bh, 3, ecx
- %1 edi, bl, 2, ecx
-
-%endmacro
-
-; Basic MOV and XOR Operations for normal rounds
-
-%macro ni_xor 4
- movzx %4,%2
- xor %1,dtab_%3(%4)
-%endmacro
-
-%macro ni_mov 4
- movzx %4,%2
- mov %1,dtab_%3(%4)
-%endmacro
-
-; Basic MOV and XOR Operations for last round
-
-%macro li_xor 4
- movzx %4,%2
- movzx %4,dtab_x(%4)
-%if %3 != 0
- shl %4,8*%3
-%endif
- xor %1,%4
-%endmacro
-
-%macro li_mov 4
- movzx %4,%2
- movzx %1,dtab_x(%4)
-%if %3 != 0
- shl %1,8*%3
-%endif
-%endmacro
-
-%ifdef REDUCE_CODE_SIZE
-
-dec_round:
- sub sp, 2
-%ifdef AES_REV_DKS
- add ebp,16
-%else
- sub ebp,16
-%endif
- save 1,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- irn_fun ni_xor, ni_mov
-
- mov ebx,ebp
- mov ecx,esi
- mov edx,edi
- restore ebp,1
- xor eax,[ebp]
- xor ebx,[ebp+4]
- add sp, 2
- ret
-
-%else
-
-%macro dec_round 0
-
-%ifdef AES_REV_DKS
- add ebp,16
-%else
- sub ebp,16
-%endif
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- irn_fun ni_xor, ni_mov
-
- mov ebx,ebp
- mov ecx,esi
- mov edx,edi
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
-%endif
-
-%macro dec_last_round 0
-
-%ifdef AES_REV_DKS
- add ebp,16
-%else
- sub ebp,16
-%endif
- save 0,ebp
- mov esi,[ebp+8]
- mov edi,[ebp+12]
-
- irn_fun li_xor, li_mov
-
- mov ebx,ebp
- restore ebp,0
- xor eax,[ebp]
- xor ebx,[ebp+4]
-
-%endmacro
-
- section _TEXT
-
-; AES Decryption Subroutine
-
- do_name _aes_decrypt,12
-
- mov ax, sp
- movzx esp, ax
-
- sub esp,stk_spc
- mov [esp+16],ebp
- mov [esp+12],ebx
- mov [esp+ 8],esi
- mov [esp+ 4],edi
-
-; input four columns and xor in first round key
-
- movzx esi,word [esp+in_blk+stk_spc] ; input pointer
- mov eax,[esi ]
- mov ebx,[esi+ 4]
- mov ecx,[esi+ 8]
- mov edx,[esi+12]
- lea esi,[esi+16]
-
- movzx ebp, word [esp+ctx+stk_spc] ; key pointer
- movzx edi,byte[ebp+4*KS_LENGTH]
-%ifndef AES_REV_DKS ; if decryption key schedule is not reversed
- lea ebp,[ebp+edi] ; we have to access it from the top down
-%endif
- xor eax,[ebp ] ; key schedule
- xor ebx,[ebp+ 4]
- xor ecx,[ebp+ 8]
- xor edx,[ebp+12]
-
-; determine the number of rounds
-
-%ifndef AES_256
- cmp edi,10*16
- je .3
- cmp edi,12*16
- je .2
- cmp edi,14*16
- je .1
- mov eax,-1
- jmp .5
-%endif
-
-.1: mf_call dec_round
- mf_call dec_round
-.2: mf_call dec_round
- mf_call dec_round
-.3: mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- mf_call dec_round
- dec_last_round
-
-; move final values to the output array.
-
- movzx ebp,word [esp+out_blk+stk_spc]
- mov [ebp],eax
- mov [ebp+4],ebx
- mov [ebp+8],esi
- mov [ebp+12],edi
- xor eax,eax
-
-.5: mov ebp,[esp+16]
- mov ebx,[esp+12]
- mov esi,[esp+ 8]
- mov edi,[esp+ 4]
- add esp,stk_spc
- do_exit 12
-
-%endif
-
-%ifdef REDUCE_CODE_SIZE
-
-inv_mix_col:
- movzx ecx,dl ; input eax, edx
- movzx ecx,etab_b(ecx) ; output eax
- mov eax,dtab_0(ecx) ; used ecx
- movzx ecx,dh
- shr edx,16
- movzx ecx,etab_b(ecx)
- xor eax,dtab_1(ecx)
- movzx ecx,dl
- movzx ecx,etab_b(ecx)
- xor eax,dtab_2(ecx)
- movzx ecx,dh
- movzx ecx,etab_b(ecx)
- xor eax,dtab_3(ecx)
- ret
-
-%else
-
-%macro inv_mix_col 0
-
- movzx ecx,dl ; input eax, edx
- movzx ecx,etab_b(ecx) ; output eax
- mov eax,dtab_0(ecx) ; used ecx
- movzx ecx,dh
- shr edx,16
- movzx ecx,etab_b(ecx)
- xor eax,dtab_1(ecx)
- movzx ecx,dl
- movzx ecx,etab_b(ecx)
- xor eax,dtab_2(ecx)
- movzx ecx,dh
- movzx ecx,etab_b(ecx)
- xor eax,dtab_3(ecx)
-
-%endmacro
-
-%endif
-
-%ifdef DECRYPTION_KEY_SCHEDULE
-
-%ifdef AES_128
-
-%ifndef DECRYPTION_TABLE
-; %define DECRYPTION_TABLE
-%endif
-
- do_name _aes_decrypt_key128,8
-
- push ebp
- push ebx
- push esi
- push edi
- mov eax,[esp+24] ; context
- mov edx,[esp+20] ; key
- push eax
- push edx
- do_call _aes_encrypt_key128,8 ; generate expanded encryption key
- mov eax,10*16
- mov esi,[esp+24] ; pointer to first round key
- lea edi,[esi+eax] ; pointer to last round key
- add esi,32
- ; the inverse mix column transformation
- mov edx,[esi-16] ; needs to be applied to all round keys
- mf_call inv_mix_col ; except first and last. Hence start by
- mov [esi-16],eax ; transforming the four sub-keys in the
- mov edx,[esi-12] ; second round key
- mf_call inv_mix_col
- mov [esi-12],eax ; transformations for subsequent rounds
- mov edx,[esi-8] ; can then be made more efficient by
- mf_call inv_mix_col ; noting that for three of the four sub-keys
- mov [esi-8],eax ; in the encryption round key ek[r]:
- mov edx,[esi-4] ;
- mf_call inv_mix_col ; ek[r][n] = ek[r][n-1] ^ ek[r-1][n]
- mov [esi-4],eax ;
- ; where n is 1..3. Hence the corresponding
-.0: mov edx,[esi] ; subkeys in the decryption round key dk[r]
- mf_call inv_mix_col ; also obey since inv_mix_col is linear in
- mov [esi],eax ; GF(256):
- xor eax,[esi-12] ;
- mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
- xor eax,[esi-8] ;
- mov [esi+8],eax ; So we only need one inverse mix column
- xor eax,[esi-4] ; operation (n = 0) for each four word cycle
- mov [esi+12],eax ; in the expanded key.
- add esi,16
- cmp edi,esi
- jg .0
- jmp dec_end
-
-%endif
-
-%ifdef AES_192
-
-%ifndef DECRYPTION_TABLE
-; %define DECRYPTION_TABLE
-%endif
-
- do_name _aes_decrypt_key192,8
-
- push ebp
- push ebx
- push esi
- push edi
- mov eax,[esp+24] ; context
- mov edx,[esp+20] ; key
- push eax
- push edx
- do_call _aes_encrypt_key192,8 ; generate expanded encryption key
- mov eax,12*16
- mov esi,[esp+24] ; first round key
- lea edi,[esi+eax] ; last round key
- add esi,48 ; the first 6 words are the key, of
- ; which the top 2 words are part of
- mov edx,[esi-32] ; the second round key and hence
- mf_call inv_mix_col ; need to be modified. After this we
- mov [esi-32],eax ; need to do a further six values prior
- mov edx,[esi-28] ; to using a more efficient technique
- mf_call inv_mix_col ; based on:
- mov [esi-28],eax ;
- ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
- mov edx,[esi-24] ;
- mf_call inv_mix_col ; for n = 1 .. 5 where the key expansion
- mov [esi-24],eax ; cycle is now 6 words long
- mov edx,[esi-20]
- mf_call inv_mix_col
- mov [esi-20],eax
- mov edx,[esi-16]
- mf_call inv_mix_col
- mov [esi-16],eax
- mov edx,[esi-12]
- mf_call inv_mix_col
- mov [esi-12],eax
- mov edx,[esi-8]
- mf_call inv_mix_col
- mov [esi-8],eax
- mov edx,[esi-4]
- mf_call inv_mix_col
- mov [esi-4],eax
-
-.0: mov edx,[esi] ; the expanded key is 13 * 4 = 44 32-bit words
- mf_call inv_mix_col ; of which 11 * 4 = 44 have to be modified
- mov [esi],eax ; using inv_mix_col. We have already done 8
- xor eax,[esi-20] ; of these so 36 are left - hence we need
- mov [esi+4],eax ; exactly 6 loops of six here
- xor eax,[esi-16]
- mov [esi+8],eax
- xor eax,[esi-12]
- mov [esi+12],eax
- xor eax,[esi-8]
- mov [esi+16],eax
- xor eax,[esi-4]
- mov [esi+20],eax
- add esi,24
- cmp edi,esi
- jg .0
- jmp dec_end
-
-%endif
-
-%ifdef AES_256
-
-%ifndef DECRYPTION_TABLE
-; %define DECRYPTION_TABLE
-%endif
-
- do_name _aes_decrypt_key256,8
-
- mov ax, sp
- movzx esp, ax
- push ebp
- push ebx
- push esi
- push edi
-
- movzx eax, word [esp+20] ; ks
- movzx edx, word [esp+18] ; key
- push ax
- push dx
- do_call _aes_encrypt_key256,4 ; generate expanded encryption key
- mov eax,14*16
- movzx esi, word [esp+20] ; ks
- lea edi,[esi+eax]
- add esi,64
-
- mov edx,[esi-48] ; the primary key is 8 words, of which
- mf_call inv_mix_col ; the top four require modification
- mov [esi-48],eax
- mov edx,[esi-44]
- mf_call inv_mix_col
- mov [esi-44],eax
- mov edx,[esi-40]
- mf_call inv_mix_col
- mov [esi-40],eax
- mov edx,[esi-36]
- mf_call inv_mix_col
- mov [esi-36],eax
-
- mov edx,[esi-32] ; the encryption key expansion cycle is
- mf_call inv_mix_col ; now eight words long so we need to
- mov [esi-32],eax ; start by doing one complete block
- mov edx,[esi-28]
- mf_call inv_mix_col
- mov [esi-28],eax
- mov edx,[esi-24]
- mf_call inv_mix_col
- mov [esi-24],eax
- mov edx,[esi-20]
- mf_call inv_mix_col
- mov [esi-20],eax
- mov edx,[esi-16]
- mf_call inv_mix_col
- mov [esi-16],eax
- mov edx,[esi-12]
- mf_call inv_mix_col
- mov [esi-12],eax
- mov edx,[esi-8]
- mf_call inv_mix_col
- mov [esi-8],eax
- mov edx,[esi-4]
- mf_call inv_mix_col
- mov [esi-4],eax
-
-.0: mov edx,[esi] ; we can now speed up the remaining
- mf_call inv_mix_col ; rounds by using the technique
- mov [esi],eax ; outlined earlier. But note that
- xor eax,[esi-28] ; there is one extra inverse mix
- mov [esi+4],eax ; column operation as the 256 bit
- xor eax,[esi-24] ; key has an extra non-linear step
- mov [esi+8],eax ; for the midway element.
- xor eax,[esi-20]
- mov [esi+12],eax ; the expanded key is 15 * 4 = 60
- mov edx,[esi+16] ; 32-bit words of which 52 need to
- mf_call inv_mix_col ; be modified. We have already done
- mov [esi+16],eax ; 12 so 40 are left - which means
- xor eax,[esi-12] ; that we need exactly 5 loops of 8
- mov [esi+20],eax
- xor eax,[esi-8]
- mov [esi+24],eax
- xor eax,[esi-4]
- mov [esi+28],eax
- add esi,32
- cmp edi,esi
- jg .0
-
-%endif
-
-dec_end:
-
-%ifdef AES_REV_DKS
-
- movzx esi,word [esp+20] ; this reverses the order of the
-.1: mov eax,[esi] ; round keys if required
- mov ebx,[esi+4]
- mov ebp,[edi]
- mov edx,[edi+4]
- mov [esi],ebp
- mov [esi+4],edx
- mov [edi],eax
- mov [edi+4],ebx
-
- mov eax,[esi+8]
- mov ebx,[esi+12]
- mov ebp,[edi+8]
- mov edx,[edi+12]
- mov [esi+8],ebp
- mov [esi+12],edx
- mov [edi+8],eax
- mov [edi+12],ebx
-
- add esi,16
- sub edi,16
- cmp edi,esi
- jg .1
-
-%endif
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- xor eax,eax
- do_exit 8
-
-%ifdef AES_VAR
-
- do_name _aes_decrypt_key,12
-
- mov ecx,[esp+4]
- mov eax,[esp+8]
- mov edx,[esp+12]
- push edx
- push ecx
-
- cmp eax,16
- je .1
- cmp eax,128
- je .1
-
- cmp eax,24
- je .2
- cmp eax,192
- je .2
-
- cmp eax,32
- je .3
- cmp eax,256
- je .3
- mov eax,-1
- add esp,8
- do_exit 12
-
-.1: do_call _aes_decrypt_key128,8
- do_exit 12
-.2: do_call _aes_decrypt_key192,8
- do_exit 12
-.3: do_call _aes_decrypt_key256,8
- do_exit 12
-
-%endif
-
-%endif
-
-%ifdef DECRYPTION_TABLE
-
-; Inverse S-box data - 256 entries
-
- section _DATA
-
-%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
-
-_aes_dec_tab:
- db v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
- db v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
- db v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
- db v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
- db v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
- db v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
- db v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
- db v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
- db v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
- db v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
- db v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
- db v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
- db v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
- db v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
- db v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
- db v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
- db v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
- db v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
- db v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
- db v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
- db v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
- db v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
- db v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
- db v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
- db v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
- db v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
- db v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
- db v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
- db v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
- db v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
- db v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
- db v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)
-
-%endif
+
+; ---------------------------------------------------------------------------
+; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+;
+; LICENSE TERMS
+;
+; The free distribution and use of this software is allowed (with or without
+; changes) provided that:
+;
+; 1. source code distributions include the above copyright notice, this
+; list of conditions and the following disclaimer;
+;
+; 2. binary distributions include the above copyright notice, this list
+; of conditions and the following disclaimer in their documentation;
+;
+; 3. the name of the copyright holder is not used to endorse products
+; built using this software without specific written permission.
+;
+; DISCLAIMER
+;
+; This software is provided 'as is' with no explicit or implied warranties
+; in respect of its properties, including, but not limited to, correctness
+; and/or fitness for purpose.
+; ---------------------------------------------------------------------------
+; Issue 20/12/2007
+;
+; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
+; and the same define to be set here as well. If AES_V2C is set this file
+; requires the C files aeskey.c and aestab.c for support.
+
+; An AES implementation for x86 processors using the YASM (or NASM) assembler.
+; This is a full assembler implementation covering encryption, decryption and
+; key scheduling. It uses 2k bytes of tables but its encryption and decryption
+; performance is very close to that obtained using large tables. Key schedule
+; expansion is slower for both encryption and decryption but this is likely to
+; be offset by the much smaller load that this version places on the processor
+; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
+; the design of the AES round function used here.
+;
+; This code provides the standard AES block size (128 bits, 16 bytes) and the
+; three standard AES key sizes (128, 192 and 256 bits). It has the same call
+; interface as my C implementation. The ebx, esi, edi and ebp registers are
+; preserved across calls but eax, ecx and edx and the artihmetic status flags
+; are not. Although this is a full assembler implementation, it can be used
+; in conjunction with my C code which provides faster key scheduling using
+; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
+; defined. It is also important that the defines below match those used in the
+; C code. This code uses the VC++ register saving conentions; if it is used
+; with another compiler, conventions for using and saving registers may need
+; to be checked (and calling conventions). The YASM command line for the VC++
+; custom build step is:
+;
+; yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
+;
+; For the cryptlib build this is (pcg):
+;
+; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm
+;
+; where <Z> is ASM_X86_V2 or ASM_X86_V2C. The calling intefaces are:
+;
+; AES_RETURN aes_encrypt(const unsigned char in_blk[],
+; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
+;
+; AES_RETURN aes_decrypt(const unsigned char in_blk[],
+; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
+;
+; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
+; const aes_encrypt_ctx cx[1]);
+;
+; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
+; const aes_decrypt_ctx cx[1]);
+;
+; AES_RETURN aes_encrypt_key(const unsigned char key[],
+; unsigned int len, const aes_decrypt_ctx cx[1]);
+;
+; AES_RETURN aes_decrypt_key(const unsigned char key[],
+; unsigned int len, const aes_decrypt_ctx cx[1]);
+;
+; where <NNN> is 128, 102 or 256. In the last two calls the length can be in
+; either bits or bytes.
+
+; The DLL interface must use the _stdcall convention in which the number
+; of bytes of parameter space is added after an @ to the sutine's name.
+; We must also remove our parameters from the stack before return (see
+; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
+
+;
+; Adapted for TrueCrypt:
+; - All tables generated at run-time
+; - Adapted for 16-bit environment
+;
+
+CPU 386
+USE16
+SEGMENT _TEXT PUBLIC CLASS=CODE USE16
+SEGMENT _DATA PUBLIC CLASS=DATA USE16
+
+GROUP DGROUP _TEXT _DATA
+
+extern _aes_dec_tab ; Aestab.c
+extern _aes_enc_tab
+
+; %define DLL_EXPORT
+
+; The size of the code can be reduced by using functions for the encryption
+; and decryption rounds in place of macro expansion
+
+%define REDUCE_CODE_SIZE
+
+; Comment in/out the following lines to obtain the desired subroutines. These
+; selections MUST match those in the C header file aes.h
+
+; %define AES_128 ; define if AES with 128 bit keys is needed
+; %define AES_192 ; define if AES with 192 bit keys is needed
+%define AES_256 ; define if AES with 256 bit keys is needed
+; %define AES_VAR ; define if a variable key size is needed
+%define ENCRYPTION ; define if encryption is needed
+%define DECRYPTION ; define if decryption is needed
+; %define AES_REV_DKS ; define if key decryption schedule is reversed
+
+%ifndef ASM_X86_V2C
+%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed
+%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed
+%endif
+
+; The encryption key schedule has the following in memory layout where N is the
+; number of rounds (10, 12 or 14):
+;
+; lo: | input key (round 0) | ; each round is four 32-bit words
+; | encryption round 1 |
+; | encryption round 2 |
+; ....
+; | encryption round N-1 |
+; hi: | encryption round N |
+;
+; The decryption key schedule is normally set up so that it has the same
+; layout as above by actually reversing the order of the encryption key
+; schedule in memory (this happens when AES_REV_DKS is set):
+;
+; lo: | decryption round 0 | = | encryption round N |
+; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
+; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
+; .... ....
+; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
+; hi: | decryption round N | = | input key (round 0) |
+;
+; with rounds except the first and last modified using inv_mix_column()
+; But if AES_REV_DKS is NOT set the order of keys is left as it is for
+; encryption so that it has to be accessed in reverse when used for
+; decryption (although the inverse mix column modifications are done)
+;
+; lo: | decryption round 0 | = | input key (round 0) |
+; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
+; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
+; .... ....
+; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
+; hi: | decryption round N | = | encryption round N |
+;
+; This layout is faster when the assembler key scheduling provided here
+; is used.
+;
+; End of user defines
+
+%ifdef AES_VAR
+%ifndef AES_128
+%define AES_128
+%endif
+%ifndef AES_192
+%define AES_192
+%endif
+%ifndef AES_256
+%define AES_256
+%endif
+%endif
+
+%ifdef AES_VAR
+%define KS_LENGTH 60
+%elifdef AES_256
+%define KS_LENGTH 60
+%elifdef AES_192
+%define KS_LENGTH 52
+%else
+%define KS_LENGTH 44
+%endif
+
+; These macros implement stack based local variables
+
+%macro save 2
+ mov [esp+4*%1],%2
+%endmacro
+
+%macro restore 2
+ mov %1,[esp+4*%2]
+%endmacro
+
+%ifdef REDUCE_CODE_SIZE
+ %macro mf_call 1
+ call %1
+ %endmacro
+%else
+ %macro mf_call 1
+ %1
+ %endmacro
+%endif
+
+; the DLL has to implement the _stdcall calling interface on return
+; In this case we have to take our parameters (3 4-byte pointers)
+; off the stack
+
+%define parms 12
+
+%macro do_name 1-2 parms
+%ifndef DLL_EXPORT
+ global %1
+%1:
+%else
+ global %1@%2
+ export %1@%2
+%1@%2:
+%endif
+%endmacro
+
+%macro do_call 1-2 parms
+%ifndef DLL_EXPORT
+ call %1
+ add esp,%2
+%else
+ call %1@%2
+%endif
+%endmacro
+
+%macro do_exit 0-1 parms
+%ifdef DLL_EXPORT
+ ret %1
+%else
+ ret
+%endif
+%endmacro
+
+; finite field multiplies by {02}, {04} and {08}
+
+%define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
+%define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
+%define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
+
+; finite field multiplies required in table generation
+
+%define f3(x) (f2(x) ^ x)
+%define f9(x) (f8(x) ^ x)
+%define fb(x) (f8(x) ^ f2(x) ^ x)
+%define fd(x) (f8(x) ^ f4(x) ^ x)
+%define fe(x) (f8(x) ^ f4(x) ^ f2(x))
+
+%define etab_0(x) [_aes_enc_tab+4+8*x]
+%define etab_1(x) [_aes_enc_tab+3+8*x]
+%define etab_2(x) [_aes_enc_tab+2+8*x]
+%define etab_3(x) [_aes_enc_tab+1+8*x]
+%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx
+%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00
+
+%define btab_0(x) [_aes_enc_tab+6+8*x]
+%define btab_1(x) [_aes_enc_tab+5+8*x]
+%define btab_2(x) [_aes_enc_tab+4+8*x]
+%define btab_3(x) [_aes_enc_tab+3+8*x]
+
+; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
+; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
+;
+; Input:
+;
+; EAX column[0]
+; EBX column[1]
+; ECX column[2]
+; EDX column[3]
+; ESI column key[round][2]
+; EDI column key[round][3]
+; EBP scratch
+;
+; Output:
+;
+; EBP column[0] unkeyed
+; EBX column[1] unkeyed
+; ESI column[2] keyed
+; EDI column[3] keyed
+; EAX scratch
+; ECX scratch
+; EDX scratch
+
+%macro rnd_fun 2
+
+ rol ebx,16
+ %1 esi, cl, 0, ebp
+ %1 esi, dh, 1, ebp
+ %1 esi, bh, 3, ebp
+ %1 edi, dl, 0, ebp
+ %1 edi, ah, 1, ebp
+ %1 edi, bl, 2, ebp
+ %2 ebp, al, 0, ebp
+ shr ebx,16
+ and eax,0xffff0000
+ or eax,ebx
+ shr edx,16
+ %1 ebp, ah, 1, ebx
+ %1 ebp, dh, 3, ebx
+ %2 ebx, dl, 2, ebx
+ %1 ebx, ch, 1, edx
+ %1 ebx, al, 0, edx
+ shr eax,16
+ shr ecx,16
+ %1 ebp, cl, 2, edx
+ %1 edi, ch, 3, edx
+ %1 esi, al, 2, edx
+ %1 ebx, ah, 3, edx
+
+%endmacro
+
+; Basic MOV and XOR Operations for normal rounds
+
+%macro nr_xor 4
+ movzx %4,%2
+ xor %1,etab_%3(%4)
+%endmacro
+
+%macro nr_mov 4
+ movzx %4,%2
+ mov %1,etab_%3(%4)
+%endmacro
+
+; Basic MOV and XOR Operations for last round
+
+%if 1
+
+ %macro lr_xor 4
+ movzx %4,%2
+ movzx %4,etab_b(%4)
+ %if %3 != 0
+ shl %4,8*%3
+ %endif
+ xor %1,%4
+ %endmacro
+
+ %macro lr_mov 4
+ movzx %4,%2
+ movzx %1,etab_b(%4)
+ %if %3 != 0
+ shl %1,8*%3
+ %endif
+ %endmacro
+
+%else ; less effective but worth leaving as an option
+
+ %macro lr_xor 4
+ movzx %4,%2
+ mov %4,btab_%3(%4)
+ and %4,0x000000ff << 8 * %3
+ xor %1,%4
+ %endmacro
+
+ %macro lr_mov 4
+ movzx %4,%2
+ mov %1,btab_%3(%4)
+ and %1,0x000000ff << 8 * %3
+ %endmacro
+
+%endif
+
+; Apply S-Box to the 4 bytes in a 32-bit word and rotate byte positions
+
+%ifdef REDUCE_CODE_SIZE
+
+l3s_col:
+ movzx ecx,al ; in eax
+ movzx ecx, etab_b(ecx) ; out eax
+ xor edx,ecx ; scratch ecx,edx
+ movzx ecx,ah
+ movzx ecx, etab_b(ecx)
+ shl ecx,8
+ xor edx,ecx
+ shr eax,16
+ movzx ecx,al
+ movzx ecx, etab_b(ecx)
+ shl ecx,16
+ xor edx,ecx
+ movzx ecx,ah
+ movzx ecx, etab_b(ecx)
+ shl ecx,24
+ xor edx,ecx
+ mov eax,edx
+ ret
+
+%else
+
+%macro l3s_col 0
+
+ movzx ecx,al ; in eax
+ movzx ecx, etab_b(ecx) ; out eax
+ xor edx,ecx ; scratch ecx,edx
+ movzx ecx,ah
+ movzx ecx, etab_b(ecx)
+ shl ecx,8
+ xor edx,ecx
+ shr eax,16
+ movzx ecx,al
+ movzx ecx, etab_b(ecx)
+ shl ecx,16
+ xor edx,ecx
+ movzx ecx,ah
+ movzx ecx, etab_b(ecx)
+ shl ecx,24
+ xor edx,ecx
+ mov eax,edx
+
+%endmacro
+
+%endif
+
+; offsets to parameters
+
+in_blk equ 2 ; input byte array address parameter
+out_blk equ 4 ; output byte array address parameter
+ctx equ 6 ; AES context structure
+stk_spc equ 20 ; stack space
+
+%ifdef ENCRYPTION
+
+; %define ENCRYPTION_TABLE
+
+%ifdef REDUCE_CODE_SIZE
+
+enc_round:
+ sub sp, 2
+ add ebp,16
+ save 1,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ rnd_fun nr_xor, nr_mov
+
+ mov eax,ebp
+ mov ecx,esi
+ mov edx,edi
+ restore ebp,1
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+ add sp, 2
+ ret
+
+%else
+
+%macro enc_round 0
+
+ add ebp,16
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ rnd_fun nr_xor, nr_mov
+
+ mov eax,ebp
+ mov ecx,esi
+ mov edx,edi
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+%endif
+
+%macro enc_last_round 0
+
+ add ebp,16
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ rnd_fun lr_xor, lr_mov
+
+ mov eax,ebp
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+ section _TEXT
+
+; AES Encryption Subroutine
+
+ do_name _aes_encrypt,12
+
+ mov ax, sp
+ movzx esp, ax
+
+ sub esp,stk_spc
+ mov [esp+16],ebp
+ mov [esp+12],ebx
+ mov [esp+ 8],esi
+ mov [esp+ 4],edi
+
+ movzx esi,word [esp+in_blk+stk_spc] ; input pointer
+ mov eax,[esi ]
+ mov ebx,[esi+ 4]
+ mov ecx,[esi+ 8]
+ mov edx,[esi+12]
+
+ movzx ebp,word [esp+ctx+stk_spc] ; key pointer
+ movzx edi,byte [ebp+4*KS_LENGTH]
+ xor eax,[ebp ]
+ xor ebx,[ebp+ 4]
+ xor ecx,[ebp+ 8]
+ xor edx,[ebp+12]
+
+; determine the number of rounds
+
+%ifndef AES_256
+ cmp edi,10*16
+ je .3
+ cmp edi,12*16
+ je .2
+ cmp edi,14*16
+ je .1
+ mov eax,-1
+ jmp .5
+%endif
+
+.1: mf_call enc_round
+ mf_call enc_round
+.2: mf_call enc_round
+ mf_call enc_round
+.3: mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ mf_call enc_round
+ enc_last_round
+
+ movzx edx,word [esp+out_blk+stk_spc]
+ mov [edx],eax
+ mov [edx+4],ebx
+ mov [edx+8],esi
+ mov [edx+12],edi
+ xor eax,eax
+
+.5: mov ebp,[esp+16]
+ mov ebx,[esp+12]
+ mov esi,[esp+ 8]
+ mov edi,[esp+ 4]
+ add esp,stk_spc
+ do_exit 12
+
+%endif
+
+%macro f_key 2
+
+ push ecx
+ push edx
+ mov edx,esi
+ ror eax,8
+ mf_call l3s_col
+ mov esi,eax
+ pop edx
+ pop ecx
+ xor esi,rc_val
+
+ mov [ebp+%1*%2],esi
+ xor edi,esi
+ mov [ebp+%1*%2+4],edi
+ xor ecx,edi
+ mov [ebp+%1*%2+8],ecx
+ xor edx,ecx
+ mov [ebp+%1*%2+12],edx
+ mov eax,edx
+
+%if %2 == 24
+
+%if %1 < 7
+ xor eax,[ebp+%1*%2+16-%2]
+ mov [ebp+%1*%2+16],eax
+ xor eax,[ebp+%1*%2+20-%2]
+ mov [ebp+%1*%2+20],eax
+%endif
+
+%elif %2 == 32
+
+%if %1 < 6
+ push ecx
+ push edx
+ mov edx,[ebp+%1*%2+16-%2]
+ mf_call l3s_col
+ pop edx
+ pop ecx
+ mov [ebp+%1*%2+16],eax
+ xor eax,[ebp+%1*%2+20-%2]
+ mov [ebp+%1*%2+20],eax
+ xor eax,[ebp+%1*%2+24-%2]
+ mov [ebp+%1*%2+24],eax
+ xor eax,[ebp+%1*%2+28-%2]
+ mov [ebp+%1*%2+28],eax
+%endif
+
+%endif
+
+%assign rc_val f2(rc_val)
+
+%endmacro
+
+%ifdef ENCRYPTION_KEY_SCHEDULE
+
+%ifdef AES_128
+
+%ifndef ENCRYPTION_TABLE
+; %define ENCRYPTION_TABLE
+%endif
+
+%assign rc_val 1
+
+ do_name _aes_encrypt_key128,8
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov ebp,[esp+24]
+ mov [ebp+4*KS_LENGTH],dword 10*16
+ mov ebx,[esp+20]
+
+ mov esi,[ebx]
+ mov [ebp],esi
+ mov edi,[ebx+4]
+ mov [ebp+4],edi
+ mov ecx,[ebx+8]
+ mov [ebp+8],ecx
+ mov edx,[ebx+12]
+ mov [ebp+12],edx
+ add ebp,16
+ mov eax,edx
+
+ f_key 0,16 ; 11 * 4 = 44 unsigned longs
+ f_key 1,16 ; 4 + 4 * 10 generated = 44
+ f_key 2,16
+ f_key 3,16
+ f_key 4,16
+ f_key 5,16
+ f_key 6,16
+ f_key 7,16
+ f_key 8,16
+ f_key 9,16
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ xor eax,eax
+ do_exit 8
+
+%endif
+
+%ifdef AES_192
+
+%ifndef ENCRYPTION_TABLE
+; %define ENCRYPTION_TABLE
+%endif
+
+%assign rc_val 1
+
+ do_name _aes_encrypt_key192,8
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov ebp,[esp+24]
+ mov [ebp+4*KS_LENGTH],dword 12 * 16
+ mov ebx,[esp+20]
+
+ mov esi,[ebx]
+ mov [ebp],esi
+ mov edi,[ebx+4]
+ mov [ebp+4],edi
+ mov ecx,[ebx+8]
+ mov [ebp+8],ecx
+ mov edx,[ebx+12]
+ mov [ebp+12],edx
+ mov eax,[ebx+16]
+ mov [ebp+16],eax
+ mov eax,[ebx+20]
+ mov [ebp+20],eax
+ add ebp,24
+
+ f_key 0,24 ; 13 * 4 = 52 unsigned longs
+ f_key 1,24 ; 6 + 6 * 8 generated = 54
+ f_key 2,24
+ f_key 3,24
+ f_key 4,24
+ f_key 5,24
+ f_key 6,24
+ f_key 7,24
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ xor eax,eax
+ do_exit 8
+
+%endif
+
+%ifdef AES_256
+
+%ifndef ENCRYPTION_TABLE
+; %define ENCRYPTION_TABLE
+%endif
+
+%assign rc_val 1
+
+ do_name _aes_encrypt_key256,8
+
+ mov ax, sp
+ movzx esp, ax
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ movzx ebp, word [esp+20] ; ks
+ mov [ebp+4*KS_LENGTH],dword 14 * 16
+ movzx ebx, word [esp+18] ; key
+
+ mov esi,[ebx]
+ mov [ebp],esi
+ mov edi,[ebx+4]
+ mov [ebp+4],edi
+ mov ecx,[ebx+8]
+ mov [ebp+8],ecx
+ mov edx,[ebx+12]
+ mov [ebp+12],edx
+ mov eax,[ebx+16]
+ mov [ebp+16],eax
+ mov eax,[ebx+20]
+ mov [ebp+20],eax
+ mov eax,[ebx+24]
+ mov [ebp+24],eax
+ mov eax,[ebx+28]
+ mov [ebp+28],eax
+ add ebp,32
+
+ f_key 0,32 ; 15 * 4 = 60 unsigned longs
+ f_key 1,32 ; 8 + 8 * 7 generated = 64
+ f_key 2,32
+ f_key 3,32
+ f_key 4,32
+ f_key 5,32
+ f_key 6,32
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ xor eax,eax
+ do_exit 8
+
+%endif
+
+%ifdef AES_VAR
+
+%ifndef ENCRYPTION_TABLE
+; %define ENCRYPTION_TABLE
+%endif
+
+ do_name _aes_encrypt_key,12
+
+ mov ecx,[esp+4]
+ mov eax,[esp+8]
+ mov edx,[esp+12]
+ push edx
+ push ecx
+
+ cmp eax,16
+ je .1
+ cmp eax,128
+ je .1
+
+ cmp eax,24
+ je .2
+ cmp eax,192
+ je .2
+
+ cmp eax,32
+ je .3
+ cmp eax,256
+ je .3
+ mov eax,-1
+ add esp,8
+ do_exit 12
+
+.1: do_call _aes_encrypt_key128,8
+ do_exit 12
+.2: do_call _aes_encrypt_key192,8
+ do_exit 12
+.3: do_call _aes_encrypt_key256,8
+ do_exit 12
+
+%endif
+
+%endif
+
+%ifdef ENCRYPTION_TABLE
+
+; S-box data - 256 entries
+
+ section _DATA
+
+%define u8(x) 0, x, x, f3(x), f2(x), x, x, f3(x)
+
+_aes_enc_tab:
+ db u8(0x63),u8(0x7c),u8(0x77),u8(0x7b),u8(0xf2),u8(0x6b),u8(0x6f),u8(0xc5)
+ db u8(0x30),u8(0x01),u8(0x67),u8(0x2b),u8(0xfe),u8(0xd7),u8(0xab),u8(0x76)
+ db u8(0xca),u8(0x82),u8(0xc9),u8(0x7d),u8(0xfa),u8(0x59),u8(0x47),u8(0xf0)
+ db u8(0xad),u8(0xd4),u8(0xa2),u8(0xaf),u8(0x9c),u8(0xa4),u8(0x72),u8(0xc0)
+ db u8(0xb7),u8(0xfd),u8(0x93),u8(0x26),u8(0x36),u8(0x3f),u8(0xf7),u8(0xcc)
+ db u8(0x34),u8(0xa5),u8(0xe5),u8(0xf1),u8(0x71),u8(0xd8),u8(0x31),u8(0x15)
+ db u8(0x04),u8(0xc7),u8(0x23),u8(0xc3),u8(0x18),u8(0x96),u8(0x05),u8(0x9a)
+ db u8(0x07),u8(0x12),u8(0x80),u8(0xe2),u8(0xeb),u8(0x27),u8(0xb2),u8(0x75)
+ db u8(0x09),u8(0x83),u8(0x2c),u8(0x1a),u8(0x1b),u8(0x6e),u8(0x5a),u8(0xa0)
+ db u8(0x52),u8(0x3b),u8(0xd6),u8(0xb3),u8(0x29),u8(0xe3),u8(0x2f),u8(0x84)
+ db u8(0x53),u8(0xd1),u8(0x00),u8(0xed),u8(0x20),u8(0xfc),u8(0xb1),u8(0x5b)
+ db u8(0x6a),u8(0xcb),u8(0xbe),u8(0x39),u8(0x4a),u8(0x4c),u8(0x58),u8(0xcf)
+ db u8(0xd0),u8(0xef),u8(0xaa),u8(0xfb),u8(0x43),u8(0x4d),u8(0x33),u8(0x85)
+ db u8(0x45),u8(0xf9),u8(0x02),u8(0x7f),u8(0x50),u8(0x3c),u8(0x9f),u8(0xa8)
+ db u8(0x51),u8(0xa3),u8(0x40),u8(0x8f),u8(0x92),u8(0x9d),u8(0x38),u8(0xf5)
+ db u8(0xbc),u8(0xb6),u8(0xda),u8(0x21),u8(0x10),u8(0xff),u8(0xf3),u8(0xd2)
+ db u8(0xcd),u8(0x0c),u8(0x13),u8(0xec),u8(0x5f),u8(0x97),u8(0x44),u8(0x17)
+ db u8(0xc4),u8(0xa7),u8(0x7e),u8(0x3d),u8(0x64),u8(0x5d),u8(0x19),u8(0x73)
+ db u8(0x60),u8(0x81),u8(0x4f),u8(0xdc),u8(0x22),u8(0x2a),u8(0x90),u8(0x88)
+ db u8(0x46),u8(0xee),u8(0xb8),u8(0x14),u8(0xde),u8(0x5e),u8(0x0b),u8(0xdb)
+ db u8(0xe0),u8(0x32),u8(0x3a),u8(0x0a),u8(0x49),u8(0x06),u8(0x24),u8(0x5c)
+ db u8(0xc2),u8(0xd3),u8(0xac),u8(0x62),u8(0x91),u8(0x95),u8(0xe4),u8(0x79)
+ db u8(0xe7),u8(0xc8),u8(0x37),u8(0x6d),u8(0x8d),u8(0xd5),u8(0x4e),u8(0xa9)
+ db u8(0x6c),u8(0x56),u8(0xf4),u8(0xea),u8(0x65),u8(0x7a),u8(0xae),u8(0x08)
+ db u8(0xba),u8(0x78),u8(0x25),u8(0x2e),u8(0x1c),u8(0xa6),u8(0xb4),u8(0xc6)
+ db u8(0xe8),u8(0xdd),u8(0x74),u8(0x1f),u8(0x4b),u8(0xbd),u8(0x8b),u8(0x8a)
+ db u8(0x70),u8(0x3e),u8(0xb5),u8(0x66),u8(0x48),u8(0x03),u8(0xf6),u8(0x0e)
+ db u8(0x61),u8(0x35),u8(0x57),u8(0xb9),u8(0x86),u8(0xc1),u8(0x1d),u8(0x9e)
+ db u8(0xe1),u8(0xf8),u8(0x98),u8(0x11),u8(0x69),u8(0xd9),u8(0x8e),u8(0x94)
+ db u8(0x9b),u8(0x1e),u8(0x87),u8(0xe9),u8(0xce),u8(0x55),u8(0x28),u8(0xdf)
+ db u8(0x8c),u8(0xa1),u8(0x89),u8(0x0d),u8(0xbf),u8(0xe6),u8(0x42),u8(0x68)
+ db u8(0x41),u8(0x99),u8(0x2d),u8(0x0f),u8(0xb0),u8(0x54),u8(0xbb),u8(0x16)
+
+%endif
+
+%ifdef DECRYPTION
+
+; %define DECRYPTION_TABLE
+
+%define dtab_0(x) [_aes_dec_tab+ 8*x]
+%define dtab_1(x) [_aes_dec_tab+3+8*x]
+%define dtab_2(x) [_aes_dec_tab+2+8*x]
+%define dtab_3(x) [_aes_dec_tab+1+8*x]
+%define dtab_x(x) byte [_aes_dec_tab+7+8*x]
+
+%macro irn_fun 2
+
+ rol eax,16
+ %1 esi, cl, 0, ebp
+ %1 esi, bh, 1, ebp
+ %1 esi, al, 2, ebp
+ %1 edi, dl, 0, ebp
+ %1 edi, ch, 1, ebp
+ %1 edi, ah, 3, ebp
+ %2 ebp, bl, 0, ebp
+ shr eax,16
+ and ebx,0xffff0000
+ or ebx,eax
+ shr ecx,16
+ %1 ebp, bh, 1, eax
+ %1 ebp, ch, 3, eax
+ %2 eax, cl, 2, ecx
+ %1 eax, bl, 0, ecx
+ %1 eax, dh, 1, ecx
+ shr ebx,16
+ shr edx,16
+ %1 esi, dh, 3, ecx
+ %1 ebp, dl, 2, ecx
+ %1 eax, bh, 3, ecx
+ %1 edi, bl, 2, ecx
+
+%endmacro
+
+; Basic MOV and XOR Operations for normal rounds
+
+%macro ni_xor 4
+ movzx %4,%2
+ xor %1,dtab_%3(%4)
+%endmacro
+
+%macro ni_mov 4
+ movzx %4,%2
+ mov %1,dtab_%3(%4)
+%endmacro
+
+; Basic MOV and XOR Operations for last round
+
+%macro li_xor 4
+ movzx %4,%2
+ movzx %4,dtab_x(%4)
+%if %3 != 0
+ shl %4,8*%3
+%endif
+ xor %1,%4
+%endmacro
+
+%macro li_mov 4
+ movzx %4,%2
+ movzx %1,dtab_x(%4)
+%if %3 != 0
+ shl %1,8*%3
+%endif
+%endmacro
+
+%ifdef REDUCE_CODE_SIZE
+
+dec_round:
+ sub sp, 2
+%ifdef AES_REV_DKS
+ add ebp,16
+%else
+ sub ebp,16
+%endif
+ save 1,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ irn_fun ni_xor, ni_mov
+
+ mov ebx,ebp
+ mov ecx,esi
+ mov edx,edi
+ restore ebp,1
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+ add sp, 2
+ ret
+
+%else
+
+%macro dec_round 0
+
+%ifdef AES_REV_DKS
+ add ebp,16
+%else
+ sub ebp,16
+%endif
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ irn_fun ni_xor, ni_mov
+
+ mov ebx,ebp
+ mov ecx,esi
+ mov edx,edi
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+%endif
+
+%macro dec_last_round 0
+
+%ifdef AES_REV_DKS
+ add ebp,16
+%else
+ sub ebp,16
+%endif
+ save 0,ebp
+ mov esi,[ebp+8]
+ mov edi,[ebp+12]
+
+ irn_fun li_xor, li_mov
+
+ mov ebx,ebp
+ restore ebp,0
+ xor eax,[ebp]
+ xor ebx,[ebp+4]
+
+%endmacro
+
+ section _TEXT
+
+; AES Decryption Subroutine
+
+ do_name _aes_decrypt,12
+
+ mov ax, sp
+ movzx esp, ax
+
+ sub esp,stk_spc
+ mov [esp+16],ebp
+ mov [esp+12],ebx
+ mov [esp+ 8],esi
+ mov [esp+ 4],edi
+
+; input four columns and xor in first round key
+
+ movzx esi,word [esp+in_blk+stk_spc] ; input pointer
+ mov eax,[esi ]
+ mov ebx,[esi+ 4]
+ mov ecx,[esi+ 8]
+ mov edx,[esi+12]
+ lea esi,[esi+16]
+
+ movzx ebp, word [esp+ctx+stk_spc] ; key pointer
+ movzx edi,byte[ebp+4*KS_LENGTH]
+%ifndef AES_REV_DKS ; if decryption key schedule is not reversed
+ lea ebp,[ebp+edi] ; we have to access it from the top down
+%endif
+ xor eax,[ebp ] ; key schedule
+ xor ebx,[ebp+ 4]
+ xor ecx,[ebp+ 8]
+ xor edx,[ebp+12]
+
+; determine the number of rounds
+
+%ifndef AES_256
+ cmp edi,10*16
+ je .3
+ cmp edi,12*16
+ je .2
+ cmp edi,14*16
+ je .1
+ mov eax,-1
+ jmp .5
+%endif
+
+.1: mf_call dec_round
+ mf_call dec_round
+.2: mf_call dec_round
+ mf_call dec_round
+.3: mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ mf_call dec_round
+ dec_last_round
+
+; move final values to the output array.
+
+ movzx ebp,word [esp+out_blk+stk_spc]
+ mov [ebp],eax
+ mov [ebp+4],ebx
+ mov [ebp+8],esi
+ mov [ebp+12],edi
+ xor eax,eax
+
+.5: mov ebp,[esp+16]
+ mov ebx,[esp+12]
+ mov esi,[esp+ 8]
+ mov edi,[esp+ 4]
+ add esp,stk_spc
+ do_exit 12
+
+%endif
+
+%ifdef REDUCE_CODE_SIZE
+
+inv_mix_col:
+ movzx ecx,dl ; input eax, edx
+ movzx ecx,etab_b(ecx) ; output eax
+ mov eax,dtab_0(ecx) ; used ecx
+ movzx ecx,dh
+ shr edx,16
+ movzx ecx,etab_b(ecx)
+ xor eax,dtab_1(ecx)
+ movzx ecx,dl
+ movzx ecx,etab_b(ecx)
+ xor eax,dtab_2(ecx)
+ movzx ecx,dh
+ movzx ecx,etab_b(ecx)
+ xor eax,dtab_3(ecx)
+ ret
+
+%else
+
+%macro inv_mix_col 0
+
+ movzx ecx,dl ; input eax, edx
+ movzx ecx,etab_b(ecx) ; output eax
+ mov eax,dtab_0(ecx) ; used ecx
+ movzx ecx,dh
+ shr edx,16
+ movzx ecx,etab_b(ecx)
+ xor eax,dtab_1(ecx)
+ movzx ecx,dl
+ movzx ecx,etab_b(ecx)
+ xor eax,dtab_2(ecx)
+ movzx ecx,dh
+ movzx ecx,etab_b(ecx)
+ xor eax,dtab_3(ecx)
+
+%endmacro
+
+%endif
+
+%ifdef DECRYPTION_KEY_SCHEDULE
+
+%ifdef AES_128
+
+%ifndef DECRYPTION_TABLE
+; %define DECRYPTION_TABLE
+%endif
+
+ do_name _aes_decrypt_key128,8
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov eax,[esp+24] ; context
+ mov edx,[esp+20] ; key
+ push eax
+ push edx
+ do_call _aes_encrypt_key128,8 ; generate expanded encryption key
+ mov eax,10*16
+ mov esi,[esp+24] ; pointer to first round key
+ lea edi,[esi+eax] ; pointer to last round key
+ add esi,32
+ ; the inverse mix column transformation
+ mov edx,[esi-16] ; needs to be applied to all round keys
+ mf_call inv_mix_col ; except first and last. Hence start by
+ mov [esi-16],eax ; transforming the four sub-keys in the
+ mov edx,[esi-12] ; second round key
+ mf_call inv_mix_col
+ mov [esi-12],eax ; transformations for subsequent rounds
+ mov edx,[esi-8] ; can then be made more efficient by
+ mf_call inv_mix_col ; noting that for three of the four sub-keys
+ mov [esi-8],eax ; in the encryption round key ek[r]:
+ mov edx,[esi-4] ;
+ mf_call inv_mix_col ; ek[r][n] = ek[r][n-1] ^ ek[r-1][n]
+ mov [esi-4],eax ;
+ ; where n is 1..3. Hence the corresponding
+.0: mov edx,[esi] ; subkeys in the decryption round key dk[r]
+ mf_call inv_mix_col ; also obey since inv_mix_col is linear in
+ mov [esi],eax ; GF(256):
+ xor eax,[esi-12] ;
+ mov [esi+4],eax ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
+ xor eax,[esi-8] ;
+ mov [esi+8],eax ; So we only need one inverse mix column
+ xor eax,[esi-4] ; operation (n = 0) for each four word cycle
+ mov [esi+12],eax ; in the expanded key.
+ add esi,16
+ cmp edi,esi
+ jg .0
+ jmp dec_end
+
+%endif
+
+%ifdef AES_192
+
+%ifndef DECRYPTION_TABLE
+; %define DECRYPTION_TABLE
+%endif
+
+ do_name _aes_decrypt_key192,8
+
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov eax,[esp+24] ; context
+ mov edx,[esp+20] ; key
+ push eax
+ push edx
+ do_call _aes_encrypt_key192,8 ; generate expanded encryption key
+ mov eax,12*16
+ mov esi,[esp+24] ; first round key
+ lea edi,[esi+eax] ; last round key
+ add esi,48 ; the first 6 words are the key, of
+ ; which the top 2 words are part of
+ mov edx,[esi-32] ; the second round key and hence
+ mf_call inv_mix_col ; need to be modified. After this we
+ mov [esi-32],eax ; need to do a further six values prior
+ mov edx,[esi-28] ; to using a more efficient technique
+ mf_call inv_mix_col ; based on:
+ mov [esi-28],eax ;
+ ; dk[r][n] = dk[r][n-1] ^ dk[r-1][n]
+ mov edx,[esi-24] ;
+ mf_call inv_mix_col ; for n = 1 .. 5 where the key expansion
+ mov [esi-24],eax ; cycle is now 6 words long
+ mov edx,[esi-20]
+ mf_call inv_mix_col
+ mov [esi-20],eax
+ mov edx,[esi-16]
+ mf_call inv_mix_col
+ mov [esi-16],eax
+ mov edx,[esi-12]
+ mf_call inv_mix_col
+ mov [esi-12],eax
+ mov edx,[esi-8]
+ mf_call inv_mix_col
+ mov [esi-8],eax
+ mov edx,[esi-4]
+ mf_call inv_mix_col
+ mov [esi-4],eax
+
+.0: mov edx,[esi] ; the expanded key is 13 * 4 = 44 32-bit words
+ mf_call inv_mix_col ; of which 11 * 4 = 44 have to be modified
+ mov [esi],eax ; using inv_mix_col. We have already done 8
+ xor eax,[esi-20] ; of these so 36 are left - hence we need
+ mov [esi+4],eax ; exactly 6 loops of six here
+ xor eax,[esi-16]
+ mov [esi+8],eax
+ xor eax,[esi-12]
+ mov [esi+12],eax
+ xor eax,[esi-8]
+ mov [esi+16],eax
+ xor eax,[esi-4]
+ mov [esi+20],eax
+ add esi,24
+ cmp edi,esi
+ jg .0
+ jmp dec_end
+
+%endif
+
+%ifdef AES_256
+
+%ifndef DECRYPTION_TABLE
+; %define DECRYPTION_TABLE
+%endif
+
+ do_name _aes_decrypt_key256,8
+
+ mov ax, sp
+ movzx esp, ax
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ movzx eax, word [esp+20] ; ks
+ movzx edx, word [esp+18] ; key
+ push ax
+ push dx
+ do_call _aes_encrypt_key256,4 ; generate expanded encryption key
+ mov eax,14*16
+ movzx esi, word [esp+20] ; ks
+ lea edi,[esi+eax]
+ add esi,64
+
+ mov edx,[esi-48] ; the primary key is 8 words, of which
+ mf_call inv_mix_col ; the top four require modification
+ mov [esi-48],eax
+ mov edx,[esi-44]
+ mf_call inv_mix_col
+ mov [esi-44],eax
+ mov edx,[esi-40]
+ mf_call inv_mix_col
+ mov [esi-40],eax
+ mov edx,[esi-36]
+ mf_call inv_mix_col
+ mov [esi-36],eax
+
+ mov edx,[esi-32] ; the encryption key expansion cycle is
+ mf_call inv_mix_col ; now eight words long so we need to
+ mov [esi-32],eax ; start by doing one complete block
+ mov edx,[esi-28]
+ mf_call inv_mix_col
+ mov [esi-28],eax
+ mov edx,[esi-24]
+ mf_call inv_mix_col
+ mov [esi-24],eax
+ mov edx,[esi-20]
+ mf_call inv_mix_col
+ mov [esi-20],eax
+ mov edx,[esi-16]
+ mf_call inv_mix_col
+ mov [esi-16],eax
+ mov edx,[esi-12]
+ mf_call inv_mix_col
+ mov [esi-12],eax
+ mov edx,[esi-8]
+ mf_call inv_mix_col
+ mov [esi-8],eax
+ mov edx,[esi-4]
+ mf_call inv_mix_col
+ mov [esi-4],eax
+
+.0: mov edx,[esi] ; we can now speed up the remaining
+ mf_call inv_mix_col ; rounds by using the technique
+ mov [esi],eax ; outlined earlier. But note that
+ xor eax,[esi-28] ; there is one extra inverse mix
+ mov [esi+4],eax ; column operation as the 256 bit
+ xor eax,[esi-24] ; key has an extra non-linear step
+ mov [esi+8],eax ; for the midway element.
+ xor eax,[esi-20]
+ mov [esi+12],eax ; the expanded key is 15 * 4 = 60
+ mov edx,[esi+16] ; 32-bit words of which 52 need to
+ mf_call inv_mix_col ; be modified. We have already done
+ mov [esi+16],eax ; 12 so 40 are left - which means
+ xor eax,[esi-12] ; that we need exactly 5 loops of 8
+ mov [esi+20],eax
+ xor eax,[esi-8]
+ mov [esi+24],eax
+ xor eax,[esi-4]
+ mov [esi+28],eax
+ add esi,32
+ cmp edi,esi
+ jg .0
+
+%endif
+
+dec_end:
+
+%ifdef AES_REV_DKS
+
+ movzx esi,word [esp+20] ; this reverses the order of the
+.1: mov eax,[esi] ; round keys if required
+ mov ebx,[esi+4]
+ mov ebp,[edi]
+ mov edx,[edi+4]
+ mov [esi],ebp
+ mov [esi+4],edx
+ mov [edi],eax
+ mov [edi+4],ebx
+
+ mov eax,[esi+8]
+ mov ebx,[esi+12]
+ mov ebp,[edi+8]
+ mov edx,[edi+12]
+ mov [esi+8],ebp
+ mov [esi+12],edx
+ mov [edi+8],eax
+ mov [edi+12],ebx
+
+ add esi,16
+ sub edi,16
+ cmp edi,esi
+ jg .1
+
+%endif
+
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ xor eax,eax
+ do_exit 8
+
+%ifdef AES_VAR
+
+ do_name _aes_decrypt_key,12
+
+ mov ecx,[esp+4]
+ mov eax,[esp+8]
+ mov edx,[esp+12]
+ push edx
+ push ecx
+
+ cmp eax,16
+ je .1
+ cmp eax,128
+ je .1
+
+ cmp eax,24
+ je .2
+ cmp eax,192
+ je .2
+
+ cmp eax,32
+ je .3
+ cmp eax,256
+ je .3
+ mov eax,-1
+ add esp,8
+ do_exit 12
+
+.1: do_call _aes_decrypt_key128,8
+ do_exit 12
+.2: do_call _aes_decrypt_key192,8
+ do_exit 12
+.3: do_call _aes_decrypt_key256,8
+ do_exit 12
+
+%endif
+
+%endif
+
+%ifdef DECRYPTION_TABLE
+
+; Inverse S-box data - 256 entries
+
+ section _DATA
+
+%define v8(x) fe(x), f9(x), fd(x), fb(x), fe(x), f9(x), fd(x), x
+
+_aes_dec_tab:
+ db v8(0x52),v8(0x09),v8(0x6a),v8(0xd5),v8(0x30),v8(0x36),v8(0xa5),v8(0x38)
+ db v8(0xbf),v8(0x40),v8(0xa3),v8(0x9e),v8(0x81),v8(0xf3),v8(0xd7),v8(0xfb)
+ db v8(0x7c),v8(0xe3),v8(0x39),v8(0x82),v8(0x9b),v8(0x2f),v8(0xff),v8(0x87)
+ db v8(0x34),v8(0x8e),v8(0x43),v8(0x44),v8(0xc4),v8(0xde),v8(0xe9),v8(0xcb)
+ db v8(0x54),v8(0x7b),v8(0x94),v8(0x32),v8(0xa6),v8(0xc2),v8(0x23),v8(0x3d)
+ db v8(0xee),v8(0x4c),v8(0x95),v8(0x0b),v8(0x42),v8(0xfa),v8(0xc3),v8(0x4e)
+ db v8(0x08),v8(0x2e),v8(0xa1),v8(0x66),v8(0x28),v8(0xd9),v8(0x24),v8(0xb2)
+ db v8(0x76),v8(0x5b),v8(0xa2),v8(0x49),v8(0x6d),v8(0x8b),v8(0xd1),v8(0x25)
+ db v8(0x72),v8(0xf8),v8(0xf6),v8(0x64),v8(0x86),v8(0x68),v8(0x98),v8(0x16)
+ db v8(0xd4),v8(0xa4),v8(0x5c),v8(0xcc),v8(0x5d),v8(0x65),v8(0xb6),v8(0x92)
+ db v8(0x6c),v8(0x70),v8(0x48),v8(0x50),v8(0xfd),v8(0xed),v8(0xb9),v8(0xda)
+ db v8(0x5e),v8(0x15),v8(0x46),v8(0x57),v8(0xa7),v8(0x8d),v8(0x9d),v8(0x84)
+ db v8(0x90),v8(0xd8),v8(0xab),v8(0x00),v8(0x8c),v8(0xbc),v8(0xd3),v8(0x0a)
+ db v8(0xf7),v8(0xe4),v8(0x58),v8(0x05),v8(0xb8),v8(0xb3),v8(0x45),v8(0x06)
+ db v8(0xd0),v8(0x2c),v8(0x1e),v8(0x8f),v8(0xca),v8(0x3f),v8(0x0f),v8(0x02)
+ db v8(0xc1),v8(0xaf),v8(0xbd),v8(0x03),v8(0x01),v8(0x13),v8(0x8a),v8(0x6b)
+ db v8(0x3a),v8(0x91),v8(0x11),v8(0x41),v8(0x4f),v8(0x67),v8(0xdc),v8(0xea)
+ db v8(0x97),v8(0xf2),v8(0xcf),v8(0xce),v8(0xf0),v8(0xb4),v8(0xe6),v8(0x73)
+ db v8(0x96),v8(0xac),v8(0x74),v8(0x22),v8(0xe7),v8(0xad),v8(0x35),v8(0x85)
+ db v8(0xe2),v8(0xf9),v8(0x37),v8(0xe8),v8(0x1c),v8(0x75),v8(0xdf),v8(0x6e)
+ db v8(0x47),v8(0xf1),v8(0x1a),v8(0x71),v8(0x1d),v8(0x29),v8(0xc5),v8(0x89)
+ db v8(0x6f),v8(0xb7),v8(0x62),v8(0x0e),v8(0xaa),v8(0x18),v8(0xbe),v8(0x1b)
+ db v8(0xfc),v8(0x56),v8(0x3e),v8(0x4b),v8(0xc6),v8(0xd2),v8(0x79),v8(0x20)
+ db v8(0x9a),v8(0xdb),v8(0xc0),v8(0xfe),v8(0x78),v8(0xcd),v8(0x5a),v8(0xf4)
+ db v8(0x1f),v8(0xdd),v8(0xa8),v8(0x33),v8(0x88),v8(0x07),v8(0xc7),v8(0x31)
+ db v8(0xb1),v8(0x12),v8(0x10),v8(0x59),v8(0x27),v8(0x80),v8(0xec),v8(0x5f)
+ db v8(0x60),v8(0x51),v8(0x7f),v8(0xa9),v8(0x19),v8(0xb5),v8(0x4a),v8(0x0d)
+ db v8(0x2d),v8(0xe5),v8(0x7a),v8(0x9f),v8(0x93),v8(0xc9),v8(0x9c),v8(0xef)
+ db v8(0xa0),v8(0xe0),v8(0x3b),v8(0x4d),v8(0xae),v8(0x2a),v8(0xf5),v8(0xb0)
+ db v8(0xc8),v8(0xeb),v8(0xbb),v8(0x3c),v8(0x83),v8(0x53),v8(0x99),v8(0x61)
+ db v8(0x17),v8(0x2b),v8(0x04),v8(0x7e),v8(0xba),v8(0x77),v8(0xd6),v8(0x26)
+ db v8(0xe1),v8(0x69),v8(0x14),v8(0x63),v8(0x55),v8(0x21),v8(0x0c),v8(0x7d)
+
+%endif