VeraCrypt
aboutsummaryrefslogtreecommitdiff
path: root/src/Crypto/chacha_u4.h
blob: 8eef5dc5cda4f9eac72d70a0c9f23d2f743458b3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*
u4.h version $Date: 2014/11/11 10:46:58 $
D. J. Bernstein
Romain Dolbeau
Public domain.
*/

// Modified by kerukuro for use in cppcrypto.

/*
 * Rotate every 32-bit lane of `a` left by `imm` bits:
 * (a << imm) | (a >> (32 - imm)).
 * Both parameters are parenthesized so that expression arguments (for
 * example `4 + 3`) expand with the intended precedence. `a` is expanded
 * twice, so it must be free of side effects.
 */
#define VEC4_ROT(a,imm) _mm_or_si128(_mm_slli_epi32((a),(imm)),_mm_srli_epi32((a),(32-(imm))))

/*
 * One quarter round applied to the vectors x_<a>, x_<b>, x_<c>, x_<d> that
 * must exist in the expanding scope:
 *   a += b; d = rotl(d ^ a, 16);
 *   c += d; b = rotl(b ^ c, 12);
 *   a += b; d = rotl(d ^ a,  8);
 *   c += d; b = rotl(b ^ c,  7);
 * The rotations by 16 and 8 are done with _mm_shuffle_epi8 using the masks
 * `rot16` and `rot8`, which must also be in scope; the rotations by 12 and 7
 * use VEC4_ROT. t_<a> and t_<c> are used as scratch and are overwritten.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement; invoke it with a trailing semicolon.
 */
#define VEC4_QUARTERROUND_SHUFFLE(a,b,c,d)                                \
   do {                                                                   \
     x_##a = _mm_add_epi32(x_##a, x_##b); t_##a = _mm_xor_si128(x_##d, x_##a); x_##d = _mm_shuffle_epi8(t_##a, rot16); \
     x_##c = _mm_add_epi32(x_##c, x_##d); t_##c = _mm_xor_si128(x_##b, x_##c); x_##b = VEC4_ROT(t_##c, 12); \
     x_##a = _mm_add_epi32(x_##a, x_##b); t_##a = _mm_xor_si128(x_##d, x_##a); x_##d = _mm_shuffle_epi8(t_##a, rot8); \
     x_##c = _mm_add_epi32(x_##c, x_##d); t_##c = _mm_xor_si128(x_##b, x_##c); x_##b = VEC4_ROT(t_##c,  7); \
   } while (0)

/* Quarter round used by the round loop; forwards to the shuffle-based form. */
#define VEC4_QUARTERROUND(a,b,c,d) VEC4_QUARTERROUND_SHUFFLE(a,b,c,d)


//  if (!bytes) return;
/*
 * Bulk path of the ChaCha fragment: while at least 256 bytes remain, compute
 * four 64-byte blocks in parallel and XOR them with the input.
 *
 * Lane k of every vector x_N holds state word N of block k, so one SSE
 * operation advances all four blocks. The identifiers `bytes`, `x`, `m`,
 * `out`, `i` and `r`, and the types `uint32` / `uint64`, are not declared
 * here; they come from the scope this fragment is expanded into.
 *
 * On exit `bytes` is below 256, `m` and `out` have advanced by 256 per
 * processed chunk, and x[12] / x[13] (used as the low / high halves of a
 * 64-bit block counter) have advanced by 4 per processed chunk.
 */
if (bytes>=256) {
  /* constant for shuffling bytes (replacing multiple-of-8 rotates) */
  /* rot16 / rot8 are _mm_shuffle_epi8 masks that rotate each 32-bit lane
     left by 16 and by 8 bits; VEC4_QUARTERROUND_SHUFFLE reads them by name. */
  __m128i rot16 = _mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2);
  __m128i rot8  = _mm_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3);
  uint32 in12, in13;
  /* Working state: each input word broadcast to all four lanes. */
  __m128i x_0 = _mm_set1_epi32(x[0]);
  __m128i x_1 = _mm_set1_epi32(x[1]);
  __m128i x_2 = _mm_set1_epi32(x[2]);
  __m128i x_3 = _mm_set1_epi32(x[3]);
  __m128i x_4 = _mm_set1_epi32(x[4]);
  __m128i x_5 = _mm_set1_epi32(x[5]);
  __m128i x_6 = _mm_set1_epi32(x[6]);
  __m128i x_7 = _mm_set1_epi32(x[7]);
  __m128i x_8 = _mm_set1_epi32(x[8]);
  __m128i x_9 = _mm_set1_epi32(x[9]);
  __m128i x_10 = _mm_set1_epi32(x[10]);
  __m128i x_11 = _mm_set1_epi32(x[11]);
  __m128i x_12; /* not broadcast: rebuilt from the counter on every iteration */
  __m128i x_13; /* not broadcast: rebuilt from the counter on every iteration */
  __m128i x_14 = _mm_set1_epi32(x[14]);
  __m128i x_15 = _mm_set1_epi32(x[15]);
  /* Copies of the initial state; used to reset x_N at the top of each
     iteration and added back to the result after the rounds. */
  __m128i orig0 = x_0;
  __m128i orig1 = x_1;
  __m128i orig2 = x_2;
  __m128i orig3 = x_3;
  __m128i orig4 = x_4;
  __m128i orig5 = x_5;
  __m128i orig6 = x_6;
  __m128i orig7 = x_7;
  __m128i orig8 = x_8;
  __m128i orig9 = x_9;
  __m128i orig10 = x_10;
  __m128i orig11 = x_11;
  __m128i orig12; /* assigned inside the loop, after the counter lanes are built */
  __m128i orig13; /* assigned inside the loop, after the counter lanes are built */
  __m128i orig14 = x_14;
  __m128i orig15 = x_15;
  /* Scratch vectors; the quarter-round and ONEQUAD macros address them by
     token-pasted name (t_<index>). */
  __m128i t_0;
  __m128i t_1;
  __m128i t_2;
  __m128i t_3;
  __m128i t_4;
  __m128i t_5;
  __m128i t_6;
  __m128i t_7;
  __m128i t_8;
  __m128i t_9;
  __m128i t_10;
  __m128i t_11;
  __m128i t_12;
  __m128i t_13;
  __m128i t_14;
  __m128i t_15;

  while (bytes >= 256) {
    /* 64-bit lane offsets {0,1} and {2,3}: the counters of the four blocks. */
    const __m128i addv12 = _mm_set_epi64x(1,0);
    const __m128i addv13 = _mm_set_epi64x(3,2);
    __m128i t12, t13;
	uint64 in1213;

    /* Reset the working state to the initial words. */
    x_0 = orig0;
    x_1 = orig1;
    x_2 = orig2;
    x_3 = orig3;
    x_4 = orig4;
    x_5 = orig5;
    x_6 = orig6;
    x_7 = orig7;
    x_8 = orig8;
    x_9 = orig9;
    x_10 = orig10;
    x_11 = orig11;
    /* x_12 and x_13 are not reset here; they are derived from the counter below. */
    x_14 = orig14;
    x_15 = orig15;




    /* Combine x[12] (low half) and x[13] (high half) into one 64-bit counter
       and broadcast it to both 64-bit lanes. */
    in12 = x[12];
    in13 = x[13];
    in1213 = ((uint64)in12) | (((uint64)in13) << 32);
    t12 = _mm_set1_epi64x(in1213);
    t13 = _mm_set1_epi64x(in1213);

    /* 64-bit adds, so a carry out of the low word reaches the high word:
       x_12 = {ctr+0, ctr+1}, x_13 = {ctr+2, ctr+3}. */
    x_12 = _mm_add_epi64(addv12, t12);
    x_13 = _mm_add_epi64(addv13, t13);

    /* Two rounds of 32-bit interleaving regroup the halves so that x_12 ends
       up with the four low words and x_13 with the four high words, in block
       order. */
    t12 = _mm_unpacklo_epi32(x_12, x_13);
    t13 = _mm_unpackhi_epi32(x_12, x_13);

    x_12 = _mm_unpacklo_epi32(t12, t13);
    x_13 = _mm_unpackhi_epi32(t12, t13);

    orig12 = x_12;
    orig13 = x_13;

    /* Four blocks are consumed per iteration; store the advanced counter
       back into the caller's state. */
    in1213 += 4;

    x[12] = in1213 & 0xFFFFFFFF;
    x[13] = (in1213>>32)&0xFFFFFFFF;

    /* Each pass applies eight quarter rounds: four on the index sets
       {n, n+4, n+8, n+12} (columns) and four on the diagonals, so `r` counts
       double rounds. */
    for (i = 0 ; i < r ; ++i) {
      VEC4_QUARTERROUND( 0, 4, 8,12);
      VEC4_QUARTERROUND( 1, 5, 9,13);
      VEC4_QUARTERROUND( 2, 6,10,14);
      VEC4_QUARTERROUND( 3, 7,11,15);
      VEC4_QUARTERROUND( 0, 5,10,15);
      VEC4_QUARTERROUND( 1, 6,11,12);
      VEC4_QUARTERROUND( 2, 7, 8,13);
      VEC4_QUARTERROUND( 3, 4, 9,14);
    }

/*
 * ONEQUAD_TRANSPOSE(a,b,c,d): finish four consecutive state words.
 *  1. Add the initial words orig<a..d> to the worked words x_<a..d>.
 *  2. Transpose the 4x4 matrix of 32-bit lanes with unpack operations, so
 *     that afterwards x_<a> holds words a..d of block 0, x_<b> those of
 *     block 1, x_<c> block 2 and x_<d> block 3.
 *  3. XOR each with 16 unaligned bytes loaded from m at offsets 0, 64, 128,
 *     192 and store the result at the same offsets of out.
 * NOTE(review): the offsets are in elements of whatever m and out point to;
 * they match the 64-byte block stride only if both are byte pointers, which
 * is declared outside this fragment - confirm. The load casts m to a
 * non-const __m128i pointer.
 */
#define ONEQUAD_TRANSPOSE(a,b,c,d)                                      \
    {                                                                   \
      __m128i t0, t1, t2, t3;                                           \
      x_##a = _mm_add_epi32(x_##a, orig##a);                         \
      x_##b = _mm_add_epi32(x_##b, orig##b);                         \
      x_##c = _mm_add_epi32(x_##c, orig##c);                         \
      x_##d = _mm_add_epi32(x_##d, orig##d);                         \
      t_##a = _mm_unpacklo_epi32(x_##a, x_##b);                      \
      t_##b = _mm_unpacklo_epi32(x_##c, x_##d);                      \
      t_##c = _mm_unpackhi_epi32(x_##a, x_##b);                      \
      t_##d = _mm_unpackhi_epi32(x_##c, x_##d);                      \
      x_##a = _mm_unpacklo_epi64(t_##a, t_##b);                      \
      x_##b = _mm_unpackhi_epi64(t_##a, t_##b);                      \
      x_##c = _mm_unpacklo_epi64(t_##c, t_##d);                      \
      x_##d = _mm_unpackhi_epi64(t_##c, t_##d);                      \
      t0 = _mm_xor_si128(x_##a, _mm_loadu_si128((__m128i*)(m+0)));       \
      _mm_storeu_si128((__m128i*)(out+0),t0);                            \
      t1 = _mm_xor_si128(x_##b, _mm_loadu_si128((__m128i*)(m+64)));      \
      _mm_storeu_si128((__m128i*)(out+64),t1);                           \
      t2 = _mm_xor_si128(x_##c, _mm_loadu_si128((__m128i*)(m+128)));     \
      _mm_storeu_si128((__m128i*)(out+128),t2);                          \
      t3 = _mm_xor_si128(x_##d, _mm_loadu_si128((__m128i*)(m+192)));     \
      _mm_storeu_si128((__m128i*)(out+192),t3);                          \
    }

/* ONEQUAD is the name used at the call sites below; it forwards unchanged. */
#define ONEQUAD(a,b,c,d) ONEQUAD_TRANSPOSE(a,b,c,d)

    /* Each ONEQUAD covers four state words (16 bytes) of all four blocks;
       m and out step by 16 between calls and are rewound by 48 after the
       last one, so the chunk advance below starts from the chunk base. */
    ONEQUAD(0,1,2,3);
    m+=16;
    out+=16;
    ONEQUAD(4,5,6,7);
    m+=16;
    out+=16;
    ONEQUAD(8,9,10,11);
    m+=16;
    out+=16;
    ONEQUAD(12,13,14,15);
    m-=48;
    out-=48;

#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE

    /* Move on to the next chunk of four blocks. */
    bytes -= 256;
    out += 256;
    m += 256;
  }
 }
/* Remove the helper macros so they are not visible after this fragment. */
#undef VEC4_ROT
#undef VEC4_QUARTERROUND
#undef VEC4_QUARTERROUND_SHUFFLE
/* NOTE(review): the text from here to the next function definition is the tail of a function whose opening lines are not visible in this file; it is left untouched. */ span class="n">filelist[0]) * (size_t)survivors)) == NULL) return -1; unchanged_offset = ZIP_UINT64_MAX; /* create list of files with index into original archive */ for (i = j = 0; i < za->nentry; i++) { if (za->entry[i].orig != NULL && ZIP_ENTRY_HAS_CHANGES(&za->entry[i])) { unchanged_offset = ZIP_MIN(unchanged_offset, za->entry[i].orig->offset); } if (za->entry[i].deleted) { continue; } if (j >= survivors) { free(filelist); zip_error_set(&za->error, ZIP_ER_INTERNAL, 0); return -1; } filelist[j].idx = i; j++; } if (j < survivors) { free(filelist); zip_error_set(&za->error, ZIP_ER_INTERNAL, 0); return -1; } if ((zip_source_supports(za->src) & ZIP_SOURCE_MAKE_COMMAND_BITMASK(ZIP_SOURCE_BEGIN_WRITE_CLONING)) == 0) { unchanged_offset = 0; } else { if (unchanged_offset == ZIP_UINT64_MAX) { /* we're keeping all file data, find the end of the last one */ zip_uint64_t last_index = ZIP_UINT64_MAX; unchanged_offset = 0; for (i = 0; i < za->nentry; i++) { if (za->entry[i].orig != NULL) { if (za->entry[i].orig->offset >= unchanged_offset) { unchanged_offset = za->entry[i].orig->offset; last_index = i; } } } if (last_index != ZIP_UINT64_MAX) { if ((unchanged_offset = _zip_file_get_end(za, last_index, &za->error)) == 0) { free(filelist); return -1; } } } if (unchanged_offset > 0) { if (zip_source_begin_write_cloning(za->src, unchanged_offset) < 0) { /* cloning not supported, need to copy everything */ unchanged_offset = 0; } } } if (unchanged_offset == 0) { if (zip_source_begin_write(za->src) < 0) { _zip_error_set_from_source(&za->error, za->src); free(filelist); return -1; } } _zip_progress_start(za->progress); error = 0; for (j = 0; j < survivors; j++) { int new_data; zip_entry_t *entry; zip_dirent_t *de; _zip_progress_subrange(za->progress, (double)j / (double)survivors, (double)(j + 1) / (double)survivors); i = filelist[j].idx; entry = za->entry + i; if (entry->orig != NULL && entry->orig->offset < unchanged_offset) { /* already implicitly copied by cloning */ continue; } 
new_data = (ZIP_ENTRY_DATA_CHANGED(entry) || ZIP_ENTRY_CHANGED(entry, ZIP_DIRENT_COMP_METHOD) || ZIP_ENTRY_CHANGED(entry, ZIP_DIRENT_ENCRYPTION_METHOD)); /* create new local directory entry */ if (entry->changes == NULL) { if ((entry->changes = _zip_dirent_clone(entry->orig)) == NULL) { zip_error_set(&za->error, ZIP_ER_MEMORY, 0); error = 1; break; } } de = entry->changes; if (_zip_read_local_ef(za, i) < 0) { error = 1; break; } if ((off = zip_source_tell_write(za->src)) < 0) { error = 1; break; } de->offset = (zip_uint64_t)off; if (new_data) { zip_source_t *zs; zs = NULL; if (!ZIP_ENTRY_DATA_CHANGED(entry)) { if ((zs = _zip_source_zip_new(za, za, i, ZIP_FL_UNCHANGED, 0, 0, NULL)) == NULL) { error = 1; break; } } /* add_data writes dirent */ if (add_data(za, zs ? zs : entry->source, de) < 0) { error = 1; if (zs) zip_source_free(zs); break; } if (zs) zip_source_free(zs); } else { zip_uint64_t offset; /* when copying data, all sizes are known -> no data descriptor needed */ de->bitflags &= (zip_uint16_t)~ZIP_GPBF_DATA_DESCRIPTOR; if (_zip_dirent_write(za, de, ZIP_FL_LOCAL) < 0) { error = 1; break; } if ((offset = _zip_file_get_offset(za, i, &za->error)) == 0) { error = 1; break; } if (zip_source_seek(za->src, (zip_int64_t)offset, SEEK_SET) < 0) { _zip_error_set_from_source(&za->error, za->src); error = 1; break; } if (copy_data(za, de->comp_size) < 0) { error = 1; break; } } } if (!error) { if (write_cdir(za, filelist, survivors) < 0) error = 1; } free(filelist); if (!error) { if (zip_source_commit_write(za->src) != 0) { _zip_error_set_from_source(&za->error, za->src); error = 1; } } _zip_progress_end(za->progress); if (error) { zip_source_rollback_write(za->src); return -1; } zip_discard(za); return 0; } /* add_data: stat src, write a preliminary dirent for de at the current write offset, wrap src in the decrypt / decompress / CRC / compress / encrypt layers selected by the needs_* flags, copy the layered source to za->src with copy_source, then seek back to offstart and rewrite the dirent with the final method, CRC and sizes before seeking to the end of the data again. Returns 0 on success and -1 on any failure. */ static int add_data(zip_t *za, zip_source_t *src, zip_dirent_t *de) { zip_int64_t offstart, offdata, offend, data_length; struct zip_stat st; zip_source_t *src_final, *src_tmp; int ret; int is_zip64; zip_flags_t flags; zip_int8_t compression_flags; bool 
needs_recompress, needs_decompress, needs_crc, needs_compress, needs_reencrypt, needs_decrypt, needs_encrypt; if (zip_source_stat(src, &st) < 0) { _zip_error_set_from_source(&za->error, src); return -1; } if ((st.valid & ZIP_STAT_COMP_METHOD) == 0) { st.valid |= ZIP_STAT_COMP_METHOD; st.comp_method = ZIP_CM_STORE; } if (ZIP_CM_IS_DEFAULT(de->comp_method) && st.comp_method != ZIP_CM_STORE) de->comp_method = st.comp_method; else if (de->comp_method == ZIP_CM_STORE && (st.valid & ZIP_STAT_SIZE)) { st.valid |= ZIP_STAT_COMP_SIZE; st.comp_size = st.size; } else { /* we'll recompress */ st.valid &= ~ZIP_STAT_COMP_SIZE; } if ((st.valid & ZIP_STAT_ENCRYPTION_METHOD) == 0) { st.valid |= ZIP_STAT_ENCRYPTION_METHOD; st.encryption_method = ZIP_EM_NONE; } flags = ZIP_EF_LOCAL; if ((st.valid & ZIP_STAT_SIZE) == 0) { flags |= ZIP_FL_FORCE_ZIP64; data_length = -1; } else { de->uncomp_size = st.size; /* this is technically incorrect (copy_source counts compressed data), but it's the best we have */ data_length = (zip_int64_t)st.size; if ((st.valid & ZIP_STAT_COMP_SIZE) == 0) { zip_uint64_t max_size; switch (ZIP_CM_ACTUAL(de->comp_method)) { case ZIP_CM_BZIP2: /* computed by looking at increase of 10 random files of size 1MB when * compressed with bzip2, rounded up: 1.006 */ max_size = 4269351188u; break; case ZIP_CM_DEFLATE: /* max deflate size increase: size + ceil(size/16k)*5+6 */ max_size = 4293656963u; break; case ZIP_CM_STORE: max_size = 0xffffffffu; break; default: max_size = 0; } if (st.size > max_size) { flags |= ZIP_FL_FORCE_ZIP64; } } else de->comp_size = st.comp_size; } if ((offstart = zip_source_tell_write(za->src)) < 0) { _zip_error_set_from_source(&za->error, za->src); return -1; } /* as long as we don't support non-seekable output, clear data descriptor bit */ de->bitflags &= (zip_uint16_t)~ZIP_GPBF_DATA_DESCRIPTOR; if ((is_zip64 = _zip_dirent_write(za, de, flags)) < 0) { return -1; } needs_recompress = st.comp_method != ZIP_CM_ACTUAL(de->comp_method); 
needs_decompress = needs_recompress && (st.comp_method != ZIP_CM_STORE); needs_crc = (st.comp_method == ZIP_CM_STORE) || needs_decompress; needs_compress = needs_recompress && (de->comp_method != ZIP_CM_STORE); needs_reencrypt = needs_recompress || (de->changed & ZIP_DIRENT_PASSWORD) || (de->encryption_method != st.encryption_method); needs_decrypt = needs_reencrypt && (st.encryption_method != ZIP_EM_NONE); needs_encrypt = needs_reencrypt && (de->encryption_method != ZIP_EM_NONE); src_final = src; zip_source_keep(src_final); if (needs_decrypt) { zip_encryption_implementation impl; if ((impl = _zip_get_encryption_implementation(st.encryption_method, ZIP_CODEC_DECODE)) == NULL) { zip_error_set(&za->error, ZIP_ER_ENCRNOTSUPP, 0); zip_source_free(src_final); return -1; } if ((src_tmp = impl(za, src_final, st.encryption_method, ZIP_CODEC_DECODE, za->default_password)) == NULL) { /* error set by impl */ zip_source_free(src_final); return -1; } zip_source_free(src_final); src_final = src_tmp; } if (needs_decompress) { if ((src_tmp = zip_source_decompress(za, src_final, st.comp_method)) == NULL) { zip_source_free(src_final); return -1; } zip_source_free(src_final); src_final = src_tmp; } if (needs_crc) { if ((src_tmp = zip_source_crc(za, src_final, 0)) == NULL) { zip_source_free(src_final); return -1; } zip_source_free(src_final); src_final = src_tmp; } if (needs_compress) { if ((src_tmp = zip_source_compress(za, src_final, de->comp_method, de->compression_level)) == NULL) { zip_source_free(src_final); return -1; } zip_source_free(src_final); src_final = src_tmp; } if (needs_encrypt) { zip_encryption_implementation impl; const char *password = NULL; if (de->password) { password = de->password; } else if (za->default_password) { password = za->default_password; } if ((impl = _zip_get_encryption_implementation(de->encryption_method, ZIP_CODEC_ENCODE)) == NULL) { zip_error_set(&za->error, ZIP_ER_ENCRNOTSUPP, 0); zip_source_free(src_final); return -1; } if ((src_tmp = impl(za, 
src_final, de->encryption_method, ZIP_CODEC_ENCODE, password)) == NULL) { /* error set by impl */ zip_source_free(src_final); return -1; } zip_source_free(src_final); src_final = src_tmp; } /* NOTE(review): the early return just below does not call zip_source_free(src_final), unlike the other error exits above - confirm whether that is intended. */ if ((offdata = zip_source_tell_write(za->src)) < 0) { _zip_error_set_from_source(&za->error, za->src); return -1; } ret = copy_source(za, src_final, data_length); if (zip_source_stat(src_final, &st) < 0) { _zip_error_set_from_source(&za->error, src_final); ret = -1; } if ((compression_flags = zip_source_get_compression_flags(src_final)) < 0) { _zip_error_set_from_source(&za->error, src_final); ret = -1; } zip_source_free(src_final); if (ret < 0) { return -1; } if ((offend = zip_source_tell_write(za->src)) < 0) { _zip_error_set_from_source(&za->error, za->src); return -1; } if (zip_source_seek_write(za->src, offstart, SEEK_SET) < 0) { _zip_error_set_from_source(&za->error, za->src); return -1; } if ((st.valid & (ZIP_STAT_COMP_METHOD | ZIP_STAT_CRC | ZIP_STAT_SIZE)) != (ZIP_STAT_COMP_METHOD | ZIP_STAT_CRC | ZIP_STAT_SIZE)) { zip_error_set(&za->error, ZIP_ER_INTERNAL, 0); return -1; } if ((de->changed & ZIP_DIRENT_LAST_MOD) == 0) { if (st.valid & ZIP_STAT_MTIME) de->last_mod = st.mtime; else time(&de->last_mod); } de->comp_method = st.comp_method; de->crc = st.crc; de->uncomp_size = st.size; de->comp_size = (zip_uint64_t)(offend - offdata); de->bitflags = (zip_uint16_t)((de->bitflags & (zip_uint16_t)~6) | ((zip_uint8_t)compression_flags << 1)); _zip_dirent_set_version_needed(de, (flags & ZIP_FL_FORCE_ZIP64) != 0); if ((ret = _zip_dirent_write(za, de, flags)) < 0) return -1; if (is_zip64 != ret) { /* Zip64 mismatch between preliminary file header written before data and final file header written afterwards */ zip_error_set(&za->error, ZIP_ER_INTERNAL, 0); return -1; } if (zip_source_seek_write(za->src, offend, SEEK_SET) < 0) { _zip_error_set_from_source(&za->error, za->src); return -1; } return 0; } /* copy_data: move len bytes from za->src (read with _zip_read) to the output (written with _zip_write) in chunks of at most sizeof(buf) bytes, updating za->progress after every chunk. Returns 0 on success and -1 as soon as a read or write fails. */ static int copy_data(zip_t *za, zip_uint64_t len) { zip_uint8_t buf[BUFSIZE]; size_t 
n; double total = (double)len; while (len > 0) { n = len > sizeof(buf) ? sizeof(buf) : len; if (_zip_read(za->src, buf, n, &za->error) < 0) { return -1; } if (_zip_write(za, buf, n) < 0) { return -1; } len -= n; _zip_progress_update(za->progress, (total - (double)len) / total); } return 0; } /* copy_source: open src, pass everything zip_source_read delivers to _zip_write until the read returns 0 or a negative value, then close src. Progress is updated only after a full-buffer read, and only when za->progress is set and data_length > 0. Returns 0 on success and -1 if opening, reading or writing fails. */ static int copy_source(zip_t *za, zip_source_t *src, zip_int64_t data_length) { zip_uint8_t buf[BUFSIZE]; zip_int64_t n, current; int ret; if (zip_source_open(src) < 0) { _zip_error_set_from_source(&za->error, src); return -1; } ret = 0; current = 0; while ((n = zip_source_read(src, buf, sizeof(buf))) > 0) { if (_zip_write(za, buf, (zip_uint64_t)n) < 0) { ret = -1; break; } if (n == sizeof(buf) && za->progress && data_length > 0) { current += n; _zip_progress_update(za->progress, (double)current / (double)data_length); } } if (n < 0) { _zip_error_set_from_source(&za->error, src); ret = -1; } zip_source_close(src); return ret; } /* write_cdir: query the write offset, write the central directory for the surviving entries with _zip_cdir_write, and query the write offset again. Returns 0 on success and -1 if any of the three calls reports a negative value. NOTE(review): cd_start, size and end are only used for these error checks, and the two tell failures do not set za->error here. */ static int write_cdir(zip_t *za, const zip_filelist_t *filelist, zip_uint64_t survivors) { zip_int64_t cd_start, end, size; if ((cd_start = zip_source_tell_write(za->src)) < 0) { return -1; } if ((size = _zip_cdir_write(za, filelist, survivors)) < 0) { return -1; } if ((end = zip_source_tell_write(za->src)) < 0) { return -1; } return 0; } /* _zip_changed: return 1 when za->comment_changed is set, za->ch_flags differs from za->flags, or any entry satisfies ZIP_ENTRY_HAS_CHANGES; otherwise return 0. When survivorsp is non-NULL it receives the number of entries not marked deleted. */ int _zip_changed(const zip_t *za, zip_uint64_t *survivorsp) { int changed; zip_uint64_t i, survivors; changed = 0; survivors = 0; if (za->comment_changed || za->ch_flags != za->flags) { changed = 1; } for (i = 0; i < za->nentry; i++) { if (ZIP_ENTRY_HAS_CHANGES(&za->entry[i])) { changed = 1; } if (!za->entry[i].deleted) { survivors++; } } if (survivorsp) { *survivorsp = survivors; } return changed; }