diff options
Diffstat (limited to 'src/Common/libzip/zip_utf-8.c')
-rw-r--r-- | src/Common/libzip/zip_utf-8.c | 116 |
1 files changed, 58 insertions, 58 deletions
diff --git a/src/Common/libzip/zip_utf-8.c b/src/Common/libzip/zip_utf-8.c index 099db9bf..678912f6 100644 --- a/src/Common/libzip/zip_utf-8.c +++ b/src/Common/libzip/zip_utf-8.c @@ -1,9 +1,9 @@ /* zip_utf-8.c -- UTF-8 support functions for libzip - Copyright (C) 2011-2019 Dieter Baron and Thomas Klausner + Copyright (C) 2011-2021 Dieter Baron and Thomas Klausner This file is part of libzip, a library to manipulate ZIP archives. - The authors can be contacted at <libzip@nih.at> + The authors can be contacted at <info@libzip.org> Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -103,54 +103,54 @@ _zip_guess_encoding(zip_string_t *str, zip_encoding_type_t expected_encoding) { zip_uint32_t i, j, ulen; if (str == NULL) - return ZIP_ENCODING_ASCII; + return ZIP_ENCODING_ASCII; name = str->raw; if (str->encoding != ZIP_ENCODING_UNKNOWN) - enc = str->encoding; + enc = str->encoding; else { - enc = ZIP_ENCODING_ASCII; - for (i = 0; i < str->length; i++) { - if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t') - continue; - - enc = ZIP_ENCODING_UTF8_GUESSED; - if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH) - ulen = 1; - else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH) - ulen = 2; - else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH) - ulen = 3; - else { - enc = ZIP_ENCODING_CP437; - break; - } - - if (i + ulen >= str->length) { - enc = ZIP_ENCODING_CP437; - break; - } - - for (j = 1; j <= ulen; j++) { - if ((name[i + j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) { - enc = ZIP_ENCODING_CP437; - goto done; - } - } - i += ulen; - } + enc = ZIP_ENCODING_ASCII; + for (i = 0; i < str->length; i++) { + if ((name[i] > 31 && name[i] < 128) || name[i] == '\r' || name[i] == '\n' || name[i] == '\t') + continue; + + enc = ZIP_ENCODING_UTF8_GUESSED; + if ((name[i] & UTF_8_LEN_2_MASK) == UTF_8_LEN_2_MATCH) + ulen = 1; + else if ((name[i] & UTF_8_LEN_3_MASK) == UTF_8_LEN_3_MATCH) + ulen = 2; + else if ((name[i] & UTF_8_LEN_4_MASK) == UTF_8_LEN_4_MATCH) + ulen = 3; + else { + enc = ZIP_ENCODING_CP437; + break; + } + + if (i + ulen >= str->length) { + enc = ZIP_ENCODING_CP437; + break; + } + + for (j = 1; j <= ulen; j++) { + if ((name[i + j] & UTF_8_CONTINUE_MASK) != UTF_8_CONTINUE_MATCH) { + enc = ZIP_ENCODING_CP437; + goto done; + } + } + i += ulen; + } } done: str->encoding = enc; if (expected_encoding != ZIP_ENCODING_UNKNOWN) { - if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED) - str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN; + if (expected_encoding == ZIP_ENCODING_UTF8_KNOWN && enc == ZIP_ENCODING_UTF8_GUESSED) + str->encoding = enc = ZIP_ENCODING_UTF8_KNOWN; - if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII) - return ZIP_ENCODING_ERROR; + if (expected_encoding != enc && enc != ZIP_ENCODING_ASCII) + return ZIP_ENCODING_ERROR; } return enc; @@ -160,11 +160,11 @@ done: static zip_uint32_t _zip_unicode_to_utf8_len(zip_uint32_t codepoint) { if (codepoint < 0x0080) - return 1; + return 1; if (codepoint < 0x0800) - return 2; + return 2; if (codepoint < 0x10000) - return 3; + return 3; return 4; } @@ -172,19 +172,19 @@ _zip_unicode_to_utf8_len(zip_uint32_t codepoint) { static zip_uint32_t _zip_unicode_to_utf8(zip_uint32_t codepoint, zip_uint8_t *buf) { if (codepoint < 0x0080) { - buf[0] = codepoint & 0xff; - return 1; + buf[0] = codepoint & 0xff; + return 1; } if (codepoint < 0x0800) { - buf[0] = (zip_uint8_t)(UTF_8_LEN_2_MATCH | ((codepoint >> 6) & 0x1f)); - buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); - return 2; + buf[0] = (zip_uint8_t)(UTF_8_LEN_2_MATCH | ((codepoint >> 6) & 0x1f)); + buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); + return 2; } if (codepoint < 0x10000) { - buf[0] = (zip_uint8_t)(UTF_8_LEN_3_MATCH | ((codepoint >> 12) & 0x0f)); - buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 6) & 0x3f)); - buf[2] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); - return 3; + buf[0] = (zip_uint8_t)(UTF_8_LEN_3_MATCH | ((codepoint >> 12) & 0x0f)); + buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 6) & 0x3f)); + buf[2] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | (codepoint & 0x3f)); + return 3; } buf[0] = (zip_uint8_t)(UTF_8_LEN_4_MATCH | ((codepoint >> 18) & 0x07)); buf[1] = (zip_uint8_t)(UTF_8_CONTINUE_MATCH | ((codepoint >> 12) & 0x3f)); @@ -201,26 +201,26 @@ _zip_cp437_to_utf8(const zip_uint8_t *const _cp437buf, zip_uint32_t len, zip_uin zip_uint32_t buflen, i, offset; if (len == 0) { - if (utf8_lenp) - *utf8_lenp = 0; - return NULL; + if (utf8_lenp) + *utf8_lenp = 0; + return NULL; } buflen = 1; for (i = 0; i < len; i++) - buflen += _zip_unicode_to_utf8_len(_cp437_to_unicode[cp437buf[i]]); + buflen += _zip_unicode_to_utf8_len(_cp437_to_unicode[cp437buf[i]]); if ((utf8buf = (zip_uint8_t *)malloc(buflen)) == NULL) { - zip_error_set(error, ZIP_ER_MEMORY, 0); - return NULL; + zip_error_set(error, ZIP_ER_MEMORY, 0); + return NULL; } offset = 0; for (i = 0; i < len; i++) - offset += _zip_unicode_to_utf8(_cp437_to_unicode[cp437buf[i]], utf8buf + offset); + offset += _zip_unicode_to_utf8(_cp437_to_unicode[cp437buf[i]], utf8buf + offset); utf8buf[buflen - 1] = 0; if (utf8_lenp) - *utf8_lenp = buflen - 1; + *utf8_lenp = buflen - 1; return utf8buf; } |