/* LibTomCrypt, modular cryptographic library -- Tom St Denis */
/* SPDX-License-Identifier: Unlicense */
#include "tomcrypt_private.h"
/**
@file der_decode_utf8_string.c
ASN.1 DER, decode a UTF8 STRING, Tom St Denis
*/
#ifdef LTC_DER
/**
  Decode a UTF8 STRING and recover an array of unicode characters.

  Each UTF-8 sequence in the payload is validated while decoding: the lead
  byte must announce 1 to 4 bytes, every trailing byte must have the form
  10xxxxxx, the sequence must be the shortest possible encoding of its value
  (no overlong forms), the value must not exceed U+10FFFF and it must be
  representable in a wchar_t.

  @param in      The DER encoded UTF8 STRING
  @param inlen   The size of the DER UTF8 STRING
  @param out     [out] The array of unicode characters (wchar_t*)
  @param outlen  [in/out] On input the capacity of out (in characters),
                 on output the number of characters held by the string
  @return CRYPT_OK if successful,
          CRYPT_BUFFER_OVERFLOW if out is too small (*outlen is then set to
          the number of characters required),
          CRYPT_INVALID_PACKET if the input is malformed,
          or the error reported by der_decode_asn1_length()
*/
int der_decode_utf8_string(const unsigned char *in,  unsigned long inlen,
                                 wchar_t *out, unsigned long *outlen)
{
   /* smallest value that legitimately needs n trailing bytes (index n);
      anything below it is an overlong encoding */
   static const unsigned long min_cp[4] = { 0x0UL, 0x80UL, 0x800UL, 0x10000UL };

   wchar_t       tmp;
   unsigned long cp, x, y, z, n, len;
   int           err;

   LTC_ARGCHK(in     != NULL);
   LTC_ARGCHK(out    != NULL);
   LTC_ARGCHK(outlen != NULL);

   /* must have header at least */
   if (inlen < 2) {
      return CRYPT_INVALID_PACKET;
   }

   /* check for 0x0C (only the low five tag-number bits are compared) */
   if ((in[0] & 0x1F) != 0x0C) {
      return CRYPT_INVALID_PACKET;
   }
   x = 1;

   /* get the length of the data; y goes in as the number of bytes available
      and comes back as the number of bytes the length field occupied */
   y = inlen - x;
   if ((err = der_decode_asn1_length(in + x, &y, &len)) != CRYPT_OK) {
      return err;
   }
   x += y;

   /* the announced payload must fit in what is left of the input */
   if (len > (inlen - x)) {
      return CRYPT_INVALID_PACKET;
   }

   /* proceed to recover unicode characters from utf8 data.
      for reference see Section 3 of RFC 3629:

        https://tools.ietf.org/html/rfc3629#section-3
    */
   len += x;   /* from here on len is the index one past the payload */
   for (y = 0; x < len; ) {
      /* read first byte; the value is accumulated in an unsigned long so
         that no bits are lost when wchar_t is narrower than 21 bits */
      cp = in[x++];

      /* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes.
         the form of those bytes must match a row in the following table:

           0xxxxxxx
           110xxxxx 10xxxxxx
           1110xxxx 10xxxxxx 10xxxxxx
           11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

         the number of leading ones in the first byte (0,2,3,4) determines the
         number of remaining bytes to read (0,1,2,3)
       */

      /* determine z, the number of leading ones.
         this is done by left-shifting cp, which clears the ms-bits */
      for (z = 0; (cp & 0x80) && (z <= 4); z++) {
         cp = (cp << 1) & 0xFF;
      }

      /* z should be in {0,2,3,4} */
      if (z == 1 || z > 4) {
         return CRYPT_INVALID_PACKET;
      }

      /* right-shift cp to restore least-sig bits */
      cp >>= z;

      /* n is the number of additional bytes to read */
      n = (z > 0) ? (z - 1) : 0;

      /* x <= len holds here, so the subtraction cannot wrap */
      if (n > (len - x)) {
         return CRYPT_INVALID_PACKET;
      }

      /* read remaining bytes */
      for (z = n; z != 0; z--) {
         if ((in[x] & 0xC0) != 0x80) {
            return CRYPT_INVALID_PACKET;
         }
         cp = (cp << 6) | ((unsigned long)in[x++] & 0x3F);
      }

      /* reject overlong encodings and values beyond the unicode range,
         as required from decoders by RFC 3629 */
      if (cp < min_cp[n] || cp > 0x10FFFFUL) {
         return CRYPT_INVALID_PACKET;
      }

      /* reject values that a (narrow) wchar_t cannot hold instead of
         silently truncating them */
      tmp = (wchar_t)cp;
      if ((unsigned long)tmp != cp) {
         return CRYPT_INVALID_PACKET;
      }

      /* keep counting past the end of out so the caller learns the
         required size, but never write out of bounds */
      if (y < *outlen) {
         out[y] = tmp;
      }
      y++;
   }

   if (y > *outlen) {
      err = CRYPT_BUFFER_OVERFLOW;
   } else {
      err = CRYPT_OK;
   }
   *outlen = y;

   return err;
}
#endif