@@ -137,10 +137,63 @@
 		return 4;
 	}
 
 	return 0;
 }
+
+/*
+ * Decodes the UTF-8 sequence at the start of buf_ into a single character.
+ *
+ * len is the number of bytes that may be read from buf_. Returns the decoded
+ * character, or OF_INVALID_UNICHAR if len is 0, the first byte cannot start
+ * a sequence, the sequence needs more than len bytes, or one of the
+ * following bytes is not of the form 10xxxxxx.
+ *
+ * Overlong encodings, surrogates and values above U+10FFFF are not rejected
+ * here.
+ */
+of_unichar_t
+of_string_utf8_to_unicode(const char *buf_, size_t len)
+{
+	const uint8_t *buf = (const uint8_t*)buf_;
+	size_t i, seq_len;
+	of_unichar_t c;
+
+	if (OF_UNLIKELY(len < 1))
+		return OF_INVALID_UNICHAR;
+
+	if (buf[0] < 0x80)
+		return buf[0];
+
+	/* The number of leading 1 bits gives the length of the sequence. */
+	if ((buf[0] & 0xE0) == 0xC0) {
+		seq_len = 2;
+		c = buf[0] & 0x1F;
+	} else if ((buf[0] & 0xF0) == 0xE0) {
+		seq_len = 3;
+		c = buf[0] & 0x0F;
+	} else if ((buf[0] & 0xF8) == 0xF0) {
+		seq_len = 4;
+		c = buf[0] & 0x07;
+	} else {
+		/* Stray continuation byte (0x80-0xBF) or 0xF8-0xFF. */
+		return OF_INVALID_UNICHAR;
+	}
+
+	if (OF_UNLIKELY(len < seq_len))
+		return OF_INVALID_UNICHAR;
+
+	for (i = 1; i < seq_len; i++) {
+		/* Every byte after the first must look like 10xxxxxx. */
+		if (OF_UNLIKELY((buf[i] & 0xC0) != 0x80))
+			return OF_INVALID_UNICHAR;
+
+		c = (c << 6) | (buf[i] & 0x3F);
+	}
+
+	return c;
+}
 
 size_t
 of_string_position_to_index(const char *str, size_t pos)
 {
 	size_t i, idx = pos;
@@ -555,10 +608,37 @@
 		OF_HASH_ADD(hash, string[i]);
 	OF_HASH_FINALIZE(hash);
 
 	return hash;
 }
+
+/*
+ * Returns the character at the given index.
+ *
+ * index is first mapped through of_string_index_to_position(); the result is
+ * then used as an offset into string. Throws OFOutOfRangeException if that
+ * offset is not below length and OFInvalidEncodingException if
+ * of_string_utf8_to_unicode() reports OF_INVALID_UNICHAR for the bytes there.
+ *
+ * NOTE(review): index reaches of_string_index_to_position() before any range
+ * check; confirm that helper copes with an index past the end of the string.
+ */
+- (of_unichar_t)characterAtIndex: (size_t)index
+{
+	of_unichar_t c;
+
+	index = of_string_index_to_position(string, index, length);
+
+	if (index >= length)
+		@throw [OFOutOfRangeException newWithClass: isa];
+
+	if ((c = of_string_utf8_to_unicode(string + index, length - index)) ==
+	    OF_INVALID_UNICHAR)
+		@throw [OFInvalidEncodingException newWithClass: isa];
+
+	return c;
+}
 
 - (size_t)indexOfFirstOccurrenceOfString: (OFString*)str
 {
 	const char *str_c = [str cString];
 	size_t str_len = [str cStringLength];