Index: src/OFMutableString.h
==================================================================
--- src/OFMutableString.h
+++ src/OFMutableString.h
@@ -102,24 +102,33 @@
  * \param string An OFString to prepend
  */
 - (void)prependString: (OFString*)string;
 
 /**
- * \brief Reverses the OFMutableString.
+ * \brief Reverses the string.
  */
 - (void)reverse;
 
 /**
- * \brief Converts the OFMutableString to uppercase.
+ * \brief Converts the string to uppercase.
  */
 - (void)uppercase;
 
 /**
- * \brief Converts the OFMutableString to lowercase.
+ * \brief Converts the string to lowercase.
  */
 - (void)lowercase;
 
+/**
+ * \brief Capitalizes the string.
+ *
+ * \note This only considers spaces, tabs, newlines and carriage returns to be
+ *	 word delimiters! Also note that this might change in the future to
+ *	 all word delimiters specified by Unicode!
+ */
+- (void)capitalize;
+
 /**
  * \brief Inserts a string at the specified index.
  *
  * \param string The string to insert
  * \param index The index
Index: src/OFMutableString.m
==================================================================
--- src/OFMutableString.m
+++ src/OFMutableString.m
@@ -252,23 +252,53 @@
 		return (id)&placeholder;
 
 	return [super alloc];
 }
 
-- (void)_applyTable: (const of_unichar_t* const[])table
-	   withSize: (size_t)tableSize
+/*
+ * Converts every character using one of two case mapping tables: startTable
+ * for the first character of a word and middleTable for all other characters.
+ * Words are separated by spaces, tabs, newlines and carriage returns.
+ */
+- (void)_convertWithWordStartTable: (const of_unichar_t *const[])startTable
+		   wordMiddleTable: (const of_unichar_t *const[])middleTable
+		wordStartTableSize: (size_t)startTableSize
+	       wordMiddleTableSize: (size_t)middleTableSize
 {
 	OFAutoreleasePool *pool = [[OFAutoreleasePool alloc] init];
 	const of_unichar_t *string = [self unicodeString];
 	size_t i, length = [self length];
+	BOOL isStart = YES;
 
 	for (i = 0; i < length; i++) {
+		const of_unichar_t *const *table;
+		size_t tableSize;
 		of_unichar_t c = string[i];
+
+		if (isStart) {
+			table = startTable;
+			tableSize = startTableSize;
+		} else {
+			table = middleTable;
+			tableSize = middleTableSize;
+		}
 
 		if (c >> 8 < tableSize && table[c >> 8][c & 0xFF])
 			[self setCharacter: table[c >> 8][c & 0xFF]
 				   atIndex: i];
+
+		switch (c) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\r':
+			isStart = YES;
+			break;
+		default:
+			isStart = NO;
+			break;
+		}
 	}
 
 	[pool release];
 }
 
@@ -380,18 +410,30 @@
 	}
 }
 
 - (void)uppercase
 {
-	[self _applyTable: of_unicode_uppercase_table
-		 withSize: OF_UNICODE_UPPERCASE_TABLE_SIZE];
+	[self _convertWithWordStartTable: of_unicode_uppercase_table
+			 wordMiddleTable: of_unicode_uppercase_table
+		      wordStartTableSize: OF_UNICODE_UPPERCASE_TABLE_SIZE
+		     wordMiddleTableSize: OF_UNICODE_UPPERCASE_TABLE_SIZE];
 }
 
 - (void)lowercase
 {
-	[self _applyTable: of_unicode_lowercase_table
-		 withSize: OF_UNICODE_LOWERCASE_TABLE_SIZE];
+	[self _convertWithWordStartTable: of_unicode_lowercase_table
+			 wordMiddleTable: of_unicode_lowercase_table
+		      wordStartTableSize: OF_UNICODE_LOWERCASE_TABLE_SIZE
+		     wordMiddleTableSize: OF_UNICODE_LOWERCASE_TABLE_SIZE];
+}
+
+- (void)capitalize
+{
+	[self _convertWithWordStartTable: of_unicode_titlecase_table
+			 wordMiddleTable: of_unicode_lowercase_table
+		      wordStartTableSize: OF_UNICODE_TITLECASE_TABLE_SIZE
+		     wordMiddleTableSize: OF_UNICODE_LOWERCASE_TABLE_SIZE];
 }
 
 - (void)insertString: (OFString*)string
 	     atIndex: (size_t)index
 {
Index: src/OFMutableString_UTF8.m
==================================================================
--- src/OFMutableString_UTF8.m
+++ src/OFMutableString_UTF8.m
@@ -42,30 +42,59 @@
 {
 	if (self == [OFMutableString_UTF8 class])
 		[self inheritMethodsFromClass: [OFString_UTF8 class]];
 }
 
-- (void)_applyTable: (const of_unichar_t* const[])table
-	   withSize: (size_t)tableSize
+/*
+ * Converts every character using one of two case mapping tables: startTable
+ * for the first character of a word and middleTable for all other characters.
+ * Words are separated by spaces, tabs, newlines and carriage returns.
+ */
+- (void)_convertWithWordStartTable: (const of_unichar_t *const[])startTable
+		   wordMiddleTable: (const of_unichar_t *const[])middleTable
+		wordStartTableSize: (size_t)startTableSize
+	       wordMiddleTableSize: (size_t)middleTableSize
 {
-	of_unichar_t c;
 	of_unichar_t *unicodeString;
-	size_t unicodeLen, newCStringLength, cLen;
-	size_t i, j, d;
+	size_t unicodeLen, newCStringLength;
+	size_t i, j;
 	char *newCString;
+	BOOL isStart = YES;
 
 	if (!s->UTF8) {
-		assert(tableSize >= 1);
-
-		uint8_t *p = (uint8_t*)s->cString + s->cStringLength;
 		uint8_t t;
+		const of_unichar_t *const *table;
+		size_t tableSize;
+
+		assert(startTableSize >= 1 && middleTableSize >= 1);
 
 		s->hashed = NO;
 
-		while (--p >= (uint8_t*)s->cString)
-			if ((t = table[0][*p]) != 0)
-				*p = t;
+		for (i = 0; i < s->cStringLength; i++) {
+			if (isStart) {
+				table = startTable;
+				tableSize = startTableSize;
+			} else {
+				table = middleTable;
+				tableSize = middleTableSize;
+			}
+
+			switch (s->cString[i]) {
+			case ' ':
+			case '\t':
+			case '\n':
+			case '\r':
+				isStart = YES;
+				break;
+			default:
+				isStart = NO;
+				break;
+			}
+
+			if ((t = table[0][(uint8_t)s->cString[i]]) != 0)
+				s->cString[i] = t;
+		}
 
 		return;
 	}
 
 	unicodeLen = [self length];
@@ -74,18 +103,43 @@
 
 	i = j = 0;
 	newCStringLength = 0;
 
 	while (i < s->cStringLength) {
+		const of_unichar_t *const *table;
+		size_t tableSize;
+		of_unichar_t c;
+		size_t cLen;
+
+		if (isStart) {
+			table = startTable;
+			tableSize = startTableSize;
+		} else {
+			table = middleTable;
+			tableSize = middleTableSize;
+		}
+
 		cLen = of_string_utf8_to_unicode(s->cString + i,
 		    s->cStringLength - i, &c);
 
 		if (cLen == 0 || c > 0x10FFFF) {
 			[self freeMemory: unicodeString];
 			@throw [OFInvalidEncodingException
 			    exceptionWithClass: isa];
 		}
+
+		switch (c) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\r':
+			isStart = YES;
+			break;
+		default:
+			isStart = NO;
+			break;
+		}
 
 		if (c >> 8 < tableSize) {
 			of_unichar_t tc = table[c >> 8][c & 0xFF];
 
 			if (tc)
@@ -118,10 +172,12 @@
 	}
 
 	j = 0;
 
 	for (i = 0; i < unicodeLen; i++) {
+		size_t d;
+
 		if ((d = of_string_unicode_to_utf8(unicodeString[i],
 		    newCString + j)) == 0) {
 			[self freeMemory: unicodeString];
 			[self freeMemory: newCString];
 			@throw [OFInvalidEncodingException
Index: src/OFString.h
==================================================================
--- src/OFString.h
+++ src/OFString.h
@@ -690,10 +690,21 @@
  *
  * \return The string in lowercase
  */
 - (OFString*)lowercaseString;
 
+/**
+ * \brief Returns the string capitalized.
+ *
+ * \note This only considers spaces, tabs, newlines and carriage returns to be
+ *	 word delimiters! Also note that this might change in the future to
+ *	 all word delimiters specified by Unicode!
+ *
+ * \return The capitalized string
+ */
+- (OFString*)capitalizedString;
+
 /**
  * \brief Creates a new string by deleting leading whitespaces.
  *
  * \return A new autoreleased OFString with leading whitespaces deleted
  */
Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -1476,10 +1476,21 @@
 - (OFString*)lowercaseString
 {
 	OFMutableString *new = [[self mutableCopy] autorelease];
 
 	[new lowercase];
+
+	[new makeImmutable];
+
+	return new;
+}
+
+- (OFString*)capitalizedString
+{
+	OFMutableString *new = [[self mutableCopy] autorelease];
+
+	[new capitalize];
 
 	[new makeImmutable];
 
 	return new;
 }