Index: src/OFHTTPResponse.m
==================================================================
--- src/OFHTTPResponse.m
+++ src/OFHTTPResponse.m
@@ -21,14 +21,132 @@
 #import "OFDictionary.h"
 #import "OFArray.h"
 #import "OFDataArray.h"
 #import "OFHTTPCookie.h"
 
+#import "OFInvalidEncodingException.h"
 #import "OFInvalidFormatException.h"
 #import "OFOutOfRangeException.h"
 #import "OFTruncatedDataException.h"
 #import "OFUnsupportedVersionException.h"
+
+/*
+ * Extracts the "charset" parameter from a Content-Type header value and maps
+ * it to a string encoding via of_string_parse_encoding().
+ *
+ * Returns OF_STRING_ENCODING_AUTODETECT if no charset parameter is found, if
+ * its value is not a recognized encoding name, or if anything other than a
+ * space or ';' follows a quoted parameter value.
+ */
+static of_string_encoding_t
+encodingForContentType(OFString *contentType)
+{
+	const char *UTF8String = [contentType UTF8String];
+	size_t last, length = [contentType UTF8StringLength];
+	enum {
+		STATE_TYPE,
+		STATE_BEFORE_PARAM_NAME,
+		STATE_PARAM_NAME,
+		STATE_PARAM_VALUE_OR_QUOTE,
+		STATE_PARAM_VALUE,
+		STATE_PARAM_QUOTED_VALUE,
+		STATE_AFTER_PARAM_VALUE
+	} state = STATE_TYPE;
+	OFString *name = nil, *value, *charset = nil;
+
+	last = 0;
+	for (size_t i = 0; i < length; i++) {
+		switch (state) {
+		case STATE_TYPE:
+			if (UTF8String[i] == ';') {
+				state = STATE_BEFORE_PARAM_NAME;
+				last = i + 1;
+			}
+			break;
+		case STATE_BEFORE_PARAM_NAME:
+			if (UTF8String[i] == ' ')
+				last = i + 1;
+			else {
+				state = STATE_PARAM_NAME;
+				i--;
+			}
+			break;
+		case STATE_PARAM_NAME:
+			if (UTF8String[i] == '=') {
+				name = [OFString
+				    stringWithUTF8String: UTF8String + last
+						  length: i - last];
+				/*
+				 * Parameter names are case-insensitive, so
+				 * normalize before comparing to "charset".
+				 */
+				name = [name lowercaseString];
+
+				state = STATE_PARAM_VALUE_OR_QUOTE;
+				last = i + 1;
+			}
+			break;
+		case STATE_PARAM_VALUE_OR_QUOTE:
+			if (UTF8String[i] == '"') {
+				state = STATE_PARAM_QUOTED_VALUE;
+				last = i + 1;
+			} else {
+				state = STATE_PARAM_VALUE;
+				i--;
+			}
+			break;
+		case STATE_PARAM_VALUE:
+			if (UTF8String[i] == ';') {
+				value = [OFString
+				    stringWithUTF8String: UTF8String + last
+						  length: i - last];
+				value = [value
+				    stringByDeletingTrailingWhitespaces];
+
+				if ([name isEqual: @"charset"])
+					charset = value;
+
+				state = STATE_BEFORE_PARAM_NAME;
+				last = i + 1;
+			}
+			break;
+		case STATE_PARAM_QUOTED_VALUE:
+			if (UTF8String[i] == '"') {
+				value = [OFString
+				    stringWithUTF8String: UTF8String + last
+						  length: i - last];
+
+				if ([name isEqual: @"charset"])
+					charset = value;
+
+				state = STATE_AFTER_PARAM_VALUE;
+			}
+			break;
+		case STATE_AFTER_PARAM_VALUE:
+			if (UTF8String[i] == ';') {
+				state = STATE_BEFORE_PARAM_NAME;
+				last = i + 1;
+			} else if (UTF8String[i] != ' ')
+				return OF_STRING_ENCODING_AUTODETECT;
+			break;
+		}
+	}
+	if (state == STATE_PARAM_VALUE) {
+		value = [OFString stringWithUTF8String: UTF8String + last
+						length: length - last];
+		value = [value stringByDeletingTrailingWhitespaces];
+
+		if ([name isEqual: @"charset"])
+			charset = value;
+	}
+
+	@try {
+		return of_string_parse_encoding(charset);
+	} @catch (OFInvalidEncodingException *e) {
+		return OF_STRING_ENCODING_AUTODETECT;
+	}
+}
 
 @implementation OFHTTPResponse
 @synthesize statusCode = _statusCode, headers = _headers, cookies = _cookies;
 
 - init
@@ -106,44 +224,12 @@
 	void *pool = objc_autoreleasePoolPush();
 	OFString *contentType, *contentLength, *ret;
 	OFDataArray *data;
 
 	if (encoding == OF_STRING_ENCODING_AUTODETECT &&
-	    (contentType = [_headers objectForKey: @"Content-Type"]) != nil) {
-		contentType = [contentType lowercaseString];
-
-		if ([contentType hasSuffix: @"charset=utf-8"])
-			encoding = OF_STRING_ENCODING_UTF_8;
-		else if ([contentType hasSuffix: @"charset=iso-8859-1"] ||
-		    [contentType hasSuffix: @"charset=iso_8859-1"])
-			encoding = OF_STRING_ENCODING_ISO_8859_1;
-		else if ([contentType hasSuffix: @"charset=iso-8859-2"] ||
-		    [contentType hasSuffix: @"charset=iso_8859-2"])
-			encoding = OF_STRING_ENCODING_ISO_8859_2;
-		else if ([contentType hasSuffix: @"charset=iso-8859-15"] ||
-		    [contentType hasSuffix: @"charset=iso_8859-15"])
-			encoding = OF_STRING_ENCODING_ISO_8859_15;
-		else if ([contentType hasSuffix: @"charset=windows-1251"] ||
-		    [contentType hasSuffix: @"charset=cp1251"] ||
-		    [contentType hasSuffix: @"charset=cp-1251"])
-			encoding = OF_STRING_ENCODING_WINDOWS_1251;
-		else if ([contentType hasSuffix: @"charset=windows-1252"] ||
-		    [contentType hasSuffix: @"charset=cp1252"] ||
-		    [contentType hasSuffix: @"charset=cp-1252"])
-			encoding = OF_STRING_ENCODING_WINDOWS_1252;
-		else if ([contentType hasSuffix: @"charset=cp437"] ||
-		    [contentType hasSuffix: @"charset=cp-437"])
-			encoding = OF_STRING_ENCODING_CODEPAGE_437;
-		else if ([contentType hasSuffix: @"charset=cp850"] ||
-		    [contentType hasSuffix: @"charset=cp-850"])
-			encoding = OF_STRING_ENCODING_CODEPAGE_850;
-		else if ([contentType hasSuffix: @"charset=cp858"] ||
-		    [contentType hasSuffix: @"charset=cp-858"])
-			encoding = OF_STRING_ENCODING_CODEPAGE_858;
-		else if ([contentType hasSuffix: @"charset=macintosh"])
-			encoding = OF_STRING_ENCODING_MAC_ROMAN;
-	}
+	    (contentType = [_headers objectForKey: @"Content-Type"]) != nil)
+		encoding = encodingForContentType(contentType);
 
 	if (encoding == OF_STRING_ENCODING_AUTODETECT)
 		encoding = OF_STRING_ENCODING_UTF_8;
 
 	data = [self readDataArrayTillEndOfStream];

Index: src/OFLocalization.m
==================================================================
--- src/OFLocalization.m
+++ src/OFLocalization.m
@@ -22,10 +22,11 @@
 #import "OFString.h"
 #import "OFArray.h"
 #import "OFDictionary.h"
 
 #import "OFInvalidArgumentException.h"
+#import "OFInvalidEncodingException.h"
 
 static OFLocalization *sharedLocalization = nil;
 
 @implementation OFLocalization
 @synthesize language = _language, territory = _territory, encoding = _encoding;
@@ -90,43 +91,20 @@
 
 		/* Encoding */
 		if ((tmp = strrchr(locale, '.')) != NULL) {
 			*tmp++ = '\0';
 
-			tmpLen = strlen(tmp);
-			for (size_t i = 0; i < tmpLen; i++)
-				tmp[i] = of_ascii_tolower(tmp[i]);
-
-			if (strcmp(tmp, "utf8") == 0 ||
-			    strcmp(tmp, "utf-8") == 0)
-				_encoding = OF_STRING_ENCODING_UTF_8;
-			else if (strcmp(tmp, "ascii") == 0 ||
-			    strcmp(tmp, "us-ascii") == 0)
-				_encoding = OF_STRING_ENCODING_ASCII;
-			else if (strcmp(tmp, "iso8859-1") == 0 ||
-			    strcmp(tmp, "iso-8859-1") == 0 ||
-			    strcmp(tmp, "iso_8859-1") == 0)
-				_encoding = OF_STRING_ENCODING_ISO_8859_1;
-			else if (strcmp(tmp, "iso8859-2") == 0 ||
-			    strcmp(tmp, "iso-8859-2") == 0 ||
-			    strcmp(tmp, "iso_8859-2") == 0)
-				_encoding = OF_STRING_ENCODING_ISO_8859_2;
-			else if (strcmp(tmp, "iso8859-15") == 0 ||
-			    strcmp(tmp, "iso-8859-15") == 0 ||
-			    strcmp(tmp, "iso_8859-15") == 0)
-				_encoding = OF_STRING_ENCODING_ISO_8859_15;
-			/* Windows and DJGPP use a codepage */
-			else if (strcmp(tmp, "1251") == 0)
-				_encoding = OF_STRING_ENCODING_WINDOWS_1251;
-			else if (strcmp(tmp, "1252") == 0)
-				_encoding = OF_STRING_ENCODING_WINDOWS_1252;
-			else if (strcmp(tmp, "437") == 0)
-				_encoding = OF_STRING_ENCODING_CODEPAGE_437;
-			else if (strcmp(tmp, "850") == 0)
-				_encoding = OF_STRING_ENCODING_CODEPAGE_850;
-			else if (strcmp(tmp, "858") == 0)
-				_encoding = OF_STRING_ENCODING_CODEPAGE_858;
+			@try {
+				const of_string_encoding_t ascii =
+				    OF_STRING_ENCODING_ASCII;
+
+				_encoding = of_string_parse_encoding([OFString
+				    stringWithCString: tmp
+					     encoding: ascii]);
+			} @catch (OFInvalidEncodingException *e) {
+				/* Unknown encoding: leave _encoding as is. */
+			}
 		}
 
 		/* Territory */
 		if ((tmp = strrchr(locale, '_')) != NULL) {
 			*tmp++ = '\0';

Index: src/OFString.h
==================================================================
--- src/OFString.h
+++ src/OFString.h
@@ -1126,10 +1126,11 @@
 @end
 
 #ifdef __cplusplus
 extern "C" {
 #endif
+extern of_string_encoding_t of_string_parse_encoding(OFString*);
 extern size_t of_string_utf8_encode(of_unichar_t, char*);
 extern ssize_t of_string_utf8_decode(const char*, size_t, of_unichar_t*);
 extern size_t of_string_utf16_length(const of_char16_t*);
 extern size_t of_string_utf32_length(const of_char32_t*);
 #ifdef __cplusplus

Index: src/OFString.m
==================================================================
--- src/OFString.m
+++ src/OFString.m
@@ -121,10 +121,70 @@
 void
 _reference_to_OFConstantString(void)
 {
 	[OFConstantString class];
 }
+
+/*
+ * Maps an encoding name to the matching of_string_encoding_t value. The name
+ * is lowercased first, so matching is case-insensitive.
+ *
+ * Throws an OFInvalidEncodingException if the name is not recognized.
+ */
+of_string_encoding_t
+of_string_parse_encoding(OFString *string)
+{
+	void *pool = objc_autoreleasePoolPush();
+	of_string_encoding_t encoding;
+
+	string = [string lowercaseString];
+
+	if ([string isEqual: @"utf8"] || [string isEqual: @"utf-8"])
+		encoding = OF_STRING_ENCODING_UTF_8;
+	else if ([string isEqual: @"ascii"] || [string isEqual: @"us-ascii"])
+		encoding = OF_STRING_ENCODING_ASCII;
+	else if ([string isEqual: @"iso-8859-1"] ||
+	    [string isEqual: @"iso_8859-1"] ||
+	    [string isEqual: @"iso8859-1"])
+		encoding = OF_STRING_ENCODING_ISO_8859_1;
+	else if ([string isEqual: @"iso-8859-2"] ||
+	    [string isEqual: @"iso_8859-2"] ||
+	    [string isEqual: @"iso8859-2"])
+		encoding = OF_STRING_ENCODING_ISO_8859_2;
+	else if ([string isEqual: @"iso-8859-15"] ||
+	    [string isEqual: @"iso_8859-15"] ||
+	    [string isEqual: @"iso8859-15"])
+		encoding = OF_STRING_ENCODING_ISO_8859_15;
+	else if ([string isEqual: @"windows-1251"] ||
+	    [string isEqual: @"cp1251"] || [string isEqual: @"cp-1251"] ||
+	    [string isEqual: @"1251"])
+		encoding = OF_STRING_ENCODING_WINDOWS_1251;
+	else if ([string isEqual: @"windows-1252"] ||
+	    [string isEqual: @"cp1252"] || [string isEqual: @"cp-1252"] ||
+	    [string isEqual: @"1252"])
+		encoding = OF_STRING_ENCODING_WINDOWS_1252;
+	else if ([string isEqual: @"cp437"] || [string isEqual: @"cp-437"] ||
+	    [string isEqual: @"ibm437"] || [string isEqual: @"437"])
+		encoding = OF_STRING_ENCODING_CODEPAGE_437;
+	else if ([string isEqual: @"cp850"] || [string isEqual: @"cp-850"] ||
+	    [string isEqual: @"ibm850"] || [string isEqual: @"850"])
+		encoding = OF_STRING_ENCODING_CODEPAGE_850;
+	else if ([string isEqual: @"cp858"] || [string isEqual: @"cp-858"] ||
+	    [string isEqual: @"ibm858"] || [string isEqual: @"858"])
+		encoding = OF_STRING_ENCODING_CODEPAGE_858;
+	else if ([string isEqual: @"macintosh"] || [string isEqual: @"mac"])
+		encoding = OF_STRING_ENCODING_MAC_ROMAN;
+	else {
+		/* Do not leave the pool pushed when throwing. */
+		objc_autoreleasePoolPop(pool);
+		@throw [OFInvalidEncodingException exception];
+	}
+
+	objc_autoreleasePoolPop(pool);
+
+	return encoding;
+}
 
 size_t
 of_string_utf8_encode(of_unichar_t character, char *buffer)
 {
 	size_t i = 0;

Index: src/OFXMLParser.m
==================================================================
--- src/OFXMLParser.m
+++ src/OFXMLParser.m
@@ -459,55 +459,12 @@
 					return false;
 
 				hasVersion = true;
 			}
 
-			if ([attribute isEqual: @"encoding"]) {
-				[value lowercase];
-
-				if ([value isEqual: @"utf-8"])
-					_encoding = OF_STRING_ENCODING_UTF_8;
-				else if ([value isEqual: @"iso-8859-1"] ||
-				    [value isEqual: @"iso_8859-1"])
-					_encoding =
-					    OF_STRING_ENCODING_ISO_8859_1;
-				else if ([value isEqual: @"iso-8859-2"] ||
-				    [value isEqual: @"iso_8859-2"])
-					_encoding =
-					    OF_STRING_ENCODING_ISO_8859_2;
-				else if ([value isEqual: @"iso-8859-15"] ||
-				    [value isEqual: @"iso_8859-15"])
-					_encoding =
-					    OF_STRING_ENCODING_ISO_8859_15;
-				else if ([value isEqual: @"windows-1251"] ||
-				    [value isEqual: @"cp1251"] ||
-				    [value isEqual: @"cp-1251"])
-					_encoding =
-					    OF_STRING_ENCODING_WINDOWS_1251;
-				else if ([value isEqual: @"windows-1252"] ||
-				    [value isEqual: @"cp1252"] ||
-				    [value isEqual: @"cp-1252"])
-					_encoding =
-					    OF_STRING_ENCODING_WINDOWS_1252;
-				else if ([value isEqual: @"cp437"] ||
-				    [value isEqual: @"cp-437"])
-					_encoding =
-					    OF_STRING_ENCODING_CODEPAGE_437;
-				else if ([value isEqual: @"cp850"] ||
-				    [value isEqual: @"cp-850"])
-					_encoding =
-					    OF_STRING_ENCODING_CODEPAGE_850;
-				else if ([value isEqual: @"cp858"] ||
-				    [value isEqual: @"cp-858"])
-					_encoding =
-					    OF_STRING_ENCODING_CODEPAGE_858;
-				else if ([value isEqual: @"macintosh"])
-					_encoding =
-					    OF_STRING_ENCODING_MAC_ROMAN;
-				else
-					return false;
-			}
+			if ([attribute isEqual: @"encoding"])
+				_encoding = of_string_parse_encoding(value);
 
 			last = i + 1;
 			PIState = 0;
 
 			break;

Index: tests/OFXMLParserTests.m
==================================================================
--- tests/OFXMLParserTests.m
+++ tests/OFXMLParserTests.m
@@ -22,10 +22,11 @@
 #import "OFXMLParser.h"
 #import "OFString.h"
 #import "OFArray.h"
 #import "OFAutoreleasePool.h"
 
+#import "OFInvalidEncodingException.h"
 #import "OFMalformedXMLException.h"
 
 #import "TestsAppDelegate.h"
 
 static OFString *module = @"OFXMLParser";
@@ -381,11 +382,11 @@
 	    OFMalformedXMLException,
 	    [parser parseString: @""])
 
 	parser = [OFXMLParser parser];
 	EXPECT_EXCEPTION(@"Detection of invalid XML processing instructions #2",
-	    OFMalformedXMLException,
+	    OFInvalidEncodingException,
 	    [parser parseString: @""])
 
 	parser = [OFXMLParser parser];
 	EXPECT_EXCEPTION(@"Detection of invalid XML processing instructions #3",
 	    OFMalformedXMLException,