/* * Copyright (c) 2008-2024 Jonathan Schleifer <js@nil.im> * * All rights reserved. * * This program is free software: you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License version 3.0 only, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * version 3.0 for more details. * * You should have received a copy of the GNU Lesser General Public License * version 3.0 along with this program. If not, see * <https://www.gnu.org/licenses/>. */ #include "config.h" #include <stdlib.h> #include <string.h> #include <math.h> #import "OFString+JSONParsing.h" #import "OFString+Private.h" #import "OFArray.h" #import "OFDictionary.h" #import "OFNumber.h" #import "OFNull.h" #import "OFInvalidJSONException.h" #ifndef INFINITY # define INFINITY __builtin_inf() #endif int _OFString_JSONParsing_reference; static id nextObject(const char **pointer, const char *stop, size_t *line, size_t depthLimit); static void skipWhitespaces(const char **pointer, const char *stop, size_t *line) { while (*pointer < stop && (**pointer == ' ' || **pointer == '\t' || **pointer == '\r' || **pointer == '\n')) { if (**pointer == '\n') (*line)++; (*pointer)++; } } static void skipComment(const char **pointer, const char *stop, size_t *line) { if (**pointer != '/') return; if (*pointer + 1 >= stop) return; (*pointer)++; if (**pointer == '*') { bool lastIsAsterisk = false; (*pointer)++; while (*pointer < stop) { if (lastIsAsterisk && **pointer == '/') { (*pointer)++; return; } lastIsAsterisk = (**pointer == '*'); if (**pointer == '\n') (*line)++; (*pointer)++; } } else if (**pointer == '/') { (*pointer)++; while (*pointer < stop) { if (**pointer == '\r' || **pointer == '\n') { (*pointer)++; (*line)++; return; } (*pointer)++; } } else (*pointer)--; } static void skipWhitespacesAndComments(const char **pointer, const char *stop, size_t *line) { const char *old = NULL; while (old != *pointer) { old = *pointer; skipWhitespaces(pointer, stop, line); skipComment(pointer, stop, line); } } static inline OFChar16 parseUnicodeEscape(const char *pointer, const char *stop) { OFChar16 ret = 0; if (pointer + 5 >= stop) return 0xFFFF; if (pointer[0] != '\\' || pointer[1] != 'u') return 0xFFFF; for (uint8_t i = 0; i < 4; i++) { char c = pointer[i + 2]; ret <<= 4; if (c >= '0' && c <= '9') ret |= c - '0'; else if (c >= 'a' && c <= 'f') ret |= c + 10 - 'a'; else if (c >= 'A' && c <= 'F') ret |= c + 10 - 'A'; else return 0xFFFF; } if (ret == 0) return 0xFFFF; return ret; } static inline OFString * parseString(const char **pointer, const char *stop, size_t *line) { char *buffer; size_t i = 0; char delimiter = **pointer; if (++(*pointer) + 1 >= stop) return nil; buffer = OFAllocMemory(stop - *pointer, 1); while (*pointer < stop) { /* Parse escape codes */ if (**pointer == '\\') { if (++(*pointer) >= stop) { OFFreeMemory(buffer); return nil; } switch (**pointer) { case '"': case '\\': case '/': buffer[i++] = **pointer; (*pointer)++; break; case 'b': buffer[i++] = '\b'; (*pointer)++; break; case 'f': buffer[i++] = '\f'; (*pointer)++; break; case 'n': buffer[i++] = '\n'; (*pointer)++; break; case 'r': buffer[i++] = '\r'; (*pointer)++; break; case 't': buffer[i++] = '\t'; (*pointer)++; break; /* Parse Unicode escape sequence */ case 'u':; OFChar16 c1, c2; OFUnichar c; size_t l; c1 = parseUnicodeEscape(*pointer - 1, stop); if (c1 == 0xFFFF) { OFFreeMemory(buffer); return nil; } /* Low surrogate */ if ((c1 & 0xFC00) == 0xDC00) { OFFreeMemory(buffer); return nil; } /* Normal character */ if ((c1 & 0xFC00) != 0xD800) { l = _OFUTF8StringEncode(c1, buffer + i); if (l == 0) { OFFreeMemory(buffer); return nil; } i += l; *pointer += 5; break; } /* * If we are still here, we only got one UTF-16 * surrogate and now need to get the other one * in order to produce UTF-8 and not CESU-8. */ c2 = parseUnicodeEscape(*pointer + 5, stop); if (c2 == 0xFFFF) { OFFreeMemory(buffer); return nil; } c = (((c1 & 0x3FF) << 10) | (c2 & 0x3FF)) + 0x10000; l = _OFUTF8StringEncode(c, buffer + i); if (l == 0) { OFFreeMemory(buffer); return nil; } i += l; *pointer += 11; break; case '\r': (*pointer)++; if (*pointer < stop && **pointer == '\n') { (*pointer)++; (*line)++; } break; case '\n': (*pointer)++; (*line)++; break; default: OFFreeMemory(buffer); return nil; } /* End of string found */ } else if (**pointer == delimiter) { OFString *ret; @try { ret = [OFString stringWithUTF8String: buffer length: i]; } @finally { OFFreeMemory(buffer); } (*pointer)++; return ret; /* Newlines in strings are disallowed */ } else if (**pointer == '\n' || **pointer == '\r') { (*line)++; OFFreeMemory(buffer); return nil; } else { buffer[i++] = **pointer; (*pointer)++; } } OFFreeMemory(buffer); return nil; } static inline OFString * parseIdentifier(const char **pointer, const char *stop) { char *buffer; size_t i = 0; buffer = OFAllocMemory(stop - *pointer, 1); while (*pointer < stop) { if ((**pointer >= 'a' && **pointer <= 'z') || (**pointer >= 'A' && **pointer <= 'Z') || (**pointer >= '0' && **pointer <= '9') || **pointer == '_' || **pointer == '$' || (**pointer & 0x80)) { buffer[i++] = **pointer; (*pointer)++; } else if (**pointer == '\\') { OFChar16 c1, c2; OFUnichar c; size_t l; if (++(*pointer) >= stop || **pointer != 'u') { OFFreeMemory(buffer); return nil; } c1 = parseUnicodeEscape(*pointer - 1, stop); if (c1 == 0xFFFF) { OFFreeMemory(buffer); return nil; } /* Low surrogate */ if ((c1 & 0xFC00) == 0xDC00) { OFFreeMemory(buffer); return nil; } /* Normal character */ if ((c1 & 0xFC00) != 0xD800) { l = _OFUTF8StringEncode(c1, buffer + i); if (l == 0) { OFFreeMemory(buffer); return nil; } i += l; *pointer += 5; continue; } /* * If we are still here, we only got one UTF-16 * surrogate and now need to get the other one in order * to produce UTF-8 and not CESU-8. */ c2 = parseUnicodeEscape(*pointer + 5, stop); if (c2 == 0xFFFF) { OFFreeMemory(buffer); return nil; } c = (((c1 & 0x3FF) << 10) | (c2 & 0x3FF)) + 0x10000; l = _OFUTF8StringEncode(c, buffer + i); if (l == 0) { OFFreeMemory(buffer); return nil; } i += l; *pointer += 11; } else { OFString *ret; if (i == 0 || (buffer[0] >= '0' && buffer[0] <= '9')) { OFFreeMemory(buffer); return nil; } @try { ret = [OFString stringWithUTF8String: buffer length: i]; } @finally { OFFreeMemory(buffer); } return ret; } } /* * It is never possible to end with an identifier, thus we should never * reach stop. */ OFFreeMemory(buffer); return nil; } static inline OFMutableArray * parseArray(const char **pointer, const char *stop, size_t *line, size_t depthLimit) { OFMutableArray *array = [OFMutableArray array]; if (++(*pointer) >= stop) return nil; if (--depthLimit == 0) return nil; while (**pointer != ']') { id object; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; if (**pointer == ']') break; if (**pointer == ',') { (*pointer)++; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop || **pointer != ']') return nil; break; } object = nextObject(pointer, stop, line, depthLimit); if (object == nil) return nil; [array addObject: object]; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; if (**pointer == ',') { (*pointer)++; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; } else if (**pointer != ']') return nil; } (*pointer)++; return array; } static inline OFMutableDictionary * parseDictionary(const char **pointer, const char *stop, size_t *line, size_t depthLimit) { OFMutableDictionary *dictionary = [OFMutableDictionary dictionary]; if (++(*pointer) >= stop) return nil; if (--depthLimit == 0) return nil; while (**pointer != '}') { OFString *key; id object; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; if (**pointer == '}') break; if (**pointer == ',') { (*pointer)++; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop || **pointer != '}') return nil; break; } skipWhitespacesAndComments(pointer, stop, line); if (*pointer + 1 >= stop) return nil; if ((**pointer >= 'a' && **pointer <= 'z') || (**pointer >= 'A' && **pointer <= 'Z') || **pointer == '_' || **pointer == '$' || **pointer == '\\') key = parseIdentifier(pointer, stop); else key = nextObject(pointer, stop, line, depthLimit); if (![key isKindOfClass: [OFString class]]) return nil; skipWhitespacesAndComments(pointer, stop, line); if (*pointer + 1 >= stop || **pointer != ':') return nil; (*pointer)++; object = nextObject(pointer, stop, line, depthLimit); if (object == nil) return nil; [dictionary setObject: object forKey: key]; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; if (**pointer == ',') { (*pointer)++; skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; } else if (**pointer != '}') return nil; } (*pointer)++; return dictionary; } static inline OFNumber * parseNumber(const char **pointer, const char *stop, size_t *line) { bool isNegative = (*pointer < stop && (*pointer)[0] == '-'); bool hasDecimal = false; size_t i; OFString *string; OFNumber *number; for (i = 0; *pointer + i < stop; i++) { if ((*pointer)[i] == '.') hasDecimal = true; if ((*pointer)[i] == ' ' || (*pointer)[i] == '\t' || (*pointer)[i] == '\r' || (*pointer)[i] == '\n' || (*pointer)[i] == ',' || (*pointer)[i] == ']' || (*pointer)[i] == '}') { if ((*pointer)[i] == '\n') (*line)++; break; } } string = [[OFString alloc] initWithUTF8String: *pointer length: i]; *pointer += i; @try { if (hasDecimal) number = [OFNumber numberWithDouble: string.doubleValue]; else if ([string isEqual: @"Infinity"]) number = [OFNumber numberWithDouble: INFINITY]; else if ([string isEqual: @"-Infinity"]) number = [OFNumber numberWithDouble: -INFINITY]; else if (isNegative) number = [OFNumber numberWithLongLong: [string longLongValueWithBase: 0]]; else number = [OFNumber numberWithUnsignedLongLong: [string unsignedLongLongValueWithBase: 0]]; } @finally { [string release]; } return number; } static id nextObject(const char **pointer, const char *stop, size_t *line, size_t depthLimit) { skipWhitespacesAndComments(pointer, stop, line); if (*pointer >= stop) return nil; switch (**pointer) { case '"': case '\'': return parseString(pointer, stop, line); case '[': return parseArray(pointer, stop, line, depthLimit); case '{': return parseDictionary(pointer, stop, line, depthLimit); case 't': if (*pointer + 3 >= stop) return nil; if (memcmp(*pointer, "true", 4) != 0) return nil; (*pointer) += 4; return [OFNumber numberWithBool: true]; case 'f': if (*pointer + 4 >= stop) return nil; if (memcmp(*pointer, "false", 5) != 0) return nil; (*pointer) += 5; return [OFNumber numberWithBool: false]; case 'n': if (*pointer + 3 >= stop) return nil; if (memcmp(*pointer, "null", 4) != 0) return nil; (*pointer) += 4; return [OFNull null]; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '+': case '-': case '.': case 'I': return parseNumber(pointer, stop, line); default: return nil; } } @implementation OFString (JSONParsing) - (id)objectByParsingJSON { return [self objectByParsingJSONWithDepthLimit: 32]; } - (id)objectByParsingJSONWithDepthLimit: (size_t)depthLimit { void *pool = objc_autoreleasePoolPush(); const char *pointer = self.UTF8String; const char *stop = pointer + self.UTF8StringLength; id object; size_t line = 1; object = nextObject(&pointer, stop, &line, depthLimit); skipWhitespacesAndComments(&pointer, stop, &line); if (pointer < stop || object == nil) @throw [OFInvalidJSONException exceptionWithString: self line: line]; [object retain]; objc_autoreleasePoolPop(pool); return [object autorelease]; } @end