ObjFW  Diff

Differences From Artifact [3080670bc7]:

To Artifact [d6113071ec]:


35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63






















64
65
66
67
68
69
70
35
36
37
38
39
40
41






















42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
#import "OFOutOfMemoryException.h"
#import "OFOutOfRangeException.h"

#import "unicode.h"

extern const OFChar16 of_iso_8859_2_table[];
extern const size_t of_iso_8859_2_table_offset;
extern const OFChar16 of_iso_8859_3_table[];
extern const size_t of_iso_8859_3_table_offset;
extern const OFChar16 of_iso_8859_15_table[];
extern const size_t of_iso_8859_15_table_offset;
extern const OFChar16 of_windows_1251_table[];
extern const size_t of_windows_1251_table_offset;
extern const OFChar16 of_windows_1252_table[];
extern const size_t of_windows_1252_table_offset;
extern const OFChar16 of_codepage_437_table[];
extern const size_t of_codepage_437_table_offset;
extern const OFChar16 of_codepage_850_table[];
extern const size_t of_codepage_850_table_offset;
extern const OFChar16 of_codepage_858_table[];
extern const size_t of_codepage_858_table_offset;
extern const OFChar16 of_mac_roman_table[];
extern const size_t of_mac_roman_table_offset;
extern const OFChar16 of_koi8_r_table[];
extern const size_t of_koi8_r_table_offset;
extern const OFChar16 of_koi8_u_table[];
extern const size_t of_koi8_u_table_offset;
extern const OFChar16 OFISO8859_2Table[];
extern const size_t OFISO8859_2TableOffset;
extern const OFChar16 OFISO8859_3Table[];
extern const size_t OFISO8859_3TableOffset;
extern const OFChar16 OFISO8859_15Table[];
extern const size_t OFISO8859_15TableOffset;
extern const OFChar16 OFWindows1251Table[];
extern const size_t OFWindows1251TableOffset;
extern const OFChar16 OFWindows1252Table[];
extern const size_t OFWindows1252TableOffset;
extern const OFChar16 OFCodepage437Table[];
extern const size_t OFCodepage437TableOffset;
extern const OFChar16 OFCodepage850Table[];
extern const size_t OFCodepage850TableOffset;
extern const OFChar16 OFCodepage858Table[];
extern const size_t OFCodepage858TableOffset;
extern const OFChar16 OFMacRomanTable[];
extern const size_t OFMacRomanTableOffset;
extern const OFChar16 OFKOI8RTable[];
extern const size_t OFKOI8RTableOffset;
extern const OFChar16 OFKOI8UTable[];
extern const size_t OFKOI8UTableOffset;

static inline int
memcasecmp(const char *first, const char *second, size_t length)
{
	for (size_t i = 0; i < length; i++) {
		unsigned char f = first[i];
		unsigned char s = second[i];
78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
78
79
80
81
82
83
84

85
86
87
88
89
90
91
92







-
+







			return OFOrderedAscending;
	}

	return OFOrderedSame;
}

int
of_string_utf8_check(const char *UTF8String, size_t UTF8Length, size_t *length)
OFUTF8StringCheck(const char *UTF8String, size_t UTF8Length, size_t *length)
{
	size_t tmpLength = UTF8Length;
	int isUTF8 = 0;

	for (size_t i = 0; i < UTF8Length; i++) {
		/* No sign of UTF-8 here */
		if OF_LIKELY (!(UTF8String[i] & 0x80))
145
146
147
148
149
150
151
152

153
154
155
156
157
158
159
160
161
162
163
164

165
166
167
168
169
170
171
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160
161
162
163

164
165
166
167
168
169
170
171







-
+











-
+







	if (length != NULL)
		*length = tmpLength;

	return isUTF8;
}

size_t
of_string_utf8_get_index(const char *string, size_t position)
positionToIndex(const char *string, size_t position)
{
	size_t idx = position;

	for (size_t i = 0; i < position; i++)
		if OF_UNLIKELY ((string[i] & 0xC0) == 0x80)
			idx--;

	return idx;
}

size_t
of_string_utf8_get_position(const char *string, size_t idx, size_t length)
OFUTF8StringIndexToPosition(const char *string, size_t idx, size_t length)
{
	for (size_t i = 0; i <= idx; i++)
		if OF_UNLIKELY ((string[i] & 0xC0) == 0x80)
			if (++idx > length)
				@throw [OFInvalidFormatException exception];

	return idx;
203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
203
204
205
206
207
208
209

210
211
212
213
214
215
216
217







-
+







		}

		_s = &_storage;

		_s->cString = storage;
		_s->cStringLength = UTF8StringLength;

		switch (of_string_utf8_check(UTF8String, UTF8StringLength,
		switch (OFUTF8StringCheck(UTF8String, UTF8StringLength,
		    &_s->length)) {
		case 1:
			_s->isUTF8 = true;
			break;
		case -1:
			@throw [OFInvalidEncodingException exception];
		}
247
248
249
250
251
252
253
254

255
256
257
258
259
260
261
247
248
249
250
251
252
253

254
255
256
257
258
259
260
261







-
+








		_s->cString = OFAllocMemory(cStringLength + 1, 1);
		_s->cStringLength = cStringLength;
		_s->freeWhenDone = true;

		if (encoding == OFStringEncodingUTF8 ||
		    encoding == OFStringEncodingASCII) {
			switch (of_string_utf8_check(cString, cStringLength,
			switch (OFUTF8StringCheck(cString, cStringLength,
			    &_s->length)) {
			case 1:
				if (encoding == OFStringEncodingASCII)
					@throw [OFInvalidEncodingException
					    exception];

				_s->isUTF8 = true;
281
282
283
284
285
286
287
288

289
290
291
292
293
294
295
281
282
283
284
285
286
287

288
289
290
291
292
293
294
295







-
+








				if (!(cString[i] & 0x80)) {
					_s->cString[j++] = cString[i];
					continue;
				}

				_s->isUTF8 = true;
				bytes = of_string_utf8_encode(
				bytes = OFUTF8StringEncode(
				    (uint8_t)cString[i], buffer);

				if (bytes == 0)
					@throw [OFInvalidEncodingException
					    exception];

				_s->cStringLength += bytes - 1;
305
306
307
308
309
310
311
312

313
314
315

316
317
318

319
320
321

322
323
324

325
326
327

328
329
330

331
332
333

334
335
336

337
338
339

340
341
342

343
344
345

346
347
348
349
350
351
352
305
306
307
308
309
310
311

312
313
314

315
316
317

318
319
320

321
322
323

324
325
326

327
328
329

330
331
332

333
334
335

336
337
338

339
340
341

342
343
344

345
346
347
348
349
350
351
352







-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+







			return self;
		}

		switch (encoding) {
#define CASE(encoding, var)				\
		case encoding:				\
			table = var;			\
			tableOffset = var##_offset;	\
			tableOffset = var##Offset;	\
			break;
#ifdef HAVE_ISO_8859_2
		CASE(OFStringEncodingISO8859_2, of_iso_8859_2_table)
		CASE(OFStringEncodingISO8859_2, OFISO8859_2Table)
#endif
#ifdef HAVE_ISO_8859_3
		CASE(OFStringEncodingISO8859_3, of_iso_8859_3_table)
		CASE(OFStringEncodingISO8859_3, OFISO8859_3Table)
#endif
#ifdef HAVE_ISO_8859_15
		CASE(OFStringEncodingISO8859_15, of_iso_8859_15_table)
		CASE(OFStringEncodingISO8859_15, OFISO8859_15Table)
#endif
#ifdef HAVE_WINDOWS_1251
		CASE(OFStringEncodingWindows1251, of_windows_1251_table)
		CASE(OFStringEncodingWindows1251, OFWindows1251Table)
#endif
#ifdef HAVE_WINDOWS_1252
		CASE(OFStringEncodingWindows1252, of_windows_1252_table)
		CASE(OFStringEncodingWindows1252, OFWindows1252Table)
#endif
#ifdef HAVE_CODEPAGE_437
		CASE(OFStringEncodingCodepage437, of_codepage_437_table)
		CASE(OFStringEncodingCodepage437, OFCodepage437Table)
#endif
#ifdef HAVE_CODEPAGE_850
		CASE(OFStringEncodingCodepage850, of_codepage_850_table)
		CASE(OFStringEncodingCodepage850, OFCodepage850Table)
#endif
#ifdef HAVE_CODEPAGE_858
		CASE(OFStringEncodingCodepage858, of_codepage_858_table)
		CASE(OFStringEncodingCodepage858, OFCodepage858Table)
#endif
#ifdef HAVE_MAC_ROMAN
		CASE(OFStringEncodingMacRoman, of_mac_roman_table)
		CASE(OFStringEncodingMacRoman, OFMacRomanTable)
#endif
#ifdef HAVE_KOI8_R
		CASE(OFStringEncodingKOI8R, of_koi8_r_table)
		CASE(OFStringEncodingKOI8R, OFKOI8RTable)
#endif
#ifdef HAVE_KOI8_U
		CASE(OFStringEncodingKOI8U, of_koi8_u_table)
		CASE(OFStringEncodingKOI8U, OFKOI8UTable)
#endif
#undef CASE
		default:
			@throw [OFInvalidEncodingException exception];
		}

		j = 0;
363
364
365
366
367
368
369
370

371
372
373
374
375
376
377
363
364
365
366
367
368
369

370
371
372
373
374
375
376
377







-
+








			unichar = table[character - tableOffset];

			if (unichar == 0xFFFF)
				@throw [OFInvalidEncodingException exception];

			_s->isUTF8 = true;
			byteLength = of_string_utf8_encode(unichar, buffer);
			byteLength = OFUTF8StringEncode(unichar, buffer);

			if (byteLength == 0)
				@throw [OFInvalidEncodingException exception];

			_s->cStringLength += byteLength - 1;
			_s->cString = OFResizeMemory(_s->cString,
			    _s->cStringLength + 1, 1);
409
410
411
412
413
414
415
416

417
418
419
420
421
422
423
409
410
411
412
413
414
415

416
417
418
419
420
421
422
423







-
+








		if (UTF8StringLength >= 3 &&
		    memcmp(UTF8String, "\xEF\xBB\xBF", 3) == 0) {
			UTF8String += 3;
			UTF8StringLength -= 3;
		}

		switch (of_string_utf8_check(UTF8String, UTF8StringLength,
		switch (OFUTF8StringCheck(UTF8String, UTF8StringLength,
		    &_s->length)) {
		case 1:
			_s->isUTF8 = true;
			break;
		case -1:
			@throw [OFInvalidEncodingException exception];
		}
473
474
475
476
477
478
479
480

481
482
483
484
485
486
487
473
474
475
476
477
478
479

480
481
482
483
484
485
486
487







-
+








		_s->cString = OFAllocMemory((length * 4) + 1, 1);
		_s->length = length;
		_s->freeWhenDone = true;

		j = 0;
		for (size_t i = 0; i < length; i++) {
			size_t len = of_string_utf8_encode(characters[i],
			size_t len = OFUTF8StringEncode(characters[i],
			    _s->cString + j);

			if (len == 0)
				@throw [OFInvalidEncodingException exception];

			if (len > 1)
				_s->isUTF8 = true;
559
560
561
562
563
564
565
566

567
568
569
570
571
572
573
559
560
561
562
563
564
565

566
567
568
569
570
571
572
573







-
+







				character = (((character & 0x3FF) << 10) |
				    (nextCharacter & 0x3FF)) + 0x10000;

				i++;
				_s->length--;
			}

			len = of_string_utf8_encode(character, _s->cString + j);
			len = OFUTF8StringEncode(character, _s->cString + j);

			if (len == 0)
				@throw [OFInvalidEncodingException exception];

			if (len > 1)
				_s->isUTF8 = true;

615
616
617
618
619
620
621
622

623
624
625
626
627
628
629
615
616
617
618
619
620
621

622
623
624
625
626
627
628
629







-
+







		_s->cString = OFAllocMemory((length * 4) + 1, 1);
		_s->length = length;
		_s->freeWhenDone = true;

		j = 0;
		for (size_t i = 0; i < length; i++) {
			char buffer[4];
			size_t len = of_string_utf8_encode((swap
			size_t len = OFUTF8StringEncode((swap
			    ? OFByteSwap32(characters[i])
			    : characters[i]),
			    buffer);

			switch (len) {
			case 1:
				_s->cString[j++] = buffer[0];
675
676
677
678
679
680
681
682

683
684
685
686
687
688
689
675
676
677
678
679
680
681

682
683
684
685
686
687
688
689







-
+







		if ((cStringLength = OFVASPrintF(&tmp, format.UTF8String,
		    arguments)) == -1)
			@throw [OFInvalidFormatException exception];

		_s->cStringLength = cStringLength;

		@try {
			switch (of_string_utf8_check(tmp, cStringLength,
			switch (OFUTF8StringCheck(tmp, cStringLength,
			    &_s->length)) {
			case 1:
				_s->isUTF8 = true;
				break;
			case -1:
				@throw [OFInvalidEncodingException exception];
			}
869
870
871
872
873
874
875
876

877
878

879
880
881
882
883
884

885
886

887
888
889
890
891
892

893
894

895
896
897
898
899
900
901
869
870
871
872
873
874
875

876
877

878
879
880
881
882
883

884
885

886
887
888
889
890
891

892
893

894
895
896
897
898
899
900
901







-
+

-
+





-
+

-
+





-
+

-
+








	i = j = 0;

	while (i < _s->cStringLength && j < otherCStringLength) {
		OFUnichar c1, c2;
		ssize_t l1, l2;

		l1 = of_string_utf8_decode(_s->cString + i,
		l1 = OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c1);
		l2 = of_string_utf8_decode(otherCString + j,
		l2 = OFUTF8StringDecode(otherCString + j,
		    otherCStringLength - j, &c2);

		if (l1 <= 0 || l2 <= 0 || c1 > 0x10FFFF || c2 > 0x10FFFF)
			@throw [OFInvalidEncodingException exception];

		if (c1 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) {
		if (c1 >> 8 < OFUnicodeCaseFoldingTableSize) {
			OFUnichar tc =
			    of_unicode_casefolding_table[c1 >> 8][c1 & 0xFF];
			    OFUnicodeCaseFoldingTable[c1 >> 8][c1 & 0xFF];

			if (tc)
				c1 = tc;
		}

		if (c2 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) {
		if (c2 >> 8 < OFUnicodeCaseFoldingTableSize) {
			OFUnichar tc =
			    of_unicode_casefolding_table[c2 >> 8][c2 & 0xFF];
			    OFUnicodeCaseFoldingTable[c2 >> 8][c2 & 0xFF];

			if (tc)
				c2 = tc;
		}

		if (c1 > c2)
			return OFOrderedDescending;
924
925
926
927
928
929
930
931

932
933
934
935
936
937
938
924
925
926
927
928
929
930

931
932
933
934
935
936
937
938







-
+








	OFHashInit(&hash);

	for (size_t i = 0; i < _s->cStringLength; i++) {
		OFUnichar c;
		ssize_t length;

		if ((length = of_string_utf8_decode(_s->cString + i,
		if ((length = OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c)) <= 0)
			@throw [OFInvalidEncodingException exception];

		OFHashAdd(&hash, (c & 0xFF0000) >> 16);
		OFHashAdd(&hash, (c & 0x00FF00) >> 8);
		OFHashAdd(&hash, c & 0x0000FF);

953
954
955
956
957
958
959
960

961
962
963


964
965
966
967
968
969
970
953
954
955
956
957
958
959

960
961


962
963
964
965
966
967
968
969
970







-
+

-
-
+
+








	if (idx >= _s->length)
		@throw [OFOutOfRangeException exception];

	if (!_s->isUTF8)
		return _s->cString[idx];

	idx = of_string_utf8_get_position(_s->cString, idx, _s->cStringLength);
	idx = OFUTF8StringIndexToPosition(_s->cString, idx, _s->cStringLength);

	if (of_string_utf8_decode(_s->cString + idx,
	    _s->cStringLength - idx, &character) <= 0)
	if (OFUTF8StringDecode(_s->cString + idx, _s->cStringLength - idx,
	    &character) <= 0)
		@throw [OFInvalidEncodingException exception];

	return character;
}

- (void)getCharacters: (OFUnichar *)buffer inRange: (OFRange)range
{
991
992
993
994
995
996
997
998

999
1000

1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018

1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033

1034
1035
1036
1037
1038
1039
1040
991
992
993
994
995
996
997

998
999

1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017

1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032

1033
1034
1035
1036
1037
1038
1039
1040







-
+

-
+

















-
+














-
+







	size_t rangeLocation, rangeLength;

	if (range.length > SIZE_MAX - range.location ||
	    range.location + range.length > _s->length)
		@throw [OFOutOfRangeException exception];

	if (_s->isUTF8) {
		rangeLocation = of_string_utf8_get_position(
		rangeLocation = OFUTF8StringIndexToPosition(
		    _s->cString, range.location, _s->cStringLength);
		rangeLength = of_string_utf8_get_position(
		rangeLength = OFUTF8StringIndexToPosition(
		    _s->cString + rangeLocation, range.length,
		    _s->cStringLength - rangeLocation);
	} else {
		rangeLocation = range.location;
		rangeLength = range.length;
	}

	if (cStringLength == 0)
		return OFRangeMake(0, 0);

	if (cStringLength > rangeLength)
		return OFRangeMake(OFNotFound, 0);

	if (options & OFStringSearchBackwards) {
		for (size_t i = rangeLength - cStringLength;; i--) {
			if (memcmp(_s->cString + rangeLocation + i, cString,
			    cStringLength) == 0) {
				range.location += of_string_utf8_get_index(
				range.location += positionToIndex(
				    _s->cString + rangeLocation, i);
				range.length = string.length;

				return range;
			}

			/* Did not match and we're at the last char */
			if (i == 0)
				return OFRangeMake(OFNotFound, 0);
		}
	} else {
		for (size_t i = 0; i <= rangeLength - cStringLength; i++) {
			if (memcmp(_s->cString + rangeLocation + i, cString,
			    cStringLength) == 0) {
				range.location += of_string_utf8_get_index(
				range.location += positionToIndex(
				    _s->cString + rangeLocation, i);
				range.length = string.length;

				return range;
			}
		}
	}
1065
1066
1067
1068
1069
1070
1071
1072

1073
1074

1075
1076
1077
1078
1079
1080
1081
1065
1066
1067
1068
1069
1070
1071

1072
1073

1074
1075
1076
1077
1078
1079
1080
1081







-
+

-
+







	size_t start = range.location;
	size_t end = range.location + range.length;

	if (range.length > SIZE_MAX - range.location || end > _s->length)
		@throw [OFOutOfRangeException exception];

	if (_s->isUTF8) {
		start = of_string_utf8_get_position(_s->cString, start,
		start = OFUTF8StringIndexToPosition(_s->cString, start,
		    _s->cStringLength);
		end = of_string_utf8_get_position(_s->cString, end,
		end = OFUTF8StringIndexToPosition(_s->cString, end,
		    _s->cStringLength);
	}

	return [OFString stringWithUTF8String: _s->cString + start
				       length: end - start];
}

1158
1159
1160
1161
1162
1163
1164
1165

1166
1167
1168
1169
1170
1171
1172
1158
1159
1160
1161
1162
1163
1164

1165
1166
1167
1168
1169
1170
1171
1172







-
+







	OFUnichar *buffer = OFAllocMemory(_s->length, sizeof(OFUnichar));
	size_t i = 0, j = 0;

	while (i < _s->cStringLength) {
		OFUnichar c;
		ssize_t cLen;

		cLen = of_string_utf8_decode(_s->cString + i,
		cLen = OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen <= 0 || c > 0x10FFFF) {
			OFFreeMemory(buffer);
			@throw [OFInvalidEncodingException exception];
		}

1185
1186
1187
1188
1189
1190
1191
1192

1193
1194
1195
1196
1197
1198
1199
1185
1186
1187
1188
1189
1190
1191

1192
1193
1194
1195
1196
1197
1198
1199







-
+







	OFChar32 *buffer = OFAllocMemory(_s->length + 1, sizeof(OFChar32));
	size_t i = 0, j = 0;

	while (i < _s->cStringLength) {
		OFChar32 c;
		ssize_t cLen;

		cLen = of_string_utf8_decode(_s->cString + i,
		cLen = OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen <= 0 || c > 0x10FFFF) {
			OFFreeMemory(buffer);
			@throw [OFInvalidEncodingException exception];
		}