ObjFW  Diff

Differences From Artifact [720c0708e2]:

To Artifact [97ab3896de]:


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67






















68
69
70
71
72
73
74
39
40
41
42
43
44
45






















46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
#import "OFOutOfMemoryException.h"
#import "OFOutOfRangeException.h"

#import "unicode.h"

extern const OFChar16 OFISO8859_2Table[];
extern const size_t OFISO8859_2TableOffset;
extern const OFChar16 OFISO8859_3Table[];
extern const size_t OFISO8859_3TableOffset;
extern const OFChar16 OFISO8859_15Table[];
extern const size_t OFISO8859_15TableOffset;
extern const OFChar16 OFWindows1251Table[];
extern const size_t OFWindows1251TableOffset;
extern const OFChar16 OFWindows1252Table[];
extern const size_t OFWindows1252TableOffset;
extern const OFChar16 OFCodepage437Table[];
extern const size_t OFCodepage437TableOffset;
extern const OFChar16 OFCodepage850Table[];
extern const size_t OFCodepage850TableOffset;
extern const OFChar16 OFCodepage858Table[];
extern const size_t OFCodepage858TableOffset;
extern const OFChar16 OFMacRomanTable[];
extern const size_t OFMacRomanTableOffset;
extern const OFChar16 OFKOI8RTable[];
extern const size_t OFKOI8RTableOffset;
extern const OFChar16 OFKOI8UTable[];
extern const size_t OFKOI8UTableOffset;
extern const OFChar16 _OFISO8859_2Table[];
extern const size_t _OFISO8859_2TableOffset;
extern const OFChar16 _OFISO8859_3Table[];
extern const size_t _OFISO8859_3TableOffset;
extern const OFChar16 _OFISO8859_15Table[];
extern const size_t _OFISO8859_15TableOffset;
extern const OFChar16 _OFWindows1251Table[];
extern const size_t _OFWindows1251TableOffset;
extern const OFChar16 _OFWindows1252Table[];
extern const size_t _OFWindows1252TableOffset;
extern const OFChar16 _OFCodepage437Table[];
extern const size_t _OFCodepage437TableOffset;
extern const OFChar16 _OFCodepage850Table[];
extern const size_t _OFCodepage850TableOffset;
extern const OFChar16 _OFCodepage858Table[];
extern const size_t _OFCodepage858TableOffset;
extern const OFChar16 _OFMacRomanTable[];
extern const size_t _OFMacRomanTableOffset;
extern const OFChar16 _OFKOI8RTable[];
extern const size_t _OFKOI8RTableOffset;
extern const OFChar16 _OFKOI8UTable[];
extern const size_t _OFKOI8UTableOffset;

static inline int
memcasecmp(const char *first, const char *second, size_t length)
{
	for (size_t i = 0; i < length; i++) {
		unsigned char f = first[i];
		unsigned char s = second[i];
82
83
84
85
86
87
88
89

90
91
92
93
94
95
96
82
83
84
85
86
87
88

89
90
91
92
93
94
95
96







-
+







			return OFOrderedAscending;
	}

	return OFOrderedSame;
}

int
OFUTF8StringCheck(const char *UTF8String, size_t UTF8Length, size_t *length)
_OFUTF8StringCheck(const char *UTF8String, size_t UTF8Length, size_t *length)
{
	size_t tmpLength = UTF8Length;
	int isUTF8 = 0;

	for (size_t i = 0; i < UTF8Length; i++) {
		/* No sign of UTF-8 here */
		if OF_LIKELY (!(UTF8String[i] & 0x80))
148
149
150
151
152
153
154
155

156
157
158
159
160
161
162
163
164
165
166
167
168

169
170
171
172
173
174
175
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162
163
164
165
166
167

168
169
170
171
172
173
174
175







-
+












-
+








	if (length != NULL)
		*length = tmpLength;

	return isUTF8;
}

size_t
static size_t
positionToIndex(const char *string, size_t position)
{
	size_t idx = position;

	for (size_t i = 0; i < position; i++)
		if OF_UNLIKELY ((string[i] & 0xC0) == 0x80)
			idx--;

	return idx;
}

size_t
OFUTF8StringIndexToPosition(const char *string, size_t idx, size_t length)
_OFUTF8StringIndexToPosition(const char *string, size_t idx, size_t length)
{
	for (size_t i = 0; i <= idx; i++)
		if OF_UNLIKELY ((string[i] & 0xC0) == 0x80)
			if (++idx > length)
				@throw [OFInvalidFormatException exception];

	return idx;
207
208
209
210
211
212
213
214

215
216
217
218
219
220
221
207
208
209
210
211
212
213

214
215
216
217
218
219
220
221







-
+







		}

		_s = &_storage;

		_s->cString = storage;
		_s->cStringLength = UTF8StringLength;

		switch (OFUTF8StringCheck(UTF8String, UTF8StringLength,
		switch (_OFUTF8StringCheck(UTF8String, UTF8StringLength,
		    &_s->length)) {
		case 1:
			_s->isUTF8 = true;
			break;
		case -1:
			@throw [OFInvalidEncodingException exception];
		}
251
252
253
254
255
256
257
258

259
260
261
262
263
264
265
251
252
253
254
255
256
257

258
259
260
261
262
263
264
265







-
+








		_s->cString = OFAllocMemory(cStringLength + 1, 1);
		_s->cStringLength = cStringLength;
		_s->freeWhenDone = true;

		if (encoding == OFStringEncodingUTF8 ||
		    encoding == OFStringEncodingASCII) {
			switch (OFUTF8StringCheck(cString, cStringLength,
			switch (_OFUTF8StringCheck(cString, cStringLength,
			    &_s->length)) {
			case 1:
				if (encoding == OFStringEncodingASCII)
					@throw [OFInvalidEncodingException
					    exception];

				_s->isUTF8 = true;
285
286
287
288
289
290
291
292

293
294
295
296
297
298
299
285
286
287
288
289
290
291

292
293
294
295
296
297
298
299







-
+








				if (!(cString[i] & 0x80)) {
					_s->cString[j++] = cString[i];
					continue;
				}

				_s->isUTF8 = true;
				bytes = OFUTF8StringEncode(
				bytes = _OFUTF8StringEncode(
				    (uint8_t)cString[i], buffer);

				if (bytes == 0)
					@throw [OFInvalidEncodingException
					    exception];

				_s->cStringLength += bytes - 1;
312
313
314
315
316
317
318
319

320
321
322

323
324
325

326
327
328

329
330
331

332
333
334

335
336
337

338
339
340

341
342
343

344
345
346

347
348
349

350
351
352
353
354
355
356
312
313
314
315
316
317
318

319
320
321

322
323
324

325
326
327

328
329
330

331
332
333

334
335
336

337
338
339

340
341
342

343
344
345

346
347
348

349
350
351
352
353
354
355
356







-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+


-
+







		switch (encoding) {
#define CASE(encoding, var)				\
		case encoding:				\
			table = var;			\
			tableOffset = var##Offset;	\
			break;
#ifdef HAVE_ISO_8859_2
		CASE(OFStringEncodingISO8859_2, OFISO8859_2Table)
		CASE(OFStringEncodingISO8859_2, _OFISO8859_2Table)
#endif
#ifdef HAVE_ISO_8859_3
		CASE(OFStringEncodingISO8859_3, OFISO8859_3Table)
		CASE(OFStringEncodingISO8859_3, _OFISO8859_3Table)
#endif
#ifdef HAVE_ISO_8859_15
		CASE(OFStringEncodingISO8859_15, OFISO8859_15Table)
		CASE(OFStringEncodingISO8859_15, _OFISO8859_15Table)
#endif
#ifdef HAVE_WINDOWS_1251
		CASE(OFStringEncodingWindows1251, OFWindows1251Table)
		CASE(OFStringEncodingWindows1251, _OFWindows1251Table)
#endif
#ifdef HAVE_WINDOWS_1252
		CASE(OFStringEncodingWindows1252, OFWindows1252Table)
		CASE(OFStringEncodingWindows1252, _OFWindows1252Table)
#endif
#ifdef HAVE_CODEPAGE_437
		CASE(OFStringEncodingCodepage437, OFCodepage437Table)
		CASE(OFStringEncodingCodepage437, _OFCodepage437Table)
#endif
#ifdef HAVE_CODEPAGE_850
		CASE(OFStringEncodingCodepage850, OFCodepage850Table)
		CASE(OFStringEncodingCodepage850, _OFCodepage850Table)
#endif
#ifdef HAVE_CODEPAGE_858
		CASE(OFStringEncodingCodepage858, OFCodepage858Table)
		CASE(OFStringEncodingCodepage858, _OFCodepage858Table)
#endif
#ifdef HAVE_MAC_ROMAN
		CASE(OFStringEncodingMacRoman, OFMacRomanTable)
		CASE(OFStringEncodingMacRoman, _OFMacRomanTable)
#endif
#ifdef HAVE_KOI8_R
		CASE(OFStringEncodingKOI8R, OFKOI8RTable)
		CASE(OFStringEncodingKOI8R, _OFKOI8RTable)
#endif
#ifdef HAVE_KOI8_U
		CASE(OFStringEncodingKOI8U, OFKOI8UTable)
		CASE(OFStringEncodingKOI8U, _OFKOI8UTable)
#endif
#undef CASE
		default:
			@throw [OFInvalidArgumentException exception];
		}

		j = 0;
367
368
369
370
371
372
373
374

375
376
377
378
379
380
381
367
368
369
370
371
372
373

374
375
376
377
378
379
380
381







-
+








			unichar = table[character - tableOffset];

			if (unichar == 0xFFFF)
				@throw [OFInvalidEncodingException exception];

			_s->isUTF8 = true;
			byteLength = OFUTF8StringEncode(unichar, buffer);
			byteLength = _OFUTF8StringEncode(unichar, buffer);

			if (byteLength == 0)
				@throw [OFInvalidEncodingException exception];

			_s->cStringLength += byteLength - 1;
			_s->cString = OFResizeMemory(_s->cString,
			    _s->cStringLength + 1, 1);
413
414
415
416
417
418
419
420

421
422
423
424
425
426
427
413
414
415
416
417
418
419

420
421
422
423
424
425
426
427







-
+








		if (UTF8StringLength >= 3 &&
		    memcmp(UTF8String, "\xEF\xBB\xBF", 3) == 0) {
			UTF8String += 3;
			UTF8StringLength -= 3;
		}

		switch (OFUTF8StringCheck(UTF8String, UTF8StringLength,
		switch (_OFUTF8StringCheck(UTF8String, UTF8StringLength,
		    &_s->length)) {
		case 1:
			_s->isUTF8 = true;
			break;
		case -1:
			@throw [OFInvalidEncodingException exception];
		}
477
478
479
480
481
482
483
484

485
486
487
488
489
490
491
477
478
479
480
481
482
483

484
485
486
487
488
489
490
491







-
+








		_s->cString = OFAllocMemory((length * 4) + 1, 1);
		_s->length = length;
		_s->freeWhenDone = true;

		j = 0;
		for (size_t i = 0; i < length; i++) {
			size_t len = OFUTF8StringEncode(characters[i],
			size_t len = _OFUTF8StringEncode(characters[i],
			    _s->cString + j);

			if (len == 0)
				@throw [OFInvalidEncodingException exception];

			if (len > 1)
				_s->isUTF8 = true;
563
564
565
566
567
568
569
570

571
572
573
574
575
576
577
563
564
565
566
567
568
569

570
571
572
573
574
575
576
577







-
+







				character = (((character & 0x3FF) << 10) |
				    (nextCharacter & 0x3FF)) + 0x10000;

				i++;
				_s->length--;
			}

			len = OFUTF8StringEncode(character, _s->cString + j);
			len = _OFUTF8StringEncode(character, _s->cString + j);

			if (len == 0)
				@throw [OFInvalidEncodingException exception];

			if (len > 1)
				_s->isUTF8 = true;

619
620
621
622
623
624
625
626

627
628
629
630
631
632
633
619
620
621
622
623
624
625

626
627
628
629
630
631
632
633







-
+







		_s->cString = OFAllocMemory((length * 4) + 1, 1);
		_s->length = length;
		_s->freeWhenDone = true;

		j = 0;
		for (size_t i = 0; i < length; i++) {
			char buffer[4];
			size_t len = OFUTF8StringEncode((swap
			size_t len = _OFUTF8StringEncode((swap
			    ? OFByteSwap32(characters[i])
			    : characters[i]),
			    buffer);

			switch (len) {
			case 1:
				_s->cString[j++] = buffer[0];
672
673
674
675
676
677
678
679

680
681
682
683
684
685
686

687
688
689
690
691
692
693
672
673
674
675
676
677
678

679
680
681
682
683
684
685

686
687
688
689
690
691
692
693







-
+






-
+







		int cStringLength;

		if (format == nil)
			@throw [OFInvalidArgumentException exception];

		_s = &_storage;

		if ((cStringLength = OFVASPrintF(&tmp, format.UTF8String,
		if ((cStringLength = _OFVASPrintF(&tmp, format.UTF8String,
		    arguments)) == -1)
			@throw [OFInvalidFormatException exception];

		_s->cStringLength = cStringLength;

		@try {
			switch (OFUTF8StringCheck(tmp, cStringLength,
			switch (_OFUTF8StringCheck(tmp, cStringLength,
			    &_s->length)) {
			case 1:
				_s->isUTF8 = true;
				break;
			case -1:
				@throw [OFInvalidEncodingException exception];
			}
873
874
875
876
877
878
879
880

881
882

883
884
885
886
887
888

889
890

891
892
893
894
895
896

897
898

899
900
901
902
903
904
905
873
874
875
876
877
878
879

880
881

882
883
884
885
886
887

888
889

890
891
892
893
894
895

896
897

898
899
900
901
902
903
904
905







-
+

-
+





-
+

-
+





-
+

-
+








	i = j = 0;

	while (i < _s->cStringLength && j < otherCStringLength) {
		OFUnichar c1, c2;
		ssize_t l1, l2;

		l1 = OFUTF8StringDecode(_s->cString + i,
		l1 = _OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c1);
		l2 = OFUTF8StringDecode(otherCString + j,
		l2 = _OFUTF8StringDecode(otherCString + j,
		    otherCStringLength - j, &c2);

		if (l1 <= 0 || l2 <= 0 || c1 > 0x10FFFF || c2 > 0x10FFFF)
			@throw [OFInvalidEncodingException exception];

		if (c1 >> 8 < OFUnicodeCaseFoldingTableSize) {
		if (c1 >> 8 < _OFUnicodeCaseFoldingTableSize) {
			OFUnichar tc =
			    OFUnicodeCaseFoldingTable[c1 >> 8][c1 & 0xFF];
			    _OFUnicodeCaseFoldingTable[c1 >> 8][c1 & 0xFF];

			if (tc)
				c1 = tc;
		}

		if (c2 >> 8 < OFUnicodeCaseFoldingTableSize) {
		if (c2 >> 8 < _OFUnicodeCaseFoldingTableSize) {
			OFUnichar tc =
			    OFUnicodeCaseFoldingTable[c2 >> 8][c2 & 0xFF];
			    _OFUnicodeCaseFoldingTable[c2 >> 8][c2 & 0xFF];

			if (tc)
				c2 = tc;
		}

		if (c1 > c2)
			return OFOrderedDescending;
928
929
930
931
932
933
934
935

936
937
938
939
940
941
942
928
929
930
931
932
933
934

935
936
937
938
939
940
941
942







-
+








	OFHashInit(&hash);

	for (size_t i = 0; i < _s->cStringLength; i++) {
		OFUnichar c;
		ssize_t length;

		if ((length = OFUTF8StringDecode(_s->cString + i,
		if ((length = _OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c)) <= 0)
			@throw [OFInvalidEncodingException exception];

		OFHashAddByte(&hash, (c & 0xFF0000) >> 16);
		OFHashAddByte(&hash, (c & 0x00FF00) >> 8);
		OFHashAddByte(&hash, c & 0x0000FF);

957
958
959
960
961
962
963
964

965
966

967
968
969
970
971
972
973
957
958
959
960
961
962
963

964
965

966
967
968
969
970
971
972
973







-
+

-
+








	if (idx >= _s->length)
		@throw [OFOutOfRangeException exception];

	if (!_s->isUTF8)
		return _s->cString[idx];

	idx = OFUTF8StringIndexToPosition(_s->cString, idx, _s->cStringLength);
	idx = _OFUTF8StringIndexToPosition(_s->cString, idx, _s->cStringLength);

	if (OFUTF8StringDecode(_s->cString + idx, _s->cStringLength - idx,
	if (_OFUTF8StringDecode(_s->cString + idx, _s->cStringLength - idx,
	    &character) <= 0)
		@throw [OFInvalidEncodingException exception];

	return character;
}

- (void)getCharacters: (OFUnichar *)buffer inRange: (OFRange)range
995
996
997
998
999
1000
1001
1002

1003
1004

1005
1006
1007
1008
1009
1010
1011
995
996
997
998
999
1000
1001

1002
1003

1004
1005
1006
1007
1008
1009
1010
1011







-
+

-
+







	size_t rangeLocation, rangeLength;

	if (range.length > SIZE_MAX - range.location ||
	    range.location + range.length > _s->length)
		@throw [OFOutOfRangeException exception];

	if (_s->isUTF8) {
		rangeLocation = OFUTF8StringIndexToPosition(
		rangeLocation = _OFUTF8StringIndexToPosition(
		    _s->cString, range.location, _s->cStringLength);
		rangeLength = OFUTF8StringIndexToPosition(
		rangeLength = _OFUTF8StringIndexToPosition(
		    _s->cString + rangeLocation, range.length,
		    _s->cStringLength - rangeLocation);
	} else {
		rangeLocation = range.location;
		rangeLength = range.length;
	}

1069
1070
1071
1072
1073
1074
1075
1076

1077
1078

1079
1080
1081
1082
1083
1084
1085
1069
1070
1071
1072
1073
1074
1075

1076
1077

1078
1079
1080
1081
1082
1083
1084
1085







-
+

-
+







	size_t start = range.location;
	size_t end = range.location + range.length;

	if (range.length > SIZE_MAX - range.location || end > _s->length)
		@throw [OFOutOfRangeException exception];

	if (_s->isUTF8) {
		start = OFUTF8StringIndexToPosition(_s->cString, start,
		start = _OFUTF8StringIndexToPosition(_s->cString, start,
		    _s->cStringLength);
		end = OFUTF8StringIndexToPosition(_s->cString, end,
		end = _OFUTF8StringIndexToPosition(_s->cString, end,
		    _s->cStringLength);
	}

	return [OFString stringWithUTF8String: _s->cString + start
				       length: end - start];
}

1163
1164
1165
1166
1167
1168
1169
1170

1171
1172
1173
1174
1175
1176
1177
1163
1164
1165
1166
1167
1168
1169

1170
1171
1172
1173
1174
1175
1176
1177







-
+







	size_t i = 0, j = 0;
	const OFUnichar *ret;

	while (i < _s->cStringLength) {
		OFUnichar c;
		ssize_t cLen;

		cLen = OFUTF8StringDecode(_s->cString + i,
		cLen = _OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen <= 0 || c > 0x10FFFF) {
			OFFreeMemory(buffer);
			@throw [OFInvalidEncodingException exception];
		}

1198
1199
1200
1201
1202
1203
1204
1205

1206
1207
1208
1209
1210
1211
1212
1198
1199
1200
1201
1202
1203
1204

1205
1206
1207
1208
1209
1210
1211
1212







-
+







	size_t i = 0, j = 0;
	const OFChar32 *ret;

	while (i < _s->cStringLength) {
		OFChar32 c;
		ssize_t cLen;

		cLen = OFUTF8StringDecode(_s->cString + i,
		cLen = _OFUTF8StringDecode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen <= 0 || c > 0x10FFFF) {
			OFFreeMemory(buffer);
			@throw [OFInvalidEncodingException exception];
		}