ObjFW  Diff

Differences From Artifact [3ce37a1895]:

To Artifact [3a479b89af]:

  • File src/OFString_UTF8.m — part of check-in [3d16a30f41] at 2013-06-22 12:12:36 on branch trunk — Rework exceptions.

    This mostly removes the argument for the class in which the exception
    occurred. As backtraces were recently added for all platforms, the
    passed class does not give any extra information on where the exception
    occurred anymore.

    This also removes a few other arguments which were not too helpful. In
    the past, the idea was to pass as many arguments as possible so that it
    is easier to find the origin of the exception. However, as backtraces
    are a much better way to find the origin, those are not useful anymore
    and just make the exception more cumbersome to use. The rule is now to
    only pass arguments that might help in recovering from the exception or
    provide information that is otherwise not easily accessible. (user: js, size: 30279) [annotate] [blame] [check-ins using]


187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
202
187
188
189
190
191
192
193

194

195
196
197
198
199
200
201







-
+
-








		switch (of_string_utf8_check(UTF8String, UTF8StringLength,
		    &_s->length)) {
		case 1:
			_s->isUTF8 = true;
			break;
		case -1:
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];
		}

		memcpy(_s->cString, UTF8String, UTF8StringLength);
		_s->cString[UTF8StringLength] = 0;
	} @catch (id e) {
		[self release];
		@throw e;
229
230
231
232
233
234
235
236

237
238
239
240
241

242
243
244
245
246
247
248
249
228
229
230
231
232
233
234

235
236
237
238
239

240

241
242
243
244
245
246
247







-
+




-
+
-







		if (encoding == OF_STRING_ENCODING_UTF_8 ||
		    encoding == OF_STRING_ENCODING_ASCII) {
			switch (of_string_utf8_check(cString, cStringLength,
			    &_s->length)) {
			case 1:
				if (encoding == OF_STRING_ENCODING_ASCII)
					@throw [OFInvalidEncodingException
					    exceptionWithClass: [self class]];
					    exception];

				_s->isUTF8 = true;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];
			}

			memcpy(_s->cString, cString, cStringLength);
			_s->cString[cStringLength] = 0;

			return self;
		}
263
264
265
266
267
268
269
270

271
272
273
274
275
276
277
261
262
263
264
265
266
267

268
269
270
271
272
273
274
275







-
+








				_s->isUTF8 = true;
				bytes = of_string_utf8_encode(
				    (uint8_t)cString[i], buffer);

				if (bytes == 0)
					@throw [OFInvalidEncodingException
					    exceptionWithClass: [self class]];
					    exception];

				_s->cStringLength += bytes - 1;
				_s->cString = [self
				    resizeMemory: _s->cString
					    size: _s->cStringLength + 1];

				memcpy(_s->cString + j, buffer, bytes);
287
288
289
290
291
292
293
294

295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311

312
313
314
315
316
317
318
319

320
321
322
323
324
325
326
327
285
286
287
288
289
290
291

292

293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

308

309
310
311
312
313
314

315

316
317
318
319
320
321
322







-
+
-















-
+
-






-
+
-







		case OF_STRING_ENCODING_ISO_8859_15:
			table = of_iso_8859_15;
			break;
		case OF_STRING_ENCODING_WINDOWS_1252:
			table = of_windows_1252;
			break;
		default:
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];
		}

		for (i = j = 0; i < cStringLength; i++) {
			char buffer[4];
			of_unichar_t character;
			size_t characterBytes;

			if (!(cString[i] & 0x80)) {
				_s->cString[j++] = cString[i];
				continue;
			}

			character = table[(uint8_t)cString[i]];

			if (character == 0xFFFD)
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];

			_s->isUTF8 = true;
			characterBytes = of_string_utf8_encode(character,
			    buffer);

			if (characterBytes == 0)
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];

			_s->cStringLength += characterBytes - 1;
			_s->cString = [self
			    resizeMemory: _s->cString
				    size: _s->cStringLength + 1];

			memcpy(_s->cString + j, buffer, characterBytes);
361
362
363
364
365
366
367
368

369
370
371
372
373
374
375
376
356
357
358
359
360
361
362

363

364
365
366
367
368
369
370







-
+
-








		switch (of_string_utf8_check(UTF8String, UTF8StringLength,
		    &_s->length)) {
		case 1:
			_s->isUTF8 = true;
			break;
		case -1:
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];
		}
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
431
432
433
434
435
436
437
438

439
440
441
442
443
444
445
446
425
426
427
428
429
430
431

432

433
434
435
436
437
438
439







-
+
-







				_s->isUTF8 = true;

				memcpy(_s->cString + j, buffer, len);
				j += len;

				break;
			default:
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];
			}
		}

		_s->cString[j] = '\0';
		_s->cStringLength = j;

		@try {
486
487
488
489
490
491
492
493

494
495
496
497
498
499
500
501

502
503
504
505
506
507
508
509

510
511
512
513
514
515
516
479
480
481
482
483
484
485

486

487
488
489
490
491
492

493
494
495
496
497
498
499
500

501
502
503
504
505
506
507
508







-
+
-






-
+







-
+







			char buffer[4];
			of_unichar_t character =
			    (swap ? OF_BSWAP16(string[i]) : string[i]);
			size_t len;

			/* Missing high surrogate */
			if ((character & 0xFC00) == 0xDC00)
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];

			if ((character & 0xFC00) == 0xD800) {
				of_char16_t nextCharacter;

				if (length <= i + 1)
					@throw [OFInvalidEncodingException
					    exceptionWithClass: [self class]];
					    exception];

				nextCharacter = (swap
				    ? OF_BSWAP16(string[i + 1])
				    : string[i + 1]);

				if ((nextCharacter & 0xFC00) != 0xDC00)
					@throw [OFInvalidEncodingException
					    exceptionWithClass: [self class]];
					    exception];

				character = (((character & 0x3FF) << 10) |
				    (nextCharacter & 0x3FF)) + 0x10000;

				i++;
				_s->length--;
			}
527
528
529
530
531
532
533
534

535
536
537
538
539
540
541
542
519
520
521
522
523
524
525

526

527
528
529
530
531
532
533







-
+
-







				_s->isUTF8 = true;

				memcpy(_s->cString + j, buffer, len);
				j += len;

				break;
			default:
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];
			}
		}

		_s->cString[j] = '\0';
		_s->cStringLength = j;

		@try {
594
595
596
597
598
599
600
601

602
603
604
605
606
607
608
609
585
586
587
588
589
590
591

592

593
594
595
596
597
598
599







-
+
-







				_s->isUTF8 = true;

				memcpy(_s->cString + j, buffer, len);
				j += len;

				break;
			default:
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];
			}
		}

		_s->cString[j] = '\0';
		_s->cStringLength = j;

		@try {
626
627
628
629
630
631
632
633

634
635
636
637
638
639
640
641

642
643
644
645
646
647
648
649
650
651
652
653

654
655
656
657
658
659
660
661
616
617
618
619
620
621
622

623


624
625
626
627
628

629

630
631
632
633
634
635
636
637
638
639

640

641
642
643
644
645
646
647







-
+
-
-





-
+
-










-
+
-







	self = [super init];

	@try {
		char *tmp;
		int cStringLength;

		if (format == nil)
			@throw [OFInvalidArgumentException
			@throw [OFInvalidArgumentException exception];
			    exceptionWithClass: [self class]
				      selector: _cmd];

		_s = &_storage;

		if ((cStringLength = of_vasprintf(&tmp, [format UTF8String],
		    arguments)) == -1)
			@throw [OFInvalidFormatException
			@throw [OFInvalidFormatException exception];
			    exceptionWithClass: [self class]];

		_s->cStringLength = cStringLength;

		@try {
			switch (of_string_utf8_check(tmp, cStringLength,
			    &_s->length)) {
			case 1:
				_s->isUTF8 = true;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				@throw [OFInvalidEncodingException exception];
				    exceptionWithClass: [self class]];
			}

			_s->cString = [self
			    allocMemoryWithSize: cStringLength + 1];
			memcpy(_s->cString, tmp, cStringLength + 1);
		} @finally {
			free(tmp);
743
744
745
746
747
748
749
750

751
752
753
754
755

756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773

774
775
776
777
778
779
780
781
729
730
731
732
733
734
735

736

737
738
739

740

741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756

757

758
759
760
761
762
763
764







-
+
-



-
+
-
















-
+
-







/*
 * Copies the stored C string, including its terminating zero byte, into the
 * caller-supplied buffer.
 *
 * ASCII and UTF-8 are served directly from the internal storage; every other
 * encoding is forwarded to the superclass implementation.
 *
 * cString:   destination buffer
 * maxLength: capacity of the destination buffer in bytes; it must have room
 *            for the string plus the terminating zero
 * encoding:  the requested encoding
 *
 * Returns the number of bytes copied, not counting the terminating zero (for
 * the ASCII / UTF-8 path), or whatever the superclass returns otherwise.
 *
 * Throws OFInvalidEncodingException if ASCII is requested while the isUTF8
 * flag is set, and OFOutOfRangeException if the buffer is too small.
 */
- (size_t)getCString: (char*)cString
	   maxLength: (size_t)maxLength
	    encoding: (of_string_encoding_t)encoding
{
	switch (encoding) {
	case OF_STRING_ENCODING_ASCII:
		/* A string flagged as UTF-8 cannot be handed out as ASCII. */
		if (_s->isUTF8)
			@throw [OFInvalidEncodingException exception];
		/* intentional fall-through */
	case OF_STRING_ENCODING_UTF_8:
		/* + 1 accounts for the terminating zero byte. */
		if (_s->cStringLength + 1 > maxLength)
			@throw [OFOutOfRangeException exception];

		memcpy(cString, _s->cString, _s->cStringLength + 1);

		return _s->cStringLength;
	default:
		return [super getCString: cString
			       maxLength: maxLength
				encoding: encoding];
	}
}

/*
 * Returns the string as a C string in the requested encoding.
 *
 * For ASCII and UTF-8 the internal storage is returned directly (no copy is
 * made here); every other encoding is forwarded to the superclass
 * implementation.
 *
 * Throws OFInvalidEncodingException if ASCII is requested while the isUTF8
 * flag is set.
 */
- (const char*)cStringWithEncoding: (of_string_encoding_t)encoding
{
	switch (encoding) {
	case OF_STRING_ENCODING_ASCII:
		/* A string flagged as UTF-8 cannot be handed out as ASCII. */
		if (_s->isUTF8)
			@throw [OFInvalidEncodingException exception];
		/* intentional fall-through */
	case OF_STRING_ENCODING_UTF_8:
		return _s->cString;
	default:
		return [super cStringWithEncoding: encoding];
	}
}
840
841
842
843
844
845
846
847

848
849
850
851
852
853
854
855
856
823
824
825
826
827
828
829

830


831
832
833
834
835
836
837







-
+
-
-







	size_t otherCStringLength, minimumCStringLength;
	int compare;

	if (object == self)
		return OF_ORDERED_SAME;

	if (![object isKindOfClass: [OFString class]])
		@throw [OFInvalidArgumentException
		@throw [OFInvalidArgumentException exception];
		    exceptionWithClass: [self class]
			      selector: _cmd];

	otherString = (OFString*)object;
	otherCStringLength = [otherString UTF8StringLength];
	minimumCStringLength = (_s->cStringLength > otherCStringLength
	    ? otherCStringLength : _s->cStringLength);

	if ((compare = memcmp(_s->cString, [otherString UTF8String],
874
875
876
877
878
879
880
881

882
883
884
885
886
887
888
889
890
855
856
857
858
859
860
861

862


863
864
865
866
867
868
869







-
+
-
-







	size_t i, j, otherCStringLength, minimumCStringLength;
	int compare;

	if (otherString == self)
		return OF_ORDERED_SAME;

	if (![otherString isKindOfClass: [OFString class]])
		@throw [OFInvalidArgumentException
		@throw [OFInvalidArgumentException exception];
		    exceptionWithClass: [self class]
			      selector: _cmd];

	otherCString = [otherString UTF8String];
	otherCStringLength = [otherString UTF8StringLength];

	if (!_s->isUTF8) {
		minimumCStringLength = (_s->cStringLength > otherCStringLength
		    ? otherCStringLength : _s->cStringLength);
912
913
914
915
916
917
918
919

920
921
922
923
924
925
926
927
891
892
893
894
895
896
897

898

899
900
901
902
903
904
905







-
+
-








		l1 = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c1);
		l2 = of_string_utf8_decode(otherCString + j,
		    otherCStringLength - j, &c2);

		if (l1 == 0 || l2 == 0 || c1 > 0x10FFFF || c2 > 0x10FFFF)
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];

		if (c1 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) {
			of_unichar_t tc =
			    of_unicode_casefolding_table[c1 >> 8][c1 & 0xFF];

			if (tc)
				c1 = tc;
964
965
966
967
968
969
970
971

972
973
974
975
976
977
978
979
942
943
944
945
946
947
948

949

950
951
952
953
954
955
956







-
+
-








	for (i = 0; i < _s->cStringLength; i++) {
		of_unichar_t c;
		size_t length;

		if ((length = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c)) == 0)
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];

		OF_HASH_ADD(hash, (c & 0xFF0000) >> 16);
		OF_HASH_ADD(hash, (c & 0x00FF00) >>  8);
		OF_HASH_ADD(hash,  c & 0x0000FF);

		i += length - 1;
	}
987
988
989
990
991
992
993
994

995
996
997
998
999
1000
1001
1002
1003
1004

1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019

1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037

1038
1039
1040
1041
1042
1043
1044
964
965
966
967
968
969
970

971
972
973
974
975
976
977
978
979
980

981

982
983
984
985
986
987
988
989
990
991
992
993
994

995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012

1013
1014
1015
1016
1017
1018
1019
1020







-
+









-
+
-













-
+

















-
+







}

/*
 * Returns the character at the given character index.
 *
 * When the isUTF8 flag is not set, the character index is used directly as a
 * byte index into the stored C string. Otherwise the character index is first
 * translated into a byte position and the character is decoded from there.
 *
 * Throws OFOutOfRangeException if index is not smaller than the string's
 * length, and OFInvalidEncodingException if decoding at the computed position
 * fails.
 */
- (of_unichar_t)characterAtIndex: (size_t)index
{
	of_unichar_t character;

	if (index >= _s->length)
		@throw [OFOutOfRangeException exception];

	/* Fast path: character index and byte index coincide. */
	if (!_s->isUTF8)
		return _s->cString[index];

	/* From here on, index is a byte offset rather than a character index. */
	index = of_string_utf8_get_position(_s->cString, index,
	    _s->cStringLength);

	if (!of_string_utf8_decode(_s->cString + index,
	    _s->cStringLength - index, &character))
		@throw [OFInvalidEncodingException exception];

	return character;
}

/*
 * Copies the characters in the given range into the caller-supplied buffer.
 *
 * buffer: destination; it is written with range.length of_unichar_t values
 * range:  the range of characters to copy, in characters (not bytes)
 *
 * Throws OFOutOfRangeException if the range overflows or extends past the
 * end of the string.
 */
- (void)getCharacters: (of_unichar_t*)buffer
	      inRange: (of_range_t)range
{
	/* TODO: Could be slightly optimized */
	void *pool = objc_autoreleasePoolPush();
	const of_unichar_t *characters = [self characters];

	/*
	 * The first condition guards against location + length wrapping
	 * around before the sum is compared against the string's length.
	 */
	if (range.length > SIZE_MAX - range.location ||
	    range.location + range.length > _s->length)
		@throw [OFOutOfRangeException exception];

	memcpy(buffer, characters + range.location,
	    range.length * sizeof(of_unichar_t));

	objc_autoreleasePoolPop(pool);
}

- (of_range_t)rangeOfString: (OFString*)string
		    options: (int)options
		      range: (of_range_t)range
{
	const char *cString = [string UTF8String];
	size_t i, cStringLength = [string UTF8StringLength];
	size_t rangeLocation, rangeLength;

	if (range.length > SIZE_MAX - range.location ||
	    range.location + range.length > _s->length)
		@throw [OFOutOfRangeException exceptionWithClass: [self class]];
		@throw [OFOutOfRangeException exception];

	if (_s->isUTF8) {
		rangeLocation = of_string_utf8_get_position(
		    _s->cString, range.location, _s->cStringLength);
		rangeLength = of_string_utf8_get_position(
		    _s->cString + rangeLocation, range.length,
		    _s->cStringLength - rangeLocation);
1104
1105
1106
1107
1108
1109
1110
1111

1112
1113
1114
1115
1116
1117
1118
1080
1081
1082
1083
1084
1085
1086

1087
1088
1089
1090
1091
1092
1093
1094







-
+








- (OFString*)substringWithRange: (of_range_t)range
{
	size_t start = range.location;
	size_t end = range.location + range.length;

	if (range.length > SIZE_MAX - range.location || end > _s->length)
		@throw [OFOutOfRangeException exceptionWithClass: [self class]];
		@throw [OFOutOfRangeException exception];

	if (_s->isUTF8) {
		start = of_string_utf8_get_position(_s->cString, start,
		    _s->cStringLength);
		end = of_string_utf8_get_position(_s->cString, end,
		    _s->cStringLength);
	}
1322
1323
1324
1325
1326
1327
1328
1329

1330
1331
1332
1333
1334
1335
1336
1337
1298
1299
1300
1301
1302
1303
1304

1305

1306
1307
1308
1309
1310
1311
1312







-
+
-







		of_unichar_t c;
		size_t cLen;

		cLen = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen == 0 || c > 0x10FFFF)
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];

		ret[j++] = c;
		i += cLen;
	}

	return ret;
}
1351
1352
1353
1354
1355
1356
1357
1358

1359
1360
1361
1362
1363
1364
1365
1366
1326
1327
1328
1329
1330
1331
1332

1333

1334
1335
1336
1337
1338
1339
1340







-
+
-







		of_unichar_t c;
		size_t cLen;

		cLen = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen == 0 || c > 0x10FFFF)
			@throw [OFInvalidEncodingException
			@throw [OFInvalidEncodingException exception];
			    exceptionWithClass: [self class]];

		if (byteOrder != OF_BYTE_ORDER_NATIVE)
			ret[j++] = OF_BSWAP32(c);
		else
			ret[j++] = c;

		i += cLen;