ObjFW  Diff

Differences From Artifact [d68d151905]:

To Artifact [fee37530e3]:


78
79
80
81
82
83
84
85

86
87

88
89
90

91
92

93
94

95
96
97
98
99
100
101


102
103
104
105
106
107


108
109
110
111
112
113
114



115
116
117
118
119

120

121
122
123
124
125
126
127



128
129
130
131
132

133

134
135
136
137
138
139
140



141
142
143
144
145
146
147
148
149


150
151
152
153

154
155

156



157
158
159
160
161
162
163
78
79
80
81
82
83
84

85
86

87
88
89

90
91

92
93

94
95
96
97
98
99


100
101
102
103
104
105


106
107
108
109
110
111



112
113
114
115
116
117
118

119
120
121
122
123
124
125



126
127
128
129
130
131
132

133
134
135
136
137
138
139



140
141
142
143
144
145
146
147
148
149


150
151
152
153
154
155
156
157
158
159

160
161
162
163
164
165
166
167
168
169







-
+

-
+


-
+

-
+

-
+





-
-
+
+




-
-
+
+




-
-
-
+
+
+




-
+

+




-
-
-
+
+
+




-
+

+




-
-
-
+
+
+







-
-
+
+




+


+
-
+
+
+







			return OF_ORDERED_ASCENDING;
	}

	return OF_ORDERED_SAME;
}

/*
 * Checks whether a C string is valid UTF-8 and, in the revised signature,
 * also counts its characters.
 *
 * NOTE(review): this span is taken from a diff between two artifacts, so
 * every changed statement appears twice: the removed variant (parameters
 * `string` / `length`) and the added variant (parameters `cString` /
 * `cStringLength` / `length`). It does not compile as shown.
 *
 * In the added variant, tmpLength starts at cStringLength and is reduced by
 * the number of continuation bytes of every accepted multi-byte sequence.
 * If the `length` out-parameter is not NULL, it receives that count, but only
 * when the function reaches the end of the buffer.
 *
 * Returns 0 if no byte has its high bit set, 1 if at least one multi-byte
 * sequence was seen and all of them were accepted, and -1 as soon as a
 * sequence is rejected.
 */
int
of_string_check_utf8(const char *string, size_t length)
of_string_check_utf8(const char *cString, size_t cStringLength, size_t *length)
{
	size_t i;
	size_t i, tmpLength = cStringLength;
	int isUTF8 = 0;

	/*
	 * Tell the kernel the buffer is about to be read front to back. Every
	 * return path below resets the advice to MADV_NORMAL first.
	 */
	madvise((void*)string, length, MADV_SEQUENTIAL);
	madvise((void*)cString, cStringLength, MADV_SEQUENTIAL);

	for (i = 0; i < length; i++) {
	for (i = 0; i < cStringLength; i++) {
		/* No sign of UTF-8 here */
		if (OF_LIKELY(!(string[i] & 0x80)))
		if (OF_LIKELY(!(cString[i] & 0x80)))
			continue;

		isUTF8 = 1;

		/* We're missing a start byte here */
		if (OF_UNLIKELY(!(string[i] & 0x40))) {
			madvise((void*)string, length, MADV_NORMAL);
		if (OF_UNLIKELY(!(cString[i] & 0x40))) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* 2 byte sequences for code points 0 - 127 are forbidden */
		if (OF_UNLIKELY((string[i] & 0x7E) == 0x40)) {
			madvise((void*)string, length, MADV_NORMAL);
		if (OF_UNLIKELY((cString[i] & 0x7E) == 0x40)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* We have at minimum a 2 byte character -> check next byte */
		if (OF_UNLIKELY(length <= i + 1 ||
		    (string[i + 1] & 0xC0) != 0x80)) {
			madvise((void*)string, length, MADV_NORMAL);
		if (OF_UNLIKELY(cStringLength <= i + 1 ||
		    (cString[i + 1] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* Check if we have at minimum a 3 byte character */
		if (OF_LIKELY(!(string[i] & 0x20))) {
		if (OF_LIKELY(!(cString[i] & 0x20))) {
			/*
			 * 2 byte sequence: skip the continuation byte and do
			 * not count it as a character of its own.
			 */
			i++;
			tmpLength--;
			continue;
		}

		/* We have at minimum a 3 byte char -> check second next byte */
		if (OF_UNLIKELY(length <= i + 2 ||
		    (string[i + 2] & 0xC0) != 0x80)) {
			madvise((void*)string, length, MADV_NORMAL);
		if (OF_UNLIKELY(cStringLength <= i + 2 ||
		    (cString[i + 2] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* Check if we have a 4 byte character */
		if (OF_LIKELY(!(string[i] & 0x10))) {
		if (OF_LIKELY(!(cString[i] & 0x10))) {
			/* 3 byte sequence: two continuation bytes to skip */
			i += 2;
			tmpLength -= 2;
			continue;
		}

		/* We have a 4 byte character -> check third next byte */
		if (OF_UNLIKELY(length <= i + 3 ||
		    (string[i + 3] & 0xC0) != 0x80)) {
			madvise((void*)string, length, MADV_NORMAL);
		if (OF_UNLIKELY(cStringLength <= i + 3 ||
		    (cString[i + 3] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/*
		 * Just in case, check if there's a 5th character, which is
		 * forbidden by UTF-8
		 */
		if (OF_UNLIKELY(string[i] & 0x08)) {
			madvise((void*)string, length, MADV_NORMAL);
		if (OF_UNLIKELY(cString[i] & 0x08)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* 4 byte sequence: three continuation bytes to skip */
		i += 3;
		tmpLength -= 3;
	}

	/*
	 * NOTE(review): unlike the pairs above, the added madvise() line comes
	 * first here and the removed one second.
	 */
	madvise((void*)cString, cStringLength, MADV_NORMAL);
	madvise((void*)string, length, MADV_NORMAL);

	/* The character count is optional; callers may pass NULL */
	if (length != NULL)
		*length = tmpLength;

	return isUTF8;
}

size_t
of_string_unicode_to_utf8(of_unichar_t character, char *buffer)
{
472
473
474
475
476
477
478
479


480
481
482
483
484
485
486
487
488
489
490
491
492



493
494
495
496
497
498
499
478
479
480
481
482
483
484

485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509







-
+
+













+
+
+







		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cString = [self allocMemoryWithSize: cStringLength + 1];
		s->cStringLength = cStringLength;

		if (encoding == OF_STRING_ENCODING_UTF_8) {
			switch (of_string_check_utf8(cString, cStringLength)) {
			switch (of_string_check_utf8(cString, cStringLength,
			    &s->length)) {
			case 1:
				s->isUTF8 = YES;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				    newWithClass: isa];
			}

			memcpy(s->cString, cString, cStringLength);
			s->cString[cStringLength] = 0;

			return self;
		}

		/* All other encodings we support are single byte encodings */
		s->length = cStringLength;

		if (encoding == OF_STRING_ENCODING_ISO_8859_1) {
			for (i = j = 0; i < cStringLength; i++) {
				char buffer[4];
				size_t bytes;

				if (!(cString[i] & 0x80)) {
587
588
589
590
591
592
593




594
595

596
597
598

599
600
601
602
603
604
605
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612

613
614
615
616
617
618
619
620







+
+
+
+


+


-
+







{
	self = [super init];

	@try {
		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		/*
		 * We need one call to make sure it's initialized (in case it's
		 * a constant string).
		 */
		s->cStringLength = [string cStringLength];
		s->isUTF8 = string->s->isUTF8;
		s->length = string->s->length;

		s->cString = [self allocMemoryWithSize: s->cStringLength + 1];
		memcpy(s->cString, [string cString], s->cStringLength + 1);
		memcpy(s->cString, string->s->cString, s->cStringLength + 1);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}
648
649
650
651
652
653
654

655
656
657
658
659
660
661
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677







+







			swap = YES;

		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cStringLength = length;
		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];
			size_t characterLen = of_string_unicode_to_utf8(
			    (swap ? of_bswap32(string[i]) : string[i]),
			    buffer);

754
755
756
757
758
759
760

761
762
763
764
765
766
767
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784







+







			swap = YES;

		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cStringLength = length;
		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];
			of_unichar_t character =
			    (swap ? of_bswap16(string[i]) : string[i]);
			size_t characterLen;

781
782
783
784
785
786
787

788
789
790
791
792
793
794
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812







+







				    ? of_bswap16(string[i + 1])
				    : string[i + 1]);
				character = (((character & 0x3FF) << 10) |
				    (nextCharacter & 0x3FF)) + 0x10000;

				i++;
				s->cStringLength--;
				s->length--;
			}

			characterLen = of_string_unicode_to_utf8(
			    character, buffer);

			switch (characterLen) {
			case 1:
873
874
875
876
877
878
879
880

881
882
883
884
885
886
887
891
892
893
894
895
896
897

898
899
900
901
902
903
904
905







-
+







		    arguments)) == -1)
			@throw [OFInvalidFormatException newWithClass: isa];

		s->cStringLength = cStringLength;

		@try {
			switch (of_string_check_utf8(s->cString,
			    cStringLength)) {
			    cStringLength, &s->length)) {
			case 1:
				s->isUTF8 = YES;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				    newWithClass: isa];
			}
926
927
928
929
930
931
932

933
934
935
936
937
938
939


940
941
942
943
944
945
946
944
945
946
947
948
949
950
951
952
953
954
955
956


957
958
959
960
961
962
963
964
965







+





-
-
+
+








		/*
		 * First needs to be a call to be sure it is initialized, in
		 * case it's a constant string.
		 */
		s->cStringLength = [firstComponent cStringLength];
		s->isUTF8 = firstComponent->s->isUTF8;
		s->length = firstComponent->s->length;

		/* Calculate length and see if we need UTF-8 */
		va_copy(argumentsCopy, arguments);
		while ((component = va_arg(argumentsCopy, OFString*)) != nil) {
			/* First needs to be a call, see above */
			cStringLength = [component cStringLength];
			s->cStringLength += 1 + cStringLength;
			s->cStringLength += 1 + [component cStringLength];
			s->length += 1 + component->s->length;

			if (component->s->isUTF8)
				s->isUTF8 = YES;
		}

		s->cString = [self allocMemoryWithSize: s->cStringLength + 1];

1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118

1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129


1130
1131
1132


1133


1134
1135
1136

1137
1138
1139
1140
1141
1142
1143
1127
1128
1129
1130
1131
1132
1133




1134


1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150

1151
1152
1153
1154

1155
1156
1157
1158
1159
1160
1161
1162







-
-
-
-
+
-
-









+
+



+
+
-
+
+


-
+







/*
 * Returns the receiver's internal C string buffer (s->cString) directly; no
 * copy is made.
 */
- (const char*)cString
{
	return s->cString;
}

/*
 * Returns the length of the string.
 *
 * NOTE(review): this span is taken from a diff, so it holds two variants at
 * once. The removed variant returns s->cStringLength for non-UTF-8 strings
 * and otherwise calls of_string_position_to_index() on every call. The added
 * variant is the single statement `return s->length;`, which makes the rest
 * of the body unreachable as shown.
 */
- (size_t)length
{
	/* FIXME: Maybe cache this in an ivar? */

	if (!s->isUTF8)
		return s->cStringLength;
	return s->length;

	return of_string_position_to_index(s->cString, s->cStringLength);
}

/*
 * Returns the stored s->cStringLength, which is kept separately from
 * s->length.
 */
- (size_t)cStringLength
{
	return s->cStringLength;
}

/*
 * Compares the receiver with another object for equality.
 *
 * Returns NO if the object is not a kind of OFString, if the lengths differ,
 * or if strcmp() reports a difference between the two C strings; otherwise
 * returns YES.
 *
 * NOTE(review): this span is taken from a diff, so the length check and the
 * strcmp() check each appear in a removed form (messaging `object`) and an
 * added form (using the typed `otherString` and its `s` struct directly).
 * The added form additionally compares s->length. It does not compile as
 * shown.
 */
- (BOOL)isEqual: (id)object
{
	OFString *otherString;

	if (![object isKindOfClass: [OFString class]])
		return NO;

	/* Safe to treat as an OFString after the class check above */
	otherString = object;

	/* Cheap length comparison first, before looking at the bytes */
	if ([object cStringLength] != s->cStringLength)
	if ([otherString cStringLength] != s->cStringLength ||
	    otherString->s->length != s->length)
		return NO;

	if (strcmp(s->cString, [object cString]))
	if (strcmp(s->cString, otherString->s->cString))
		return NO;

	return YES;
}

- copy
{
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310


1311

1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1320
1321
1322
1323
1324
1325
1326



1327
1328
1329
1330
1331

1332
1333
1334
1335



1336
1337
1338
1339
1340
1341
1342







-
-
-
+
+

+

-




-
-
-







	return element;
}

/*
 * Returns the character at the given index.
 *
 * Throws OFOutOfRangeException if the index is out of range and
 * OFInvalidEncodingException if of_string_utf8_to_unicode() reports failure.
 *
 * NOTE(review): this span is taken from a diff, so braces are unbalanced and
 * two variants overlap. The removed variant range-checks against
 * s->cStringLength, once inside the non-UTF-8 branch and once after the
 * position lookup. The added variant performs a single up-front check against
 * s->length.
 */
- (of_unichar_t)characterAtIndex: (size_t)index
{
	of_unichar_t character;

	if (!s->isUTF8) {
		if (index >= s->cStringLength)
			@throw [OFOutOfRangeException newWithClass: isa];
	if (index >= s->length)
		@throw [OFOutOfRangeException newWithClass: isa];

	/* Without UTF-8 the index is used directly as an offset */
	if (!s->isUTF8)
		return s->cString[index];
	}

	/*
	 * From here on, index is used as an offset into s->cString.
	 * NOTE(review): presumably of_string_index_to_position() maps a
	 * character index to a byte offset -- confirm against its definition.
	 */
	index = of_string_index_to_position(s->cString, index,
	    s->cStringLength);

	if (index >= s->cStringLength)
		@throw [OFOutOfRangeException newWithClass: isa];

	if (!of_string_utf8_to_unicode(s->cString + index,
	    s->cStringLength - index, &character))
		@throw [OFInvalidEncodingException newWithClass: isa];

	return character;
}

1364
1365
1366
1367
1368
1369
1370
1371

1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388







1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1379
1380
1381
1382
1383
1384
1385

1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417







1418
1419
1420
1421
1422
1423
1424







-
+

















+
+
+
+
+
+
+







-
-
-
-
-
-
-







			return OF_INVALID_INDEX;
	}
}

/*
 * Returns YES if the bytes of the given string occur anywhere in the
 * receiver's C string, NO otherwise. A string with a C string length of 0 is
 * always reported as contained.
 *
 * NOTE(review): this span is taken from a diff, so `i` and `cStringLength`
 * are declared twice: the removed line obtains the length by messaging
 * `string`, the added line reads string->s->cStringLength directly.
 */
- (BOOL)containsString: (OFString*)string
{
	const char *cString = [string cString];
	size_t i, cStringLength = [string cStringLength];
	size_t i, cStringLength = string->s->cStringLength;

	if (cStringLength == 0)
		return YES;

	/* Also keeps the unsigned subtraction in the loop bound from wrapping */
	if (cStringLength > s->cStringLength)
		return NO;

	/* Try every start offset at which the needle still fits */
	for (i = 0; i <= s->cStringLength - cStringLength; i++)
		if (!memcmp(s->cString + i, cString, cStringLength))
			return YES;

	return NO;
}

/*
 * Returns a new string built from the receiver's C string between the start
 * and end indices, with end - start bytes taken after any conversion.
 *
 * Throws OFInvalidArgumentException if start is greater than end and
 * OFOutOfRangeException if end is out of range.
 *
 * NOTE(review): this span is taken from a diff and contains the validation
 * twice. The first pair of checks is the added variant: it runs before any
 * conversion and compares end against s->length. The second pair is the
 * removed variant: it runs after the conversion and compares end against
 * s->cStringLength.
 */
- (OFString*)substringFromIndex: (size_t)start
			toIndex: (size_t)end
{
	if (start > end)
		@throw [OFInvalidArgumentException newWithClass: isa
						       selector: _cmd];

	if (end > s->length)
		@throw [OFOutOfRangeException newWithClass: isa];

	/*
	 * For UTF-8 strings both bounds are converted before being used as
	 * offsets into s->cString.
	 * NOTE(review): presumably of_string_index_to_position() maps a
	 * character index to a byte offset -- confirm against its definition.
	 */
	if (s->isUTF8) {
		start = of_string_index_to_position(s->cString, start,
		    s->cStringLength);
		end = of_string_index_to_position(s->cString, end,
		    s->cStringLength);
	}

	if (start > end)
		@throw [OFInvalidArgumentException newWithClass: isa
						       selector: _cmd];

	if (end > s->cStringLength)
		@throw [OFOutOfRangeException newWithClass: isa];

	return [OFString stringWithCString: s->cString + start
				    length: end - start];
}

- (OFString*)substringWithRange: (of_range_t)range
{
	return [self substringFromIndex: range.start
1858
1859
1860
1861
1862
1863
1864
1865

1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879

1880
1881
1882
1883
1884
1885
1886
1887







-
+








- (of_unichar_t*)unicodeString
{
	OFObject *object = [[[OFObject alloc] init] autorelease];
	of_unichar_t *ret;
	size_t i, j;

	ret = [object allocMemoryForNItems: [self length] + 2
	ret = [object allocMemoryForNItems: s->length + 2
				  withSize: sizeof(of_unichar_t)];

	i = 0;
	j = 0;

	ret[j++] = 0xFEFF;