ObjFW  Diff

Differences From Artifact [d68d151905]:

To Artifact [fee37530e3]:


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

121
122
123
124
125
126
127
128
129
130
131
132
133

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

154
155

156


157
158
159
160
161
162
163
			return OF_ORDERED_ASCENDING;
	}

	return OF_ORDERED_SAME;
}

int
of_string_check_utf8(const char *string, size_t length)
{
	size_t i;
	int isUTF8 = 0;

	madvise((void*)string, length, MADV_SEQUENTIAL);

	for (i = 0; i < length; i++) {
		/* No sign of UTF-8 here */
		if (OF_LIKELY(!(string[i] & 0x80)))
			continue;

		isUTF8 = 1;

		/* We're missing a start byte here */
		if (OF_UNLIKELY(!(string[i] & 0x40))) {
			madvise((void*)string, length, MADV_NORMAL);
			return -1;
		}

		/* 2 byte sequences for code points 0 - 127 are forbidden */
		if (OF_UNLIKELY((string[i] & 0x7E) == 0x40)) {
			madvise((void*)string, length, MADV_NORMAL);
			return -1;
		}

		/* We have at minimum a 2 byte character -> check next byte */
		if (OF_UNLIKELY(length <= i + 1 ||
		    (string[i + 1] & 0xC0) != 0x80)) {
			madvise((void*)string, length, MADV_NORMAL);
			return -1;
		}

		/* Check if we have at minimum a 3 byte character */
		if (OF_LIKELY(!(string[i] & 0x20))) {
			i++;

			continue;
		}

		/* We have at minimum a 3 byte char -> check second next byte */
		if (OF_UNLIKELY(length <= i + 2 ||
		    (string[i + 2] & 0xC0) != 0x80)) {
			madvise((void*)string, length, MADV_NORMAL);
			return -1;
		}

		/* Check if we have a 4 byte character */
		if (OF_LIKELY(!(string[i] & 0x10))) {
			i += 2;

			continue;
		}

		/* We have a 4 byte character -> check third next byte */
		if (OF_UNLIKELY(length <= i + 3 ||
		    (string[i + 3] & 0xC0) != 0x80)) {
			madvise((void*)string, length, MADV_NORMAL);
			return -1;
		}

		/*
		 * Just in case, check if there's a 5th character, which is
		 * forbidden by UTF-8
		 */
		if (OF_UNLIKELY(string[i] & 0x08)) {
			madvise((void*)string, length, MADV_NORMAL);
			return -1;
		}

		i += 3;

	}


	madvise((void*)string, length, MADV_NORMAL);



	return isUTF8;
}

size_t
of_string_unicode_to_utf8(of_unichar_t character, char *buffer)
{







|

|


|

|

|





|
|




|
|




|
|
|




|

>




|
|
|




|

>




|
|
|







|
|




>


>
|
>
>







78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
			return OF_ORDERED_ASCENDING;
	}

	return OF_ORDERED_SAME;
}

int
of_string_check_utf8(const char *cString, size_t cStringLength, size_t *length)
{
	size_t i, tmpLength = cStringLength;
	int isUTF8 = 0;

	madvise((void*)cString, cStringLength, MADV_SEQUENTIAL);

	for (i = 0; i < cStringLength; i++) {
		/* No sign of UTF-8 here */
		if (OF_LIKELY(!(cString[i] & 0x80)))
			continue;

		isUTF8 = 1;

		/* We're missing a start byte here */
		if (OF_UNLIKELY(!(cString[i] & 0x40))) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* 2 byte sequences for code points 0 - 127 are forbidden */
		if (OF_UNLIKELY((cString[i] & 0x7E) == 0x40)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* We have at minimum a 2 byte character -> check next byte */
		if (OF_UNLIKELY(cStringLength <= i + 1 ||
		    (cString[i + 1] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* Check if we have at minimum a 3 byte character */
		if (OF_LIKELY(!(cString[i] & 0x20))) {
			i++;
			tmpLength--;
			continue;
		}

		/* We have at minimum a 3 byte char -> check second next byte */
		if (OF_UNLIKELY(cStringLength <= i + 2 ||
		    (cString[i + 2] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* Check if we have a 4 byte character */
		if (OF_LIKELY(!(cString[i] & 0x10))) {
			i += 2;
			tmpLength -= 2;
			continue;
		}

		/* We have a 4 byte character -> check third next byte */
		if (OF_UNLIKELY(cStringLength <= i + 3 ||
		    (cString[i + 3] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/*
		 * Just in case, check if there's a 5th character, which is
		 * forbidden by UTF-8
		 */
		if (OF_UNLIKELY(cString[i] & 0x08)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		i += 3;
		tmpLength -= 3;
	}

	madvise((void*)cString, cStringLength, MADV_NORMAL);

	if (length != NULL)
		*length = tmpLength;

	return isUTF8;
}

size_t
of_string_unicode_to_utf8(of_unichar_t character, char *buffer)
{
472
473
474
475
476
477
478
479

480
481
482
483
484
485
486
487
488
489
490
491
492



493
494
495
496
497
498
499
		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cString = [self allocMemoryWithSize: cStringLength + 1];
		s->cStringLength = cStringLength;

		if (encoding == OF_STRING_ENCODING_UTF_8) {
			switch (of_string_check_utf8(cString, cStringLength)) {

			case 1:
				s->isUTF8 = YES;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				    newWithClass: isa];
			}

			memcpy(s->cString, cString, cStringLength);
			s->cString[cStringLength] = 0;

			return self;
		}




		if (encoding == OF_STRING_ENCODING_ISO_8859_1) {
			for (i = j = 0; i < cStringLength; i++) {
				char buffer[4];
				size_t bytes;

				if (!(cString[i] & 0x80)) {







|
>













>
>
>







478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cString = [self allocMemoryWithSize: cStringLength + 1];
		s->cStringLength = cStringLength;

		if (encoding == OF_STRING_ENCODING_UTF_8) {
			switch (of_string_check_utf8(cString, cStringLength,
			    &s->length)) {
			case 1:
				s->isUTF8 = YES;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				    newWithClass: isa];
			}

			memcpy(s->cString, cString, cStringLength);
			s->cString[cStringLength] = 0;

			return self;
		}

		/* All other encodings we support are single byte encodings */
		s->length = cStringLength;

		if (encoding == OF_STRING_ENCODING_ISO_8859_1) {
			for (i = j = 0; i < cStringLength; i++) {
				char buffer[4];
				size_t bytes;

				if (!(cString[i] & 0x80)) {
587
588
589
590
591
592
593




594
595

596
597
598
599
600
601
602
603
604
605
{
	self = [super init];

	@try {
		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));





		s->cStringLength = [string cStringLength];
		s->isUTF8 = string->s->isUTF8;


		s->cString = [self allocMemoryWithSize: s->cStringLength + 1];
		memcpy(s->cString, [string cString], s->cStringLength + 1);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}







>
>
>
>


>


|







597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
{
	self = [super init];

	@try {
		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		/*
		 * We need one call to make sure it's initialized (in case it's
		 * a constant string).
		 */
		s->cStringLength = [string cStringLength];
		s->isUTF8 = string->s->isUTF8;
		s->length = string->s->length;

		s->cString = [self allocMemoryWithSize: s->cStringLength + 1];
		memcpy(s->cString, string->s->cString, s->cStringLength + 1);
	} @catch (id e) {
		[self release];
		@throw e;
	}

	return self;
}
648
649
650
651
652
653
654

655
656
657
658
659
660
661
			swap = YES;

		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cStringLength = length;
		s->cString = [self allocMemoryWithSize: (length * 4) + 1];


		for (i = 0; i < length; i++) {
			char buffer[4];
			size_t characterLen = of_string_unicode_to_utf8(
			    (swap ? of_bswap32(string[i]) : string[i]),
			    buffer);








>







663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
			swap = YES;

		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cStringLength = length;
		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];
			size_t characterLen = of_string_unicode_to_utf8(
			    (swap ? of_bswap32(string[i]) : string[i]),
			    buffer);

754
755
756
757
758
759
760

761
762
763
764
765
766
767
			swap = YES;

		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cStringLength = length;
		s->cString = [self allocMemoryWithSize: (length * 4) + 1];


		for (i = 0; i < length; i++) {
			char buffer[4];
			of_unichar_t character =
			    (swap ? of_bswap16(string[i]) : string[i]);
			size_t characterLen;








>







770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
			swap = YES;

		s = [self allocMemoryWithSize: sizeof(*s)];
		memset(s, 0, sizeof(*s));

		s->cStringLength = length;
		s->cString = [self allocMemoryWithSize: (length * 4) + 1];
		s->length = length;

		for (i = 0; i < length; i++) {
			char buffer[4];
			of_unichar_t character =
			    (swap ? of_bswap16(string[i]) : string[i]);
			size_t characterLen;

781
782
783
784
785
786
787

788
789
790
791
792
793
794
				    ? of_bswap16(string[i + 1])
				    : string[i + 1]);
				character = (((character & 0x3FF) << 10) |
				    (nextCharacter & 0x3FF)) + 0x10000;

				i++;
				s->cStringLength--;

			}

			characterLen = of_string_unicode_to_utf8(
			    character, buffer);

			switch (characterLen) {
			case 1:







>







798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
				    ? of_bswap16(string[i + 1])
				    : string[i + 1]);
				character = (((character & 0x3FF) << 10) |
				    (nextCharacter & 0x3FF)) + 0x10000;

				i++;
				s->cStringLength--;
				s->length--;
			}

			characterLen = of_string_unicode_to_utf8(
			    character, buffer);

			switch (characterLen) {
			case 1:
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
		    arguments)) == -1)
			@throw [OFInvalidFormatException newWithClass: isa];

		s->cStringLength = cStringLength;

		@try {
			switch (of_string_check_utf8(s->cString,
			    cStringLength)) {
			case 1:
				s->isUTF8 = YES;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				    newWithClass: isa];
			}







|







891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
		    arguments)) == -1)
			@throw [OFInvalidFormatException newWithClass: isa];

		s->cStringLength = cStringLength;

		@try {
			switch (of_string_check_utf8(s->cString,
			    cStringLength, &s->length)) {
			case 1:
				s->isUTF8 = YES;
				break;
			case -1:
				@throw [OFInvalidEncodingException
				    newWithClass: isa];
			}
926
927
928
929
930
931
932

933
934
935
936
937
938
939
940
941
942
943
944
945
946

		/*
		 * First needs to be a call to be sure it is initialized, in
		 * case it's a constant string.
		 */
		s->cStringLength = [firstComponent cStringLength];
		s->isUTF8 = firstComponent->s->isUTF8;


		/* Calculate length and see if we need UTF-8 */
		va_copy(argumentsCopy, arguments);
		while ((component = va_arg(argumentsCopy, OFString*)) != nil) {
			/* First needs to be a call, see above */
			cStringLength = [component cStringLength];
			s->cStringLength += 1 + cStringLength;

			if (component->s->isUTF8)
				s->isUTF8 = YES;
		}

		s->cString = [self allocMemoryWithSize: s->cStringLength + 1];








>





|
|







944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965

		/*
		 * First needs to be a call to be sure it is initialized, in
		 * case it's a constant string.
		 */
		s->cStringLength = [firstComponent cStringLength];
		s->isUTF8 = firstComponent->s->isUTF8;
		s->length = firstComponent->s->length;

		/* Calculate length and see if we need UTF-8 */
		va_copy(argumentsCopy, arguments);
		while ((component = va_arg(argumentsCopy, OFString*)) != nil) {
			/* First needs to be a call, see above */
			s->cStringLength += 1 + [component cStringLength];
			s->length += 1 + component->s->length;

			if (component->s->isUTF8)
				s->isUTF8 = YES;
		}

		s->cString = [self allocMemoryWithSize: s->cStringLength + 1];

1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129


1130
1131
1132


1133

1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
- (const char*)cString
{
	return s->cString;
}

- (size_t)length
{
	/* FIXME: Maybe cache this in an ivar? */

	if (!s->isUTF8)
		return s->cStringLength;

	return of_string_position_to_index(s->cString, s->cStringLength);
}

- (size_t)cStringLength
{
	return s->cStringLength;
}

- (BOOL)isEqual: (id)object
{


	if (![object isKindOfClass: [OFString class]])
		return NO;



	if ([object cStringLength] != s->cStringLength)

		return NO;

	if (strcmp(s->cString, [object cString]))
		return NO;

	return YES;
}

- copy
{







<
<
<
|
<
<









>
>



>
>
|
>


|







1127
1128
1129
1130
1131
1132
1133



1134


1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
- (const char*)cString
{
	return s->cString;
}

- (size_t)length
{



	return s->length;


}

- (size_t)cStringLength
{
	return s->cStringLength;
}

- (BOOL)isEqual: (id)object
{
	OFString *otherString;

	if (![object isKindOfClass: [OFString class]])
		return NO;

	otherString = object;

	if ([otherString cStringLength] != s->cStringLength ||
	    otherString->s->length != s->length)
		return NO;

	if (strcmp(s->cString, otherString->s->cString))
		return NO;

	return YES;
}

- copy
{
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311

1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
	return element;
}

- (of_unichar_t)characterAtIndex: (size_t)index
{
	of_unichar_t character;

	if (!s->isUTF8) {
		if (index >= s->cStringLength)
			@throw [OFOutOfRangeException newWithClass: isa];


		return s->cString[index];
	}

	index = of_string_index_to_position(s->cString, index,
	    s->cStringLength);

	if (index >= s->cStringLength)
		@throw [OFOutOfRangeException newWithClass: isa];

	if (!of_string_utf8_to_unicode(s->cString + index,
	    s->cStringLength - index, &character))
		@throw [OFInvalidEncodingException newWithClass: isa];

	return character;
}








<
|
|

>

<




<
<
<







1320
1321
1322
1323
1324
1325
1326

1327
1328
1329
1330
1331

1332
1333
1334
1335



1336
1337
1338
1339
1340
1341
1342
	return element;
}

- (of_unichar_t)characterAtIndex: (size_t)index
{
	of_unichar_t character;


	if (index >= s->length)
		@throw [OFOutOfRangeException newWithClass: isa];

	if (!s->isUTF8)
		return s->cString[index];


	index = of_string_index_to_position(s->cString, index,
	    s->cStringLength);




	if (!of_string_utf8_to_unicode(s->cString + index,
	    s->cStringLength - index, &character))
		@throw [OFInvalidEncodingException newWithClass: isa];

	return character;
}

1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388







1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
			return OF_INVALID_INDEX;
	}
}

- (BOOL)containsString: (OFString*)string
{
	const char *cString = [string cString];
	size_t i, cStringLength = [string cStringLength];

	if (cStringLength == 0)
		return YES;

	if (cStringLength > s->cStringLength)
		return NO;

	for (i = 0; i <= s->cStringLength - cStringLength; i++)
		if (!memcmp(s->cString + i, cString, cStringLength))
			return YES;

	return NO;
}

- (OFString*)substringFromIndex: (size_t)start
			toIndex: (size_t)end
{







	if (s->isUTF8) {
		start = of_string_index_to_position(s->cString, start,
		    s->cStringLength);
		end = of_string_index_to_position(s->cString, end,
		    s->cStringLength);
	}

	if (start > end)
		@throw [OFInvalidArgumentException newWithClass: isa
						       selector: _cmd];

	if (end > s->cStringLength)
		@throw [OFOutOfRangeException newWithClass: isa];

	return [OFString stringWithCString: s->cString + start
				    length: end - start];
}

- (OFString*)substringWithRange: (of_range_t)range
{
	return [self substringFromIndex: range.start







|

















>
>
>
>
>
>
>







<
<
<
<
<
<
<







1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417







1418
1419
1420
1421
1422
1423
1424
			return OF_INVALID_INDEX;
	}
}

- (BOOL)containsString: (OFString*)string
{
	const char *cString = [string cString];
	size_t i, cStringLength = string->s->cStringLength;

	if (cStringLength == 0)
		return YES;

	if (cStringLength > s->cStringLength)
		return NO;

	for (i = 0; i <= s->cStringLength - cStringLength; i++)
		if (!memcmp(s->cString + i, cString, cStringLength))
			return YES;

	return NO;
}

- (OFString*)substringFromIndex: (size_t)start
			toIndex: (size_t)end
{
	if (start > end)
		@throw [OFInvalidArgumentException newWithClass: isa
						       selector: _cmd];

	if (end > s->length)
		@throw [OFOutOfRangeException newWithClass: isa];

	if (s->isUTF8) {
		start = of_string_index_to_position(s->cString, start,
		    s->cStringLength);
		end = of_string_index_to_position(s->cString, end,
		    s->cStringLength);
	}








	return [OFString stringWithCString: s->cString + start
				    length: end - start];
}

- (OFString*)substringWithRange: (of_range_t)range
{
	return [self substringFromIndex: range.start
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872

- (of_unichar_t*)unicodeString
{
	OFObject *object = [[[OFObject alloc] init] autorelease];
	of_unichar_t *ret;
	size_t i, j;

	ret = [object allocMemoryForNItems: [self length] + 2
				  withSize: sizeof(of_unichar_t)];

	i = 0;
	j = 0;

	ret[j++] = 0xFEFF;








|







1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887

- (of_unichar_t*)unicodeString
{
	OFObject *object = [[[OFObject alloc] init] autorelease];
	of_unichar_t *ret;
	size_t i, j;

	ret = [object allocMemoryForNItems: s->length + 2
				  withSize: sizeof(of_unichar_t)];

	i = 0;
	j = 0;

	ret[j++] = 0xFEFF;