ObjFW  Check-in [5e2ef97c35]

Overview
Comment: Change of_string_utf8_decode() API

It now returns <= 0 on error. A negative value is the negated number of bytes
the sequence requires (e.g. -3 when a 3-byte sequence is cut short); 0 means
the lead byte is not a valid UTF-8 start byte.

This can be used to detect a truncated sequence and to work out how many bytes
are missing.

Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 5e2ef97c35ba3911a76fe45afb629244b10f893f01be2e60163c1a82442548ce
User & Date: js on 2016-03-13 11:19:13
Other Links: manifest | tags
Context
2016-03-13
12:39
OFRunLoop: Tiny documentation improvement check-in: d4555b9c1a user: js tags: trunk
11:19
Change of_string_utf8_decode() API check-in: 5e2ef97c35 user: js tags: trunk
10:24
OFStdIOStream_Win32Console: Improve reading check-in: 566d4df603 user: js tags: trunk
Changes

Modified src/OFMutableString_UTF8.m from [3b7cce9749] to [f55475cd25].

105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
105
106
107
108
109
110
111

112
113
114
115
116
117
118
119
120
121
122
123
124

125
126
127
128
129
130
131
132







-
+












-
+







	i = j = 0;
	newCStringLength = 0;

	while (i < _s->cStringLength) {
		const of_unichar_t *const *table;
		size_t tableSize;
		of_unichar_t c;
		size_t cLen;
		ssize_t cLen;

		if (isStart) {
			table = startTable;
			tableSize = middleTableSize;
		} else {
			table = middleTable;
			tableSize = middleTableSize;
		}

		cLen = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen == 0 || c > 0x10FFFF) {
		if (cLen <= 0 || c > 0x10FFFF) {
			[self freeMemory: unicodeString];
			@throw [OFInvalidEncodingException exception];
		}

		switch (c) {
		case ' ':
		case '\t':
200
201
202
203
204
205
206
207


208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227

228
229
230
231
232

233
234

235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250

251
252
253
254
255
256
257
200
201
202
203
204
205
206

207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227

228
229
230
231
232

233
234

235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250

251
252
253
254
255
256
257
258







-
+
+



















-
+




-
+

-
+















-
+







}

- (void)setCharacter: (of_unichar_t)character
	     atIndex: (size_t)index
{
	char buffer[4];
	of_unichar_t c;
	size_t lenNew, lenOld;
	size_t lenNew;
	ssize_t lenOld;

	if (_s->isUTF8)
		index = of_string_utf8_get_position(_s->cString, index,
		    _s->cStringLength);

	if (index > _s->cStringLength)
		@throw [OFOutOfRangeException exception];

	/* Shortcut if old and new character both are ASCII */
	if (!(character & 0x80) && !(_s->cString[index] & 0x80)) {
		_s->hashed = false;
		_s->cString[index] = character;
		return;
	}

	if ((lenNew = of_string_utf8_encode(character, buffer)) == 0)
		@throw [OFInvalidEncodingException exception];

	if ((lenOld = of_string_utf8_decode(_s->cString + index,
	    _s->cStringLength - index, &c)) == 0)
	    _s->cStringLength - index, &c)) <= 0)
		@throw [OFInvalidEncodingException exception];

	_s->hashed = false;

	if (lenNew == lenOld)
	if (lenNew == (size_t)lenOld)
		memcpy(_s->cString + index, buffer, lenNew);
	else if (lenNew > lenOld) {
	else if (lenNew > (size_t)lenOld) {
		_s->cString = [self resizeMemory: _s->cString
					    size: _s->cStringLength -
						  lenOld + lenNew + 1];

		memmove(_s->cString + index + lenNew,
		    _s->cString + index + lenOld,
		    _s->cStringLength - index - lenOld);
		memcpy(_s->cString + index, buffer, lenNew);

		_s->cStringLength -= lenOld;
		_s->cStringLength += lenNew;
		_s->cString[_s->cStringLength] = '\0';

		if (character & 0x80)
			_s->isUTF8 = true;
	} else if (lenNew < lenOld) {
	} else if (lenNew < (size_t)lenOld) {
		memmove(_s->cString + index + lenNew,
		    _s->cString + index + lenOld,
		    _s->cStringLength - index - lenOld);
		memcpy(_s->cString + index, buffer, lenNew);

		_s->cStringLength -= lenOld;
		_s->cStringLength += lenNew;

Modified src/OFStdIOStream_Win32Console.m from [88dea4ced5] to [f55d77f8b8].

39
40
41
42
43
44
45

46
47
48
49
50
51
52
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53







+








#define OF_STDIO_STREAM_WIN32_CONSOLE_M

#include "config.h"

#import "OFStdIOStream_Win32Console.h"
#import "OFStdIOStream+Private.h"
#import "OFString.h"
#import "OFDataArray.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFOutOfRangeException.h"
#import "OFReadFailedException.h"
#import "OFWriteFailedException.h"
219
220
221
222
223
224
225
226

227
228

229
230
231

232
233
234
235
236
237
238
239
240
241

242
243
244
245
246
247
248
249
250
251
252
253
220
221
222
223
224
225
226

227
228

229
230
231

232
233
234
235
236
237
238
239
240
241

242
243
244
245
246
247
248
249
250
251
252
253
254







-
+

-
+


-
+









-
+












				  count: length * 2];
	@try {
		size_t i = 0, j = 0;
		DWORD written;

		while (i < length) {
			of_unichar_t c;
			size_t cLen;
			size_t UTF8Len;

			cLen = of_string_utf8_decode(buffer + i, length - i,
			UTF8Len = of_string_utf8_decode(buffer + i, length - i,
			    &c);

			if (cLen == 0 || c > 0x10FFFF)
			if (UTF8Len <= 0 || c > 0x10FFFF)
				@throw [OFInvalidEncodingException exception];

			if (c > 0xFFFF) {
				c -= 0x10000;
				tmp[j++] = 0xD800 | (c >> 10);
				tmp[j++] = 0xDC00 | (c & 0x3FF);
			} else
				tmp[j++] = c;

			i += cLen;
			i += UTF8Len;
		}

		if (!WriteConsoleW(_handle, tmp, j, &written, NULL) ||
		    written != j)
			@throw [OFWriteFailedException
			    exceptionWithObject: self
				requestedLength: j];
	} @finally {
		[self freeMemory: tmp];
	}
}
@end

Modified src/OFString.h from [a580fc1ea1] to [f6c039afa4].

1096
1097
1098
1099
1100
1101
1102
1103

1104
1105
1106
1107
1108
1109
1110
1096
1097
1098
1099
1100
1101
1102

1103
1104
1105
1106
1107
1108
1109
1110







-
+







#endif
@end

#ifdef __cplusplus
extern "C" {
#endif
extern size_t of_string_utf8_encode(of_unichar_t, char*);
extern size_t of_string_utf8_decode(const char*, size_t, of_unichar_t*);
extern ssize_t of_string_utf8_decode(const char*, size_t, of_unichar_t*);
extern size_t of_string_utf16_length(const of_char16_t*);
extern size_t of_string_utf32_length(const of_char32_t*);
#ifdef __cplusplus
}
#endif

OF_ASSUME_NONNULL_END

Modified src/OFString.m from [e3e6e72034] to [b713fffc4b].

124
125
126
127
128
129
130
131

132
133

134
135
136
137
138
139
140
141
142
143

144
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160

161
162
163
164
165
166
167
124
125
126
127
128
129
130

131
132
133
134
135
136
137
138
139
140
141
142
143

144
145
146
147
148
149
150
151

152
153
154
155
156
157
158
159
160

161
162
163
164
165
166
167
168







-
+


+









-
+







-
+








-
+







		buffer[i] = 0x80 | (character & 0x3F);
		return 4;
	}

	return 0;
}

size_t
ssize_t
of_string_utf8_decode(const char *buffer_, size_t length, of_unichar_t *ret)
{
	/* FIXME: Check if the following bytes are indeed surrogates */
	const uint8_t *buffer = (const uint8_t*)buffer_;

	if (!(*buffer & 0x80)) {
		*ret = buffer[0];
		return 1;
	}

	if ((*buffer & 0xE0) == 0xC0) {
		if OF_UNLIKELY (length < 2)
			return 0;
			return -2;

		*ret = ((buffer[0] & 0x1F) << 6) | (buffer[1] & 0x3F);
		return 2;
	}

	if ((*buffer & 0xF0) == 0xE0) {
		if OF_UNLIKELY (length < 3)
			return 0;
			return -3;

		*ret = ((buffer[0] & 0x0F) << 12) | ((buffer[1] & 0x3F) << 6) |
		    (buffer[2] & 0x3F);
		return 3;
	}

	if ((*buffer & 0xF8) == 0xF0) {
		if OF_UNLIKELY (length < 4)
			return 0;
			return -4;

		*ret = ((buffer[0] & 0x07) << 18) | ((buffer[1] & 0x3F) << 12) |
		    ((buffer[2] & 0x3F) << 6) | (buffer[3] & 0x3F);
		return 4;
	}

	return 0;

Modified src/OFString_UTF8.m from [f9779f2392] to [c460dbfe91].

804
805
806
807
808
809
810
811

812
813
814
815
816
817
818

819
820
821
822
823
824
825
804
805
806
807
808
809
810

811
812
813
814
815
816
817

818
819
820
821
822
823
824
825







-
+






-
+







			return OF_ORDERED_ASCENDING;
	}

	i = j = 0;

	while (i < _s->cStringLength && j < otherCStringLength) {
		of_unichar_t c1, c2;
		size_t l1, l2;
		ssize_t l1, l2;

		l1 = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c1);
		l2 = of_string_utf8_decode(otherCString + j,
		    otherCStringLength - j, &c2);

		if (l1 == 0 || l2 == 0 || c1 > 0x10FFFF || c2 > 0x10FFFF)
		if (l1 <= 0 || l2 <= 0 || c1 > 0x10FFFF || c2 > 0x10FFFF)
			@throw [OFInvalidEncodingException exception];

		if (c1 >> 8 < OF_UNICODE_CASEFOLDING_TABLE_SIZE) {
			of_unichar_t tc =
			    of_unicode_casefolding_table[c1 >> 8][c1 & 0xFF];

			if (tc)
858
859
860
861
862
863
864
865

866
867
868

869
870
871
872
873
874
875
858
859
860
861
862
863
864

865
866
867

868
869
870
871
872
873
874
875







-
+


-
+







	if (_s->hashed)
		return _s->hash;

	OF_HASH_INIT(hash);

	for (size_t i = 0; i < _s->cStringLength; i++) {
		of_unichar_t c;
		size_t length;
		ssize_t length;

		if ((length = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c)) == 0)
		    _s->cStringLength - i, &c)) <= 0)
			@throw [OFInvalidEncodingException exception];

		OF_HASH_ADD(hash, (c & 0xFF0000) >> 16);
		OF_HASH_ADD(hash, (c & 0x00FF00) >>  8);
		OF_HASH_ADD(hash,  c & 0x0000FF);

		i += length - 1;
892
893
894
895
896
897
898
899
900


901
902
903
904
905
906
907
892
893
894
895
896
897
898


899
900
901
902
903
904
905
906
907







-
-
+
+








	if (!_s->isUTF8)
		return _s->cString[index];

	index = of_string_utf8_get_position(_s->cString, index,
	    _s->cStringLength);

	if (!of_string_utf8_decode(_s->cString + index,
	    _s->cStringLength - index, &character))
	if (of_string_utf8_decode(_s->cString + index,
	    _s->cStringLength - index, &character) <= 0)
		@throw [OFInvalidEncodingException exception];

	return character;
}

- (void)getCharacters: (of_unichar_t*)buffer
	      inRange: (of_range_t)range
1188
1189
1190
1191
1192
1193
1194
1195

1196
1197
1198
1199
1200

1201
1202
1203
1204
1205
1206
1207
1188
1189
1190
1191
1192
1193
1194

1195
1196
1197
1198
1199

1200
1201
1202
1203
1204
1205
1206
1207







-
+




-
+







	ret = [object allocMemoryWithSize: sizeof(of_unichar_t)
				    count: _s->length];

	i = j = 0;

	while (i < _s->cStringLength) {
		of_unichar_t c;
		size_t cLen;
		ssize_t cLen;

		cLen = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen == 0 || c > 0x10FFFF)
		if (cLen <= 0 || c > 0x10FFFF)
			@throw [OFInvalidEncodingException exception];

		ret[j++] = c;
		i += cLen;
	}

	return ret;
1216
1217
1218
1219
1220
1221
1222
1223

1224
1225
1226
1227
1228

1229
1230
1231
1232
1233
1234
1235
1216
1217
1218
1219
1220
1221
1222

1223
1224
1225
1226
1227

1228
1229
1230
1231
1232
1233
1234
1235







-
+




-
+







	ret = [object allocMemoryWithSize: sizeof(of_unichar_t)
				    count: _s->length + 1];

	i = j = 0;

	while (i < _s->cStringLength) {
		of_unichar_t c;
		size_t cLen;
		ssize_t cLen;

		cLen = of_string_utf8_decode(_s->cString + i,
		    _s->cStringLength - i, &c);

		if (cLen == 0 || c > 0x10FFFF)
		if (cLen <= 0 || c > 0x10FFFF)
			@throw [OFInvalidEncodingException exception];

		if (byteOrder != OF_BYTE_ORDER_NATIVE)
			ret[j++] = OF_BSWAP32(c);
		else
			ret[j++] = c;