ObjFW  Check-in [6cbbd7c39c]

Overview
Comment:Don't use madvise().

It turns out the overhead of calling madvise() is bigger than the gain
of using it.

Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 6cbbd7c39c35513ed77073c0ab5ae03c01ec7a0f5f62363418554da16dfd50fb
User & Date: js on 2011-10-11 21:55:19
Other Links: manifest | tags
Context
2011-10-11
22:13
Small optimization in OFXMLParser. check-in: 8523d20555 user: js tags: trunk
21:55
Don't use madvise(). check-in: 6cbbd7c39c user: js tags: trunk
21:37
Include stdlib.h instead of alloca.h. check-in: adf57d7dbf user: js tags: trunk
Changes

Modified configure.ac from [cccbc97d62] to [255ae73185].

557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574

		AC_MSG_RESULT($have_threadsafe_getaddrinfo)
	])
], [
	AC_MSG_RESULT(no)
])

AC_CHECK_FUNC(madvise, [
	AC_DEFINE(HAVE_MADVISE, 1, [Whether we have madvise])
])

AS_IF([test x"$objc_runtime" = x"Apple"], [
	AC_CHECK_HEADER(Foundation/NSObject.h, [
		AC_SUBST(FOUNDATION_COMPAT_M, "foundation-compat.m")
	])
])

AS_IF([test x"$GOBJC" = x"yes"], [







<
<
<
<







557
558
559
560
561
562
563




564
565
566
567
568
569
570

		AC_MSG_RESULT($have_threadsafe_getaddrinfo)
	])
], [
	AC_MSG_RESULT(no)
])





AS_IF([test x"$objc_runtime" = x"Apple"], [
	AC_CHECK_HEADER(Foundation/NSObject.h, [
		AC_SUBST(FOUNDATION_COMPAT_M, "foundation-compat.m")
	])
])

AS_IF([test x"$GOBJC" = x"yes"], [

Modified src/OFMutableString.m from [471b2f9466] to [a718ee6404].

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#ifdef HAVE_MADVISE
# include <sys/mman.h>
#else
# define madvise(addr, len, advise)
#endif

#import "OFString.h"
#import "OFAutoreleasePool.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
#import "OFOutOfMemoryException.h"







<
<
<
<
<
<







18
19
20
21
22
23
24






25
26
27
28
29
30
31

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>







#import "OFString.h"
#import "OFAutoreleasePool.h"

#import "OFInvalidArgumentException.h"
#import "OFInvalidEncodingException.h"
#import "OFInvalidFormatException.h"
#import "OFOutOfMemoryException.h"
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
			  atIndex: 0];
}

- (void)reverse
{
	size_t i, j;

	madvise(s->cString, s->cStringLength, MADV_SEQUENTIAL);

	/* We reverse all bytes and restore UTF-8 later, if necessary */
	for (i = 0, j = s->cStringLength - 1; i < s->cStringLength / 2;
	    i++, j--) {
		s->cString[i] ^= s->cString[j];
		s->cString[j] ^= s->cString[i];
		s->cString[i] ^= s->cString[j];
	}

	if (!s->UTF8) {
		madvise(s->cString, s->cStringLength, MADV_NORMAL);
		return;
	}

	for (i = 0; i < s->cStringLength; i++) {
		/* ASCII */
		if (OF_LIKELY(!(s->cString[i] & 0x80)))
			continue;

		/* A start byte can't happen first as we reversed everything */
		if (OF_UNLIKELY(s->cString[i] & 0x40)) {
			madvise(s->cString, s->cStringLength, MADV_NORMAL);
			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];
		}

		/* Next byte must not be ASCII */
		if (OF_UNLIKELY(s->cStringLength < i + 1 ||
		    !(s->cString[i + 1] & 0x80))) {
			madvise(s->cString, s->cStringLength, MADV_NORMAL);
			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];
		}

		/* Next byte is the start byte */
		if (OF_LIKELY(s->cString[i + 1] & 0x40)) {
			s->cString[i] ^= s->cString[i + 1];
			s->cString[i + 1] ^= s->cString[i];
			s->cString[i] ^= s->cString[i + 1];

			i++;
			continue;
		}

		/* Second next byte must not be ASCII */
		if (OF_UNLIKELY(s->cStringLength < i + 2 ||
		    !(s->cString[i + 2] & 0x80))) {
			madvise(s->cString, s->cStringLength, MADV_NORMAL);
			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];
		}

		/* Second next byte is the start byte */
		if (OF_LIKELY(s->cString[i + 2] & 0x40)) {
			s->cString[i] ^= s->cString[i + 2];
			s->cString[i + 2] ^= s->cString[i];
			s->cString[i] ^= s->cString[i + 2];

			i += 2;
			continue;
		}

		/* Third next byte must not be ASCII */
		if (OF_UNLIKELY(s->cStringLength < i + 3 ||
		    !(s->cString[i + 3] & 0x80))) {
			madvise(s->cString, s->cStringLength, MADV_NORMAL);
			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];
		}

		/* Third next byte is the start byte */
		if (OF_LIKELY(s->cString[i + 3] & 0x40)) {
			s->cString[i] ^= s->cString[i + 3];
			s->cString[i + 3] ^= s->cString[i];
			s->cString[i] ^= s->cString[i + 3];

			s->cString[i + 1] ^= s->cString[i + 2];
			s->cString[i + 2] ^= s->cString[i + 1];
			s->cString[i + 1] ^= s->cString[i + 2];

			i += 3;
			continue;
		}

		/* UTF-8 does not allow more than 4 bytes per character */
		madvise(s->cString, s->cStringLength, MADV_NORMAL);
		@throw [OFInvalidEncodingException exceptionWithClass: isa];
	}

	madvise(s->cString, s->cStringLength, MADV_NORMAL);
}

- (void)upper
{
	[self _applyTable: of_unicode_upper_table
		 withSize: OF_UNICODE_UPPER_TABLE_SIZE];
}







<
<








|
<

<







|
<


<



|
<


<













|
<


<













|
<


<
















<


<
<







279
280
281
282
283
284
285


286
287
288
289
290
291
292
293
294

295

296
297
298
299
300
301
302
303

304
305

306
307
308
309

310
311

312
313
314
315
316
317
318
319
320
321
322
323
324
325

326
327

328
329
330
331
332
333
334
335
336
337
338
339
340
341

342
343

344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359

360
361


362
363
364
365
366
367
368
			  atIndex: 0];
}

- (void)reverse
{
	size_t i, j;



	/* We reverse all bytes and restore UTF-8 later, if necessary */
	for (i = 0, j = s->cStringLength - 1; i < s->cStringLength / 2;
	    i++, j--) {
		s->cString[i] ^= s->cString[j];
		s->cString[j] ^= s->cString[i];
		s->cString[i] ^= s->cString[j];
	}

	if (!s->UTF8)

		return;


	for (i = 0; i < s->cStringLength; i++) {
		/* ASCII */
		if (OF_LIKELY(!(s->cString[i] & 0x80)))
			continue;

		/* A start byte can't happen first as we reversed everything */
		if (OF_UNLIKELY(s->cString[i] & 0x40))

			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];


		/* Next byte must not be ASCII */
		if (OF_UNLIKELY(s->cStringLength < i + 1 ||
		    !(s->cString[i + 1] & 0x80)))

			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];


		/* Next byte is the start byte */
		if (OF_LIKELY(s->cString[i + 1] & 0x40)) {
			s->cString[i] ^= s->cString[i + 1];
			s->cString[i + 1] ^= s->cString[i];
			s->cString[i] ^= s->cString[i + 1];

			i++;
			continue;
		}

		/* Second next byte must not be ASCII */
		if (OF_UNLIKELY(s->cStringLength < i + 2 ||
		    !(s->cString[i + 2] & 0x80)))

			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];


		/* Second next byte is the start byte */
		if (OF_LIKELY(s->cString[i + 2] & 0x40)) {
			s->cString[i] ^= s->cString[i + 2];
			s->cString[i + 2] ^= s->cString[i];
			s->cString[i] ^= s->cString[i + 2];

			i += 2;
			continue;
		}

		/* Third next byte must not be ASCII */
		if (OF_UNLIKELY(s->cStringLength < i + 3 ||
		    !(s->cString[i + 3] & 0x80)))

			@throw [OFInvalidEncodingException
			    exceptionWithClass: isa];


		/* Third next byte is the start byte */
		if (OF_LIKELY(s->cString[i + 3] & 0x40)) {
			s->cString[i] ^= s->cString[i + 3];
			s->cString[i + 3] ^= s->cString[i];
			s->cString[i] ^= s->cString[i + 3];

			s->cString[i + 1] ^= s->cString[i + 2];
			s->cString[i + 2] ^= s->cString[i + 1];
			s->cString[i + 1] ^= s->cString[i + 2];

			i += 3;
			continue;
		}

		/* UTF-8 does not allow more than 4 bytes per character */

		@throw [OFInvalidEncodingException exceptionWithClass: isa];
	}


}

- (void)upper
{
	[self _applyTable: of_unicode_upper_table
		 withSize: OF_UNICODE_UPPER_TABLE_SIZE];
}

Modified src/OFString.m from [83cfd0201b] to [f8793e2293].

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>

#include <sys/stat.h>
#ifdef HAVE_MADVISE
# include <sys/mman.h>
#else
# define madvise(addr, len, advise)
#endif

#import "OFString.h"
#import "OFArray.h"
#import "OFDictionary.h"
#import "OFFile.h"
#import "OFURL.h"
#import "OFHTTPRequest.h"







<
<
<
<
<







20
21
22
23
24
25
26





27
28
29
30
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>

#include <sys/stat.h>






#import "OFString.h"
#import "OFArray.h"
#import "OFDictionary.h"
#import "OFFile.h"
#import "OFURL.h"
#import "OFHTTPRequest.h"
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

int
of_string_check_utf8(const char *cString, size_t cStringLength, size_t *length)
{
	size_t i, tmpLength = cStringLength;
	int UTF8 = 0;

	madvise((void*)cString, cStringLength, MADV_SEQUENTIAL);

	for (i = 0; i < cStringLength; i++) {
		/* No sign of UTF-8 here */
		if (OF_LIKELY(!(cString[i] & 0x80)))
			continue;

		UTF8 = 1;

		/* We're missing a start byte here */
		if (OF_UNLIKELY(!(cString[i] & 0x40))) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* 2 byte sequences for code points 0 - 127 are forbidden */
		if (OF_UNLIKELY((cString[i] & 0x7E) == 0x40)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* We have at minimum a 2 byte character -> check next byte */
		if (OF_UNLIKELY(cStringLength <= i + 1 ||
		    (cString[i + 1] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* Check if we have at minimum a 3 byte character */
		if (OF_LIKELY(!(cString[i] & 0x20))) {
			i++;
			tmpLength--;
			continue;
		}

		/* We have at minimum a 3 byte char -> check second next byte */
		if (OF_UNLIKELY(cStringLength <= i + 2 ||
		    (cString[i + 2] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/* Check if we have a 4 byte character */
		if (OF_LIKELY(!(cString[i] & 0x10))) {
			i += 2;
			tmpLength -= 2;
			continue;
		}

		/* We have a 4 byte character -> check third next byte */
		if (OF_UNLIKELY(cStringLength <= i + 3 ||
		    (cString[i + 3] & 0xC0) != 0x80)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		/*
		 * Just in case, check if there's a 5th character, which is
		 * forbidden by UTF-8
		 */
		if (OF_UNLIKELY(cString[i] & 0x08)) {
			madvise((void*)cString, cStringLength, MADV_NORMAL);
			return -1;
		}

		i += 3;
		tmpLength -= 3;
	}

	madvise((void*)cString, cStringLength, MADV_NORMAL);

	if (length != NULL)
		*length = tmpLength;

	return UTF8;
}

size_t







<
<








|
<

|
<

|
<

<



|
<

<










|
<

<










|
<

<





|
<

<





<
<







79
80
81
82
83
84
85


86
87
88
89
90
91
92
93
94

95
96

97
98

99

100
101
102
103

104

105
106
107
108
109
110
111
112
113
114
115

116

117
118
119
120
121
122
123
124
125
126
127

128

129
130
131
132
133
134

135

136
137
138
139
140


141
142
143
144
145
146
147

int
of_string_check_utf8(const char *cString, size_t cStringLength, size_t *length)
{
	size_t i, tmpLength = cStringLength;
	int UTF8 = 0;



	for (i = 0; i < cStringLength; i++) {
		/* No sign of UTF-8 here */
		if (OF_LIKELY(!(cString[i] & 0x80)))
			continue;

		UTF8 = 1;

		/* We're missing a start byte here */
		if (OF_UNLIKELY(!(cString[i] & 0x40)))

			return -1;


		/* 2 byte sequences for code points 0 - 127 are forbidden */
		if (OF_UNLIKELY((cString[i] & 0x7E) == 0x40))

			return -1;


		/* We have at minimum a 2 byte character -> check next byte */
		if (OF_UNLIKELY(cStringLength <= i + 1 ||
		    (cString[i + 1] & 0xC0) != 0x80))

			return -1;


		/* Check if we have at minimum a 3 byte character */
		if (OF_LIKELY(!(cString[i] & 0x20))) {
			i++;
			tmpLength--;
			continue;
		}

		/* We have at minimum a 3 byte char -> check second next byte */
		if (OF_UNLIKELY(cStringLength <= i + 2 ||
		    (cString[i + 2] & 0xC0) != 0x80))

			return -1;


		/* Check if we have a 4 byte character */
		if (OF_LIKELY(!(cString[i] & 0x10))) {
			i += 2;
			tmpLength -= 2;
			continue;
		}

		/* We have a 4 byte character -> check third next byte */
		if (OF_UNLIKELY(cStringLength <= i + 3 ||
		    (cString[i + 3] & 0xC0) != 0x80))

			return -1;


		/*
		 * Just in case, check if there's a 5th character, which is
		 * forbidden by UTF-8
		 */
		if (OF_UNLIKELY(cString[i] & 0x08))

			return -1;


		i += 3;
		tmpLength -= 3;
	}



	if (length != NULL)
		*length = tmpLength;

	return UTF8;
}

size_t