ObjFW  Check-in [bd1bd1460b]

Overview
Comment: Add -[characterAtIndex:] to OFString.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: bd1bd1460bd6bb01662b89ba29aad9bef69b4ad07e182e99115cbe4ca4e36817
User & Date: js on 2009-10-04 13:43:02
Other Links: manifest | tags
Context
2009-10-04
20:31
Fix symlinks to .so files for tests on OpenBSD. check-in: 80c29649b4 user: js tags: trunk
13:43
Add -[characterAtIndex:] to OFString. check-in: bd1bd1460b user: js tags: trunk
12:45
Add of_unichar_t type. check-in: 8b162c7122 user: js tags: trunk
Changes

Modified src/OFString.h from [560317e4d4] to [9823a943c7].

11
12
13
14
15
16
17


18
19
20
21
22
23
24
25
26
27
28

29
30
31
32
33
34
35

#include <stdio.h>
#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"



typedef uint32_t of_unichar_t;

enum of_string_encoding {
	OF_STRING_ENCODING_UTF_8,
	OF_STRING_ENCODING_ISO_8859_1,
	OF_STRING_ENCODING_ISO_8859_15,
	OF_STRING_ENCODING_WINDOWS_1252
};

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(of_unichar_t, char*);

extern size_t of_string_position_to_index(const char*, size_t);
extern size_t of_string_index_to_position(const char*, size_t, size_t);

/**
 * A class for managing strings.
 */
@interface OFString: OFObject <OFCopying, OFMutableCopying>







>
>











>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

#include <stdio.h>
#include <stdarg.h>

#import "OFObject.h"
#import "OFArray.h"

/**
 * Sentinel value of type of_unichar_t. of_string_utf8_to_unicode() returns it
 * when it cannot decode a character, and -[OFString characterAtIndex:] turns
 * it into an OFInvalidEncodingException.
 */
#define OF_INVALID_UNICHAR UINT32_MAX

/**
 * A single Unicode character, as produced by of_string_utf8_to_unicode() and
 * consumed by of_string_unicode_to_utf8().
 */
typedef uint32_t of_unichar_t;

/**
 * Identifiers for character encodings. Each enumerator is named after the
 * encoding it stands for; the code that consumes these values is not part of
 * this excerpt.
 */
enum of_string_encoding {
	OF_STRING_ENCODING_UTF_8,
	OF_STRING_ENCODING_ISO_8859_1,
	OF_STRING_ENCODING_ISO_8859_15,
	OF_STRING_ENCODING_WINDOWS_1252
};

extern int of_string_check_utf8(const char*, size_t);
extern size_t of_string_unicode_to_utf8(of_unichar_t, char*);
/*
 * Decodes the one UTF-8 sequence starting at the given pointer; the size_t
 * argument is the number of bytes available there. Returns OF_INVALID_UNICHAR
 * when fewer bytes are available than the first byte calls for, or when the
 * first byte is a continuation byte (0x80-0xBF).
 */
extern of_unichar_t of_string_utf8_to_unicode(const char*, size_t);
extern size_t of_string_position_to_index(const char*, size_t);
extern size_t of_string_index_to_position(const char*, size_t, size_t);

/**
 * A class for managing strings.
 */
@interface OFString: OFObject <OFCopying, OFMutableCopying>
208
209
210
211
212
213
214






215
216
217
218
219
220
221
 *
 * \param obj An object to compare with
 * \return An integer which is the result of the comparison, see for example
 *	   strcmp
 */
- (int)compare: (id)obj;







/**
 * \param str The string to search
 * \return The index of the first occurrence of the string or SIZE_MAX if it
 *	   wasn't found
 */
- (size_t)indexOfFirstOccurrenceOfString: (OFString*)str;








>
>
>
>
>
>







211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
 *
 * \param obj An object to compare with
 * \return An integer which is the result of the comparison, see for example
 *	   strcmp
 */
- (int)compare: (id)obj;

/**
 * Returns the Unicode character at the given character index.
 *
 * Throws an OFOutOfRangeException if the index is out of range for the string
 * and an OFInvalidEncodingException if no character could be decoded at that
 * index.
 *
 * \param index The index of the Unicode character to return
 * \return The Unicode character at the specified index
 */
- (of_unichar_t)characterAtIndex: (size_t)index;

/**
 * \param str The string to search
 * \return The index of the first occurrence of the string or SIZE_MAX if it
 *	   wasn't found
 */
- (size_t)indexOfFirstOccurrenceOfString: (OFString*)str;

Modified src/OFString.m from [5d80701917] to [b88d3c3e4d].

135
136
137
138
139
140
141
































142
143
144
145
146
147
148
		buf[i++] = 0x80 | (c >> 6 & 0x3F);
		buf[i] = 0x80 | (c & 0x3F);
		return 4;
	}

	return 0;
}

































size_t
of_string_position_to_index(const char *str, size_t pos)
{
	size_t i, idx = pos;

	for (i = 0; i < pos; i++)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
		buf[i++] = 0x80 | (c >> 6 & 0x3F);
		buf[i] = 0x80 | (c & 0x3F);
		return 4;
	}

	return 0;
}

/**
 * Decodes the single UTF-8 sequence that starts at buf_ into a Unicode
 * character.
 *
 * Only the structure of the sequence is validated (lead byte, length and
 * continuation bytes); overlong encodings and surrogate code points are not
 * rejected here.
 *
 * \param buf_ Pointer to the first byte of the sequence
 * \param len The number of bytes that may be read starting at buf_
 * \return The decoded character, or OF_INVALID_UNICHAR if len is 0, the
 *	   sequence is longer than len, the first byte is not a valid lead
 *	   byte or one of the following bytes is not a continuation byte
 */
of_unichar_t
of_string_utf8_to_unicode(const char *buf_, size_t len)
{
	const uint8_t *buf = (const uint8_t*)buf_;

	/* An empty buffer has no first byte to look at. */
	if (OF_UNLIKELY(len == 0))
		return OF_INVALID_UNICHAR;

	/* 0xxxxxxx: plain ASCII */
	if (buf[0] < 0x80)
		return buf[0];

	/* 110xxxxx 10xxxxxx */
	if ((buf[0] & 0xE0) == 0xC0) {
		if (OF_UNLIKELY(len < 2 || (buf[1] & 0xC0) != 0x80))
			return OF_INVALID_UNICHAR;

		return ((buf[0] & 0x1F) << 6) | (buf[1] & 0x3F);
	}

	/* 1110xxxx 10xxxxxx 10xxxxxx */
	if ((buf[0] & 0xF0) == 0xE0) {
		if (OF_UNLIKELY(len < 3 || (buf[1] & 0xC0) != 0x80 ||
		    (buf[2] & 0xC0) != 0x80))
			return OF_INVALID_UNICHAR;

		return ((buf[0] & 0x0F) << 12) | ((buf[1] & 0x3F) << 6) |
		    (buf[2] & 0x3F);
	}

	/*
	 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	 *
	 * The mask is 0xF8 rather than 0xF0 so that 0xF8-0xFF, which never
	 * start a UTF-8 sequence, are not mistaken for a 4 byte lead byte.
	 */
	if ((buf[0] & 0xF8) == 0xF0) {
		if (OF_UNLIKELY(len < 4 || (buf[1] & 0xC0) != 0x80 ||
		    (buf[2] & 0xC0) != 0x80 || (buf[3] & 0xC0) != 0x80))
			return OF_INVALID_UNICHAR;

		return ((buf[0] & 0x07) << 18) | ((buf[1] & 0x3F) << 12) |
		    ((buf[2] & 0x3F) << 6) | (buf[3] & 0x3F);
	}

	/* Stray continuation byte (10xxxxxx) or 0xF8-0xFF. */
	return OF_INVALID_UNICHAR;
}

size_t
of_string_position_to_index(const char *str, size_t pos)
{
	size_t i, idx = pos;

	for (i = 0; i < pos; i++)
553
554
555
556
557
558
559
















560
561
562
563
564
565
566
	OF_HASH_INIT(hash);
	for (i = 0; i < length; i++)
		OF_HASH_ADD(hash, string[i]);
	OF_HASH_FINALIZE(hash);

	return hash;
}

















- (size_t)indexOfFirstOccurrenceOfString: (OFString*)str
{
	const char *str_c = [str cString];
	size_t str_len = [str cStringLength];
	size_t i;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
	OF_HASH_INIT(hash);
	for (i = 0; i < length; i++)
		OF_HASH_ADD(hash, string[i]);
	OF_HASH_FINALIZE(hash);

	return hash;
}

/*
 * Returns the Unicode character at the given character index, throwing an
 * OFOutOfRangeException or an OFInvalidEncodingException on failure.
 */
- (of_unichar_t)characterAtIndex: (size_t)index
{
	size_t pos;
	of_unichar_t character;

	/*
	 * Map the character index to an offset into string. An offset at or
	 * past length means the requested index does not exist.
	 */
	pos = of_string_index_to_position(string, index, length);

	if (pos >= length)
		@throw [OFOutOfRangeException newWithClass: isa];

	/* Decode the sequence at that offset; only the remaining bytes count. */
	character = of_string_utf8_to_unicode(string + pos, length - pos);

	if (character == OF_INVALID_UNICHAR)
		@throw [OFInvalidEncodingException newWithClass: isa];

	return character;
}

- (size_t)indexOfFirstOccurrenceOfString: (OFString*)str
{
	const char *str_c = [str cString];
	size_t str_len = [str cStringLength];
	size_t i;

Modified tests/string.m from [43cc839036] to [72abea7542].

62
63
64
65
66
67
68









69
70
71
72
73
74
75
	TEST(@"-[appendString:] and -[appendCString:]",
	    [s[1] appendCString: "1𝄞"] && [s[1] appendString: @"3"] &&
	    [[s[0] appendString: s[1]] isEqual: @"täs€1𝄞3"])

	TEST(@"-[length]", [s[0] length] == 7)
	TEST(@"-[cStringLength]", [s[0] cStringLength] == 13)
	TEST(@"-[hash]", [s[0] hash] == 0x8AC1EEF6)









	TEST(@"-[reverse]", [[s[0] reverse] isEqual: @"3𝄞1€sät"])

	s[0] = [OFMutableString stringWithString: @"321tset"];
	TEST(@"-[upper]", [[s[0] upper] isEqual: @"321TSET"])
	TEST(@"-[lower]", [[s[0] lower] isEqual: @"321tset"])

	TEST(@"+[stringWithCString:length:]",







>
>
>
>
>
>
>
>
>







62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
	TEST(@"-[appendString:] and -[appendCString:]",
	    [s[1] appendCString: "1𝄞"] && [s[1] appendString: @"3"] &&
	    [[s[0] appendString: s[1]] isEqual: @"täs€1𝄞3"])

	TEST(@"-[length]", [s[0] length] == 7)
	TEST(@"-[cStringLength]", [s[0] cStringLength] == 13)
	TEST(@"-[hash]", [s[0] hash] == 0x8AC1EEF6)

	TEST(@"-[characterAtIndex:]", [s[0] characterAtIndex: 0] == 't' &&
	    [s[0] characterAtIndex: 1] == 0xE4 &&
	    [s[0] characterAtIndex: 3] == 0x20AC &&
	    [s[0] characterAtIndex: 5] == 0x1D11E)

	EXPECT_EXCEPTION(@"Detect out of range in -[characterAtIndex:]",
	    OFOutOfRangeException, [s[0] characterAtIndex: 7])

	TEST(@"-[reverse]", [[s[0] reverse] isEqual: @"3𝄞1€sät"])

	s[0] = [OFMutableString stringWithString: @"321tset"];
	TEST(@"-[upper]", [[s[0] upper] isEqual: @"321TSET"])
	TEST(@"-[lower]", [[s[0] lower] isEqual: @"321tset"])

	TEST(@"+[stringWithCString:length:]",