ObjFW  Diff

Differences From Artifact [db489edc3a]:

To Artifact [96b363e98b]:


1
2

3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49






50
51
52
53
54






















55
56
57
58
59
60
61
62

63
64
65
66
67
68





69
70
71
72
73
74
75
76
77
78





79
80
81
82
83
















84
85
86
87
88
89
90
91
92
93
94
95
96
97


98
99
100

101
102
103
104
105





106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

133
134

135
136
137
138
139





140
141
142
143
144
145

146
147
148
149
150





151
152
153
154
155


















156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

224
225
226
227


228
229
230
231
232




233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248


249
250
251
252
253




254
255
256
257
258
259
260


261


262
263
264
265
266
267
268
269
270
271
272
273
274
275


276
277
278
279

280
281

282
283
284
285
286




287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303



304
305

306
307
308



309
310
311
312
313
314
315
316

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331


332
333
334
335
336
337



338
339

340
341
342



343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359



360
361

362
363
364



365
366
367
368
369
370
371
372

373
374
375
376
377
378
379
380
381
382
383
384
385
386
387


388
389
390
391
392
393



394
395

396
397
398



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415



416
417

418
419
420



421
422
423
424
425
426
427
428


429
430
431
432
433
434
435












436
437
438
439
440








441
442

443
444
445
446
447
448
449
450
451
452

453







454
455
456


















457
458
459
460
461
462
463
464
465
466
467
468
469
470
471

472
473
474
475





476
477
478
479
480
481
482
483
484

485
486

487





488
489
490
491
492
493
494
1

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17



























18
19
20
21
22
23
24
25
26
27
28





29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

58
59
60




61
62
63
64
65
66
67
68
69
70
71




72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102









103
104
105
106

107
108




109
110
111
112
113
114
115
116
117
118
119
120
121















122
123
124

125
126

127
128




129
130
131
132
133
134
135
136
137
138

139
140




141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175





























176
































177
178
179


180
181
182




183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200


201
202
203




204
205
206
207
208
209
210
211
212


213
214
215
216
217
218













219
220
221
222


223


224
225




226
227
228
229

230
231
232
233
234
235
236
237
238
239
240
241
242



243
244
245
246

247
248


249
250
251
252
253
254
255
256
257
258

259
260
261













262
263
264
265
266



267
268
269
270

271
272


273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289



290
291
292
293

294
295


296
297
298
299
300
301
302
303
304
305

306
307
308













309
310
311
312
313



314
315
316
317

318
319


320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336



337
338
339
340

341
342


343
344
345
346
347
348
349
350
351


352
353
354






355
356
357
358
359
360
361
362
363
364
365
366





367
368
369
370
371
372
373
374
375

376
377
378
379
380
381
382
383
384
385

386
387
388
389
390
391
392
393
394



395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426

427
428



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444

445
446
447
448
449
450
451
452
453
454
455
456
457
458

-
+















-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-





+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







-
+


-
-
-
-
+
+
+
+
+






-
-
-
-
+
+
+
+
+





+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+





-
-
-
-
-
-
-
-
-
+
+


-
+

-
-
-
-
+
+
+
+
+








-
-
-
-
-
-
-
-
-
-
-
-
-
-
-



-
+

-
+

-
-
-
-
+
+
+
+
+





-
+

-
-
-
-
+
+
+
+
+





+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+


-
-
+
+

-
-
-
-
+
+
+
+














-
-
+
+

-
-
-
-
+
+
+
+





-
-
+
+

+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
+
+


-
-
+
-
-
+

-
-
-
-
+
+
+
+
-













-
-
-
+
+
+

-
+

-
-
+
+
+







-
+


-
-
-
-
-
-
-
-
-
-
-
-
-
+
+



-
-
-
+
+
+

-
+

-
-
+
+
+














-
-
-
+
+
+

-
+

-
-
+
+
+







-
+


-
-
-
-
-
-
-
-
-
-
-
-
-
+
+



-
-
-
+
+
+

-
+

-
-
+
+
+














-
-
-
+
+
+

-
+

-
-
+
+
+






-
-
+
+

-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+

-
+









-
+

+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














-
+

-
-
-
+
+
+
+
+









+

-
+

+
+
+
+
+







/*
 * Copyright (c) 2008-2022 Jonathan Schleifer <js@nil.im>
 * Copyright (c) 2008-2024 Jonathan Schleifer <js@nil.im>
 *
 * All rights reserved.
 *
 * This file is part of ObjFW. It may be distributed under the terms of the
 * Q Public License 1.0, which can be found in the file LICENSE.QPL included in
 * the packaging of this file.
 *
 * Alternatively, it may be distributed under the terms of the GNU General
 * Public License, either version 2 or 3, which can be found in the file
 * LICENSE.GPLv2 or LICENSE.GPLv3 respectively included in the packaging of this
 * file.
 */

OF_ASSUME_NONNULL_BEGIN

static OF_INLINE int
OFAtomicIntAdd(volatile int *_Nonnull p, int i)
{
	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "lock\n\t"
		    "xaddl	%0, %2\n\t"
		    "addl	%1, %0"
		    : "+&r"(i)
		    : "r"(i), "m"(*p)
		);
#ifdef OF_X86_64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "lock\n\t"
		    "xaddq	%0, %2\n\t"
		    "addq	%1, %0"
		    : "+&r"(i)
		    : "r"(i), "m"(*p)
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE int32_t
OFAtomicInt32Add(volatile int32_t *_Nonnull p, int32_t i)
{
	__asm__ __volatile__ (
	    "lock\n\t"
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "add{l}	{ %1, %0 | %0, %1 }"
	    : "+&r" (i)
	    : "r" (i),
	      "m" (*p)
	);
	    "xaddl	%0, %2\n\t"
	    "addl	%1, %0"
	    : "+&r"(i)
	    : "r"(i), "m"(*p)
	);

	return i;
}

static OF_INLINE int
OFAtomicIntAdd(volatile int *_Nonnull p, int i)
{
	if (sizeof(int) == 4)
		return OFAtomicInt32Add(p, i);
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "lock\n\t"
		    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
		    "add{q}	{ %1, %0 | %0, %1 }"
		    : "+&r" (i)
		    : "r" (i),
		      "m" (*p)
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE void *_Nullable
OFAtomicPointerAdd(void *volatile _Nullable *_Nonnull p, intptr_t i)
{
#if defined(OF_X86_64)
#if defined(OF_AMD64)
	__asm__ __volatile__ (
	    "lock\n\t"
	    "xaddq	%0, %2\n\t"
	    "addq	%1, %0"
	    : "+&r"(i)
	    : "r"(i), "m"(*p)
	    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
	    "add{q}	{ %1, %0 | %0, %1 }"
	    : "+&r" (i)
	    : "r" (i),
	      "m" (*p)
	);

	return (void *)i;
#elif defined(OF_X86)
	__asm__ __volatile__ (
	    "lock\n\t"
	    "xaddl	%0, %2\n\t"
	    "addl	%1, %0"
	    : "+&r"(i)
	    : "r"(i), "m"(*p)
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "add{l}	{ %1, %0 | %0, %1 }"
	    : "+&r" (i)
	    : "r" (i),
	      "m" (*p)
	);

	return (void *)i;
#endif
}

static OF_INLINE int32_t
OFAtomicInt32Subtract(volatile int32_t *_Nonnull p, int32_t i)
{
	__asm__ __volatile__ (
	    "neg{l}	%0\n\t"
	    "lock\n\t"
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "sub{l}	{ %1, %0 | %0, %1 }"
	    : "+&r" (i)
	    : "r" (i),
	      "m" (*p)
	);

	return i;
}

static OF_INLINE int
OFAtomicIntSubtract(volatile int *_Nonnull p, int i)
{
	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "negl	%0\n\t"
		    "lock\n\t"
		    "xaddl	%0, %2\n\t"
		    "subl	%1, %0"
		    : "+&r"(i)
		    : "r"(i), "m"(*p)
		);
#ifdef OF_X86_64
		return OFAtomicInt32Subtract(p, i);
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "negq	%0\n\t"
		    "neg{q}	%0\n\t"
		    "lock\n\t"
		    "xaddq	%0, %2\n\t"
		    "subq	%1, %0"
		    : "+&r"(i)
		    : "r"(i), "m"(*p)
		    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
		    "sub{q}	{ %1, %0 | %0, %1 }"
		    : "+&r" (i)
		    : "r" (i),
		      "m" (*p)
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE int32_t
OFAtomicInt32Subtract(volatile int32_t *_Nonnull p, int32_t i)
{
	__asm__ __volatile__ (
	    "negl	%0\n\t"
	    "lock\n\t"
	    "xaddl	%0, %2\n\t"
	    "subl	%1, %0"
	    : "+&r"(i)
	    : "r"(i), "m"(*p)
	);

	return i;
}

static OF_INLINE void *_Nullable
OFAtomicPointerSubtract(void *volatile _Nullable *_Nonnull p, intptr_t i)
{
#if defined(OF_X86_64)
#if defined(OF_AMD64)
	__asm__ __volatile__ (
	    "negq	%0\n\t"
	    "neg{q}	%0\n\t"
	    "lock\n\t"
	    "xaddq	%0, %2\n\t"
	    "subq	%1, %0"
	    : "+&r"(i)
	    : "r"(i), "m"(*p)
	    "xadd{q}	{ %0, %2 | %2, %0 }\n\t"
	    "sub{q}	{ %1, %0 | %0, %1 }"
	    : "+&r" (i)
	    : "r" (i),
	      "m" (*p)
	);

	return (void *)i;
#elif defined(OF_X86)
	__asm__ __volatile__ (
	    "negl	%0\n\t"
	    "neg{l}	%0\n\t"
	    "lock\n\t"
	    "xaddl	%0, %2\n\t"
	    "subl	%1, %0"
	    : "+&r"(i)
	    : "r"(i), "m"(*p)
	    "xadd{l}	{ %0, %2 | %2, %0 }\n\t"
	    "sub{l}	{ %1, %0 | %0, %1 }"
	    : "+&r" (i)
	    : "r" (i),
	      "m" (*p)
	);

	return (void *)i;
#endif
}

static OF_INLINE int32_t
OFAtomicInt32Increase(volatile int32_t *_Nonnull p)
{
	int32_t i;

	__asm__ __volatile__ (
	    "xor{l}	%0, %0\n\t"
	    "inc{l}	%0\n\t"
	    "lock\n\t"
	    "xadd{l}	{ %0, %1 | %1, %0 }\n\t"
	    "inc{l}	%0"
	    : "=&r" (i)
	    : "m" (*p)
	);

	return i;
}

static OF_INLINE int
OFAtomicIntIncrease(volatile int *_Nonnull p)
{
	int i;

	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "xorl	%0, %0\n\t"
		    "incl	%0\n\t"
		    "lock\n\t"
		    "xaddl	%0, %1\n\t"
		    "incl	%0"
		    : "=&r"(i)
		    : "m"(*p)
		);
#ifdef OF_X86_64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "xorq	%0, %0\n\t"
		    "incq	%0\n\t"
		    "lock\n\t"
		    "xaddq	%0, %1\n\t"
		    "incq	%0"
		    : "=&r"(i)
		    : "m"(*p)
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE int32_t
OFAtomicInt32Increase(volatile int32_t *_Nonnull p)
		return OFAtomicInt32Increase(p);
{
	int32_t i;

	__asm__ __volatile__ (
	    "xorl	%0, %0\n\t"
	    "incl	%0\n\t"
	    "lock\n\t"
	    "xaddl	%0, %1\n\t"
	    "incl	%0"
	    : "=&r"(i)
	    : "m"(*p)
	);

	return i;
}

static OF_INLINE int
OFAtomicIntDecrease(volatile int *_Nonnull p)
{
	int i;

	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "xorl	%0, %0\n\t"
		    "decl	%0\n\t"
		    "lock\n\t"
		    "xaddl	%0, %1\n\t"
		    "decl	%0"
		    : "=&r"(i)
		    : "m"(*p)
		);
#ifdef OF_X86_64
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "xorq	%0, %0\n\t"
		    "decq	%0\n\t"
		    "xor{q}	%0, %0\n\t"
		    "inc{q}	%0\n\t"
		    "lock\n\t"
		    "xaddq	%0, %1\n\t"
		    "decq	%0"
		    : "=&r"(i)
		    : "m"(*p)
		    "xadd{q}	{ %0, %1 | %1, %0 }\n\t"
		    "inc{q}	%0"
		    : "=&r" (i)
		    : "m" (*p)
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE int32_t
OFAtomicInt32Decrease(volatile int32_t *_Nonnull p)
{
	int32_t i;

	__asm__ __volatile__ (
	    "xorl	%0, %0\n\t"
	    "decl	%0\n\t"
	    "xor{l}	%0, %0\n\t"
	    "dec{l}	%0\n\t"
	    "lock\n\t"
	    "xaddl	%0, %1\n\t"
	    "decl	%0"
	    : "=&r"(i)
	    : "m"(*p)
	    "xadd{l}	{ %0, %1 | %1, %0 }\n\t"
	    "dec{l}	%0"
	    : "=&r" (i)
	    : "m" (*p)
	);

	return i;
}

static OF_INLINE unsigned int
OFAtomicIntOr(volatile unsigned int *_Nonnull p, unsigned int i)
static OF_INLINE int
OFAtomicIntDecrease(volatile int *_Nonnull p)
{
	int i;

	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "movl	%2, %0\n\t"
		    "movl	%0, %%eax\n\t"
		    "orl	%1, %0\n\t"
		    "lock\n\t"
		    "cmpxchg	%0, %2\n\t"
		    "jne	0b"
		    : "=&r"(i)
		    : "r"(i), "m"(*p)
		    : "eax", "cc"
		);
#ifdef OF_X86_64
		return OFAtomicInt32Decrease(p);
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "movq	%2, %0\n\t"
		    "xor{q}	%0, %0\n\t"
		    "movq	%0, %%rax\n\t"
		    "orq	%1, %0\n\t"
		    "dec{q}	%0\n\t"
		    "lock\n\t"
		    "cmpxchg	%0, %2\n\t"
		    "jne	0b"
		    : "=&r"(i)
		    : "r"(i), "m"(*p)
		    "xadd{q}	{ %0, %1 | %1, %0 }\n\t"
		    "dec{q}	%0"
		    : "=&r" (i)
		    : "m" (*p)
		    : "rax", "cc"
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE uint32_t
OFAtomicInt32Or(volatile uint32_t *_Nonnull p, uint32_t i)
{
	__asm__ __volatile__ (
	    "0:\n\t"
	    "movl	%2, %0\n\t"
	    "movl	%0, %%eax\n\t"
	    "orl	%1, %0\n\t"
	    "mov{l}	{ %2, %0 | %0, %2 }\n\t"
	    "mov{l}	{ %0, %%eax | eax, %0 }\n\t"
	    "or{l}	{ %1, %0 | %0, %1 }\n\t"
	    "lock\n\t"
	    "cmpxchg	%0, %2\n\t"
	    "cmpxchg{l}	{ %0, %2 | %2, %0 }\n\t"
	    "jne	0b"
	    : "=&r"(i)
	    : "r"(i), "m"(*p)
	    : "=&r" (i)
	    : "r" (i),
	      "m" (*p)
	    : "eax", "cc"
	);

	return i;
}

static OF_INLINE unsigned int
OFAtomicIntAnd(volatile unsigned int *_Nonnull p, unsigned int i)
OFAtomicIntOr(volatile unsigned int *_Nonnull p, unsigned int i)
{
	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "movl	%2, %0\n\t"
		    "movl	%0, %%eax\n\t"
		    "andl	%1, %0\n\t"
		    "lock\n\t"
		    "cmpxchg	%0, %2\n\t"
		    "jne	0b"
		    : "=&r"(i)
		    : "r"(i), "m"(*p)
		    : "eax", "cc"
		);
#ifdef OF_X86_64
		return OFAtomicInt32Or(p, i);
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "movq	%2, %0\n\t"
		    "movq	%0, %%rax\n\t"
		    "andq	%1, %0\n\t"
		    "mov{q}	{ %2, %0 | %0, %2 }\n\t"
		    "mov{q}	{ %0, %%rax | rax, %0 }\n\t"
		    "or{q}	{ %1, %0 | %0, %1 }\n\t"
		    "lock\n\t"
		    "cmpxchg	%0, %2\n\t"
		    "cmpxchg{q}	{ %0, %2 | %2, %0 }\n\t"
		    "jne	0b"
		    : "=&r"(i)
		    : "r"(i), "m"(*p)
		    : "=&r" (i)
		    : "r" (i),
		      "m" (*p)
		    : "rax", "cc"
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE uint32_t
OFAtomicInt32And(volatile uint32_t *_Nonnull p, uint32_t i)
{
	__asm__ __volatile__ (
	    "0:\n\t"
	    "movl	%2, %0\n\t"
	    "movl	%0, %%eax\n\t"
	    "andl	%1, %0\n\t"
	    "mov{l}	{ %2, %0 | %0, %2 }\n\t"
	    "mov{l}	{ %0, %%eax | eax, %0 }\n\t"
	    "and{l}	{ %1, %0 | %0, %1 }\n\t"
	    "lock\n\t"
	    "cmpxchg	%0, %2\n\t"
	    "cmpxchg{l}	{ %0, %2 | %2, %0 }\n\t"
	    "jne	0b"
	    : "=&r"(i)
	    : "r"(i), "m"(*p)
	    : "=&r" (i)
	    : "r" (i),
	      "m" (*p)
	    : "eax", "cc"
	);

	return i;
}

static OF_INLINE unsigned int
OFAtomicIntXor(volatile unsigned int *_Nonnull p, unsigned int i)
OFAtomicIntAnd(volatile unsigned int *_Nonnull p, unsigned int i)
{
	if (sizeof(int) == 4)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "movl	%2, %0\n\t"
		    "movl	%0, %%eax\n\t"
		    "xorl	%1, %0\n\t"
		    "lock\n\t"
		    "cmpxchg	%0, %2\n\t"
		    "jne	0b"
		    : "=&r"(i)
		    : "r"(i), "m"(*p)
		    : "eax", "cc"
		);
#ifdef OF_X86_64
		return OFAtomicInt32And(p, i);
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "movq	%2, %0\n\t"
		    "movq	%0, %%rax\n\t"
		    "xorq	%1, %0\n\t"
		    "mov{q}	{ %2, %0 | %0, %2 }\n\t"
		    "mov{q}	{ %0, %%rax | rax, %0 }\n\t"
		    "and{q}	{ %1, %0 | %0, %1 }\n\t"
		    "lock\n\t"
		    "cmpxchg	%0, %2\n\t"
		    "cmpxchg{q}	{ %0, %2 | %2, %0 }\n\t"
		    "jne	0b"
		    : "=&r"(i)
		    : "r"(i), "m"(*p)
		    : "=&r" (i)
		    : "r" (i),
		      "m" (*p)
		    : "rax", "cc"
		);
#endif
	else
		abort();

	return i;
}

static OF_INLINE uint32_t
OFAtomicInt32Xor(volatile uint32_t *_Nonnull p, uint32_t i)
{
	__asm__ __volatile__ (
	    "0:\n\t"
	    "movl	%2, %0\n\t"
	    "movl	%0, %%eax\n\t"
	    "xorl	%1, %0\n\t"
	    "mov{l}	{ %2, %0 | %0, %2 }\n\t"
	    "mov{l}	{ %0, %%eax | eax, %0 }\n\t"
	    "xor{l}	{ %1, %0 | %0, %1 }\n\t"
	    "lock\n\t"
	    "cmpxchgl	%0, %2\n\t"
	    "cmpxchg{l}	{ %0, %2 | %2, %0 }\n\t"
	    "jne	0b"
	    : "=&r"(i)
	    : "r"(i), "m"(*p)
	    : "=&r" (i)
	    : "r" (i),
	      "m" (*p)
	    : "eax", "cc"
	);

	return i;
}

static OF_INLINE bool
OFAtomicIntCompareAndSwap(volatile int *_Nonnull p, int o, int n)
static OF_INLINE unsigned int
OFAtomicIntXor(volatile unsigned int *_Nonnull p, unsigned int i)
{
	int r;

	__asm__ __volatile__ (
	    "lock\n\t"
	    "cmpxchg	%2, %3\n\t"
	    "sete	%b0\n\t"
	if (sizeof(int) == 4)
		return OFAtomicInt32Xor(p, i);
#ifdef OF_AMD64
	else if (sizeof(int) == 8)
		__asm__ __volatile__ (
		    "0:\n\t"
		    "mov{q}	{ %2, %0 | %0, %2 }\n\t"
		    "mov{q}	{ %0, %%rax | rax, %0 }\n\t"
		    "xor{q}	{ %1, %0 | %0, %1 }\n\t"
		    "lock\n\t"
		    "cmpxchg{q}	{ %0, %2 | %2, %0 }\n\t"
		    "jne	0b"
	    "movzbl	%b0, %0"
	    : "=&d"(r), "+a"(o)	/* use d instead of r to avoid a gcc bug */
	    : "r"(n), "m"(*p)
	    : "cc"
	);
		    : "=&r" (i)
		    : "r" (i),
		      "m" (*p)
		    : "rax", "cc"
		);
#endif
	else
		abort();

	return r;
	return i;
}

static OF_INLINE bool
OFAtomicInt32CompareAndSwap(volatile int32_t *_Nonnull p, int32_t o, int32_t n)
{
	int r;

	__asm__ __volatile__ (
	    "lock\n\t"
	    "cmpxchg	%2, %3\n\t"
	    "cmpxchg{l}	{ %2, %3 | %3, %2 }\n\t"
	    "sete	%b0\n\t"
	    "movz{bl|x}	{ %b0, %0 | %0, %b0 }"
	    : "=&d" (r),	/* use d instead of r to avoid a gcc bug */
	      "+a" (o)
	    : "r" (n),
	      "m" (*p)
	    : "cc"
	);
	    "movzbl	%b0, %0"
	    : "=&d"(r), "+a"(o)	/* use d instead of r to avoid a gcc bug */
	    : "r"(n), "m"(*p)

	return r;
}

static OF_INLINE bool
OFAtomicIntCompareAndSwap(volatile int *_Nonnull p, int o, int n)
{
	int r;

	__asm__ __volatile__ (
	    "lock\n\t"
	    "cmpxchg	{ %2, %3 | %3, %2 }\n\t"
	    "sete	%b0\n\t"
	    "movz{bl|x}	{ %b0, %0 | %0, %b0 }"
	    : "=&d" (r),	/* use d instead of r to avoid a gcc bug */
	      "+a" (o)
	    : "r" (n),
	      "m" (*p)
	    : "cc"
	);

	return r;
}

static OF_INLINE bool
OFAtomicPointerCompareAndSwap(void *volatile _Nullable *_Nonnull p,
    void *_Nullable o, void *_Nullable n)
{
	int r;

	__asm__ __volatile__ (
	    "lock\n\t"
	    "cmpxchg	%2, %3\n\t"
	    "cmpxchg	{ %2, %3 | %3, %2 }\n\t"
	    "sete	%b0\n\t"
	    "movzbl	%b0, %0"
	    : "=&d"(r), "+a"(o)	/* use d instead of r to avoid a gcc bug */
	    : "r"(n), "m"(*p)
	    "movz{bl|x}	{ %b0, %0 | %0, %b0 }"
	    : "=&d" (r),	/* use d instead of r to avoid a gcc bug */
	      "+a" (o)
	    : "r" (n),
	      "m" (*p)
	    : "cc"
	);

	return r;
}

static OF_INLINE void
OFMemoryBarrier(void)
{
#ifdef OF_AMD64
	__asm__ __volatile__ (
	    "mfence" ::: "memory"
	    "lock or{q}	{ $0, (%%rsp) | [rsp], 0 }" ::: "memory", "cc"
	);
#else
	__asm__ __volatile__ (
	    "lock or{l}	{ $0, (%%esp) | [esp], 0 }" ::: "memory", "cc"
	);
#endif
}

static OF_INLINE void
OFAcquireMemoryBarrier(void)
{
	__asm__ __volatile__ ("" ::: "memory");
}