Skip to content

Commit 61fe9c6

Browse files
committed
mutex works on linux too
1 parent eb6f073 commit 61fe9c6

2 files changed

Lines changed: 88 additions & 32 deletions

File tree

jsrc/mt.c

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
#include"j.h"
88

9-
#ifdef __APPLE__
9+
#if defined(__APPLE__) || defined(__linux__)
1010
enum{FREE=0,LOCK=1,WAIT=2};//values for mutex->v
1111
//todo consider storing owner in the high bits of v. apple pthreads does this. But it means we can't use xadd to unlock. On the other hand, apple is mostly arm now, which doesn't have xadd anyway.
1212
//Also, I just realised you _can_ use xadd to unlock--subtract the top bits at the same time as the bottom one--it just adds a weird state where the low bit is 1, but the high bits don't denote any task--but that's ok
@@ -20,11 +20,15 @@ enum{FREE=0,LOCK=1,WAIT=2};//values for mutex->v
2020
// Put *m into its initial state: every field zero except the recursive flag.
// Zero for v is FREE, and owner/ct start at 0.
void jtpthread_mutex_init(jtpthread_mutex_t *m,B recursive){
 jtpthread_mutex_t fresh={.recursive=recursive};
 *m=fresh;}
2121
C jtpthread_mutex_lock(J jt,jtpthread_mutex_t *m,I self){
2222
if(uncommon(m->owner==self)){if(unlikely(!m->recursive))R EVCONCURRENCY; m->ct++;R 0;}
23-
UI4 e=0;if(!(e=lda(&m->v))&&((e=FREE),casa(&m->v,&e,LOCK))){m->ct+=m->recursive;m->owner=self;R 0;} //success
24-
if(common(e!=WAIT))e=xchga(&m->v,WAIT); //penalise the multi-waiters case, since it's slower anyway
23+
UI4 e;if(likely((!(e=lda(&m->v)))&&((e=FREE),casa(&m->v,&e,LOCK)))){m->ct+=m->recursive;m->owner=self;R 0;} //success. test-and-test-and-set is from glibc, mildly optimises the case when many threads swarm a locked mutex
24+
if(e!=WAIT)e=xchga(&m->v,WAIT); //penalise the multi-waiters case, since it's slower anyway
2525
while(e!=FREE){
26-
I i=__ulock_wait(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,&m->v,WAIT,0);
27-
if(unlikely(i<0)){
26+
#if __linux__
27+
I i=_jfutex_waitn(&m->v,WAIT,(UI)-1); //bug? jfutex_wait doesn't get interrupted by signals on linux
28+
#else
29+
I i=jfutex_wait(&m->v,WAIT);
30+
#endif
31+
if(uncommon(i<0)){
2832
if(i==-EINTR){if(JT(jt,adbreakr)[0])R EVATTN;}
2933
else if(i==-EOWNERDEAD)R EVCONCURRENCY;
3034
else if(i==-ENOMEM)R EVWSFULL;//lol
@@ -38,20 +42,12 @@ I jtpthread_mutex_timedlock(J jt,jtpthread_mutex_t *m,UI ns,I self){
3842
tgt.tv_sec=now.tv_sec+ns/1000000000;tgt.tv_nsec=now.tv_nsec+ns%1000000000;if(tgt.tv_nsec>=1000000000){tgt.tv_nsec-=1000000000;tgt.tv_sec++;};
3943
if(common(e!=WAIT)){e=xchga(&m->v,WAIT);if(e==FREE)goto success;} //penalise the multi-waiters case, since it's slower anyway
4044
while(1){
41-
#if __arm64__
42-
// wait2 takes an ns timeout, but it's only available from macos 11 onward; coincidentally, arm macs only support macos 11+
43-
// so we can count on having this
44-
I i=__ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,&m->v,WAIT,ns,0);
45-
#else
46-
// but for the x86 case, we keep compatibility
47-
I i=__ulock_wait(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,&m->v,WAIT,ns/1000);
48-
#endif
45+
I i=_jfutex_waitn(&m->v,WAIT,ns);
4946
if(uncommon(i==-ETIMEDOUT)); //don't penalise this case too harshly
5047
else if(unlikely(i<0)){
5148
if(i==-EINTR){if(JT(jt,adbreakr)[0])R EVATTN;}
5249
else if(i==-EOWNERDEAD)R EVCONCURRENCY;
5350
else if(i==-ENOMEM)R EVWSFULL;
54-
else if(i==-ETIMEDOUT);
5551
else R EVFACE;}
5652
e=xchga(&m->v,WAIT);
5753
if(e==FREE)goto success; //exit when e==FREE; i.e., _we_ successfully installed WAIT in place of FREE
@@ -70,9 +66,9 @@ C jtpthread_mutex_unlock(jtpthread_mutex_t *m,I self){
7066
if(unlikely(m->owner!=self))R EVCONCURRENCY;
7167
if(uncommon(m->recursive)){if(--m->ct)R 0;} //need to be released more times on this thread
7268
m->owner=0;
73-
if(!casa(&m->v,&(UI4){LOCK},FREE)){sta(&m->v,FREE);__ulock_wake(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,&m->v,0);}
69+
if(!casa(&m->v,&(UI4){LOCK},FREE)){sta(&m->v,FREE);jfutex_wake1(&m->v);}
7470
//below is what drepper does; I think the above is always faster, but it should definitely be faster without xadd
7571
//agner sez lock xadd has one cycle better latency vs lock cmpxchg on intel ... ??
76-
//if(adda(&m->v,-1)){sta(&m->v,FREE);__ulock_wake(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,&m->v,0);}
72+
//if(adda(&m->v,-1)){sta(&m->v,FREE);jfutex_wake1(&m->v);}
7773
R 0;}
7874
#endif //__APPLE__ || __linux__

jsrc/mt.h

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// see mt.c
33

44
#if PYXES
5-
#ifndef __APPLE__
5+
#if !defined(__APPLE__) && !defined(__linux__)
66
#include <pthread.h>
77
typedef pthread_mutex_t jtpthread_mutex_t;
88
static inline void jtpthread_mutex_init(jtpthread_mutex_t *m,B recursive){
@@ -44,6 +44,53 @@ static inline C jtpthread_mutex_unlock(jtpthread_mutex_t *m,I self){
4444
if(r==EPERM)R EVCONCURRENCY;
4545
R EVFACE;}
4646
#else
47+
typedef struct {
48+
B recursive;
49+
I owner; //user-provided; task id
50+
UI4 v;
51+
UI4 ct; //for recursive locks
52+
}jtpthread_mutex_t;//todo should split into multiple cache lines?
53+
54+
void jtpthread_mutex_init(jtpthread_mutex_t*,B recursive);
55+
C jtpthread_mutex_lock(J jt,jtpthread_mutex_t *m,I self);
56+
I jtpthread_mutex_timedlock(J jt,jtpthread_mutex_t*,UI ns,I self); //absolute timers suck; correct the interface. -1=failure; 0=success; positive=error
57+
I jtpthread_mutex_trylock(jtpthread_mutex_t*,I self); //0=success -1=failure positive=error
58+
C jtpthread_mutex_unlock(jtpthread_mutex_t*,I self); //0 or error code
59+
60+
//note: self must be non-zero
61+
#if defined(__linux__)
62+
#include <linux/futex.h>
63+
#include <sys/syscall.h>
64+
// Wake at most one thread sleeping in a futex wait on *p (raw x86-64 Linux syscall).
// The kernel's result (waiters woken, or a negative errno) is discarded.
static inline void jfutex_wake1(UI4 *p){
 long r=SYS_futex;                                   //rax: syscall# on entry, result on exit
 __asm__ volatile("syscall" : "+a" (r)               //rax is overwritten, so it must be in/out
                            : "D" (p),               //rdi: ptr
                              "S" (FUTEX_WAKE),      //rsi: op
                              "d" (1)                //rdx: count
                            : "rcx","r11","memory"); //syscall overwrites rcx and r11; memory makes it a compiler barrier
 (void)r;}
69+
// Wake every thread sleeping in a futex wait on *p (raw x86-64 Linux syscall).
// The kernel's result (waiters woken, or a negative errno) is discarded.
static inline void jfutex_wakea(UI4 *p){
 long r=SYS_futex;                                   //rax: syscall# on entry, result on exit
 __asm__ volatile("syscall" : "+a" (r)               //rax is overwritten, so it must be in/out
                            : "D" (p),               //rdi: ptr
                              "S" (FUTEX_WAKE),      //rsi: op
                              "d" (0x7fffffff)       //rdx: count. INT_MAX, not 0xffffffff: the kernel treats the count as a signed int, and -1 stops after the first waiter
                            : "rcx","r11","memory"); //syscall overwrites rcx and r11; memory makes it a compiler barrier
 (void)r;}
74+
// Sleep on *p for as long as it still holds v, with no timeout (raw x86-64 Linux syscall).
// Returns the raw kernel result: 0 when woken, otherwise a negative errno (callers test e.g. -EINTR).
static inline int jfutex_wait(UI4 *p,UI4 v){
 register struct timespec *pts __asm__("r10") = 0;   //4th syscall argument travels in r10
 int r;
 __asm__ volatile("syscall" : "=a" (r)               //result in rax
                            : "a" (SYS_futex),       //eax: syscall#
                              "D" (p),               //rdi: ptr
                              "S" (FUTEX_WAIT),      //rsi: op
                              "d" (v),               //rdx: val, expected
                              "r" (pts)              //r10: timeout (null=no timeout)
                            : "rcx","r11","memory"); //syscall overwrites rcx and r11; memory makes it a compiler barrier
 return r;}
83+
// Sleep on *p for as long as it still holds v, for at most ns nanoseconds (raw x86-64 Linux syscall).
// The timeout is relative. Returns the raw kernel result: 0 when woken, otherwise a negative
// errno (callers test e.g. -ETIMEDOUT and -EINTR).
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){
 struct timespec ts={.tv_sec=ns/1000000000, .tv_nsec=ns%1000000000};
 register struct timespec *pts __asm__("r10") = &ts; //4th syscall argument travels in r10
 int r;
 __asm__ volatile("syscall" : "=a" (r)               //result in rax
                            : "a" (SYS_futex),       //eax: syscall#
                              "D" (p),               //rdi: ptr
                              "S" (FUTEX_WAIT),      //rsi: op
                              "d" (v),               //rdx: val, expected
                              "r" (pts)              //r10: timeout (relative!)
                            : "rcx","r11","memory"); //syscall overwrites rcx and r11; memory also forces ts to be stored before the kernel reads it
 return r;}
93+
#elif defined(__APPLE__)
4794
// ulock (~futex) junk from xnu. timeout=0 means wait forever
4895
extern int __ulock_wait(uint32_t operation, void *addr, uint64_t value, uint32_t timeout); // timeout in us
4996
extern int __ulock_wait2(uint32_t operation, void *addr, uint64_t value, uint64_t timeout, uint64_t value2); // timeout in ns. only available as of macos 11?
@@ -76,19 +123,32 @@ extern int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);
76123

77124
//positive (or just 1?) result from wait means someone else is waiting on this too?
78125

79-
typedef struct {
80-
B recursive;
81-
I owner; //user-provided; task id
82-
UI4 v;
83-
UI4 ct; //for recursive locks
84-
}jtpthread_mutex_t;//todo should split into multiple cache lines?
85-
86-
void jtpthread_mutex_init(jtpthread_mutex_t*,B recursive);
87-
struct JTTstruct; C jtpthread_mutex_lock(struct JTTstruct *jt,jtpthread_mutex_t *m,I self);
88-
I jtpthread_mutex_timedlock(struct JTTstruct *jt,jtpthread_mutex_t*,UI ns,I self); //absolute timers suck; correct the interface. -1=failure; 0=success; positive=error
89-
I jtpthread_mutex_trylock(jtpthread_mutex_t*,I self); //0=success -1=failure positive=error
90-
C jtpthread_mutex_unlock(jtpthread_mutex_t*,I self); //0 or error code
91-
92-
//note: self must be non-zero
93-
#endif //__APPLE__
126+
// Apple: wake a waiter on *p through the xnu ulock interface (no ULF_WAKE_ALL); result ignored.
static inline void jfutex_wake1(UI4 *p){
 const uint32_t op=UL_COMPARE_AND_WAIT|ULF_NO_ERRNO;
 __ulock_wake(op,p,0);}
127+
// Apple: wake all waiters on *p through the xnu ulock interface (ULF_WAKE_ALL); result ignored.
static inline void jfutex_wakea(UI4 *p){
 const uint32_t op=UL_COMPARE_AND_WAIT|ULF_NO_ERRNO|ULF_WAKE_ALL;
 __ulock_wake(op,p,0);}
128+
// Apple: sleep on *p while it still holds v. A timeout of 0 means wait forever.
// Returns whatever __ulock_wait returns.
static inline int jfutex_wait(UI4 *p,UI4 v){
 const uint32_t op=UL_COMPARE_AND_WAIT|ULF_NO_ERRNO;
 return __ulock_wait(op,p,v,0);}
129+
#if __arm64__
130+
// wait2 takes an ns timeout, but it's only available from macos 11 onward; coincidentally, arm macs only support macos 11+
131+
// so we can count on having this
132+
// Apple arm64: sleep on *p while it still holds v, for at most ns nanoseconds.
// __ulock_wait2 takes its timeout in ns directly. Returns whatever it returns.
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){
 const uint32_t op=UL_COMPARE_AND_WAIT|ULF_NO_ERRNO;
 return __ulock_wait2(op,p,v,ns,0);}
133+
#else
134+
// but for the x86 case, we keep compatibility with older macos. Revisit in the future
135+
// deal with >32 bits; 2^32us is just a little over an hour; just too close for comfort
136+
// Apple x86: sleep on *p while it still holds v, for at most ns nanoseconds.
// Uses __ulock_wait, whose timeout is a 32-bit count of microseconds, so this still runs on
// macOS releases that predate __ulock_wait2. Timeouts over 2^32-1 us are served in
// maximal chunks. Returns the first result that is not -ETIMEDOUT, or the result of the final chunk.
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){
 const uint32_t op=UL_COMPARE_AND_WAIT|ULF_NO_ERRNO;
 UI us=ns/1000+(ns%1000!=0);   //round up: a timeout of 0 means wait forever, so 1..999ns must not truncate to 0
 while(us>0xffffffff){
  int r=__ulock_wait(op,p,v,0xffffffff);
  if(r!=-ETIMEDOUT)return r;   //woken, interrupted, or failed: report it now
  us-=0xffffffff;}
 return __ulock_wait(op,p,v,(uint32_t)us);}
143+
#endif
144+
#elif defined(_WIN32)
145+
// untested windows path; make henry test it when he gets back from vacation
146+
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <errno.h>
148+
// Windows (untested path): sleep on *p while it still holds v, with no timeout.
// WaitOnAddress reports success as TRUE, so translate to the convention used by the other
// platforms: 0 when woken, negative errno on failure.
static inline int jfutex_wait(UI4 *p,UI4 v){
 if(WaitOnAddress(p,&v,4,INFINITE))return 0;
 return GetLastError()==ERROR_TIMEOUT?-ETIMEDOUT:-EINVAL;}
149+
// Windows (untested path): sleep on *p while it still holds v, for at most ns nanoseconds
// (granularity is 1ms, rounded down). Returns 0 when woken, -ETIMEDOUT when the time runs
// out, and another negative errno on failure.
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){
 UI ms=ns/1000000;
 for(;;){
  DWORD chunk=ms>=INFINITE?INFINITE-1:(DWORD)ms;   //never pass INFINITE by accident, never truncate
  if(WaitOnAddress(p,&v,4,chunk))return 0;
  if(GetLastError()!=ERROR_TIMEOUT)return -EINVAL;
  ms-=chunk;
  if(!ms)return -ETIMEDOUT;}}
150+
// Windows (untested path): wake a single thread waiting on address p.
static inline void jfutex_wake1(UI4 *p){
 WakeByAddressSingle(p);}
151+
// Windows (untested path): wake all threads waiting on address p.
static inline void jfutex_wakea(UI4 *p){
 WakeByAddressAll(p);}
152+
#endif //_WIN32
153+
#endif //__APPLE__ || __linux__
94154
#endif //PYXES

0 commit comments

Comments
 (0)