Skip to content

Commit a6ce7e4

Browse files
committed
i. improvements: ravel fntbl; check in old patch for lit2/lit4 i.; avx512 int table cmp; special code for two-column table. Sample time a cycle or two earlier in timedlock
1 parent 9d21cac commit a6ce7e4

3 files changed

Lines changed: 194 additions & 127 deletions

File tree

jsrc/j.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,6 +1586,7 @@ if(likely(!((I)jtinplace&JTWILLBEOPENED)))z=EPILOGNORET(z); RETF(z); \
15861586
// -mavx or /arch:AVX should already generate VEX encoded for SSE instructions
15871587
// expands to nothing: the argument is discarded and no code is generated at the point of use
#define _mm256_zeroupperx(x)
15881588
// this is faster than reusing another register as the source anyway, because it's not a recognised idiom, so we would have a false dependency on the other register
1589+
// 128-bit integer vector with every bit set: zero compared for equality with zero is true (all-ones) in each 32-bit lane
#define _mm_setone_si128() _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())
15891590
// 256-bit integer vector with every bit set: zero compared for equality with zero is true (all-ones) in each 64-bit lane
#define _mm256_setone_epi64() _mm256_cmpeq_epi64(_mm256_setzero_si256(), _mm256_setzero_si256())
15901591
// the all-ones 256-bit pattern viewed as a vector of doubles; the cast reinterprets the bits and performs no numeric conversion
#define _mm256_setone_pd() _mm256_castsi256_pd(_mm256_setone_epi64())
15911592
// load one 256-bit integer vector (32 bytes) starting at x; the unaligned-load intrinsic is used, so x needs no particular alignment
static inline __m256i LOADV32I(void *x)
{
 return _mm256_loadu_si256((__m256i const *)x);
}
@@ -2186,6 +2187,7 @@ if(likely(type _i<3)){z=(I)&oneone; z=type _i>1?(I)_zzt:z; _zzt=type _i<1?(I*)z:
21862187
#if (C_AVX2&&SY_64)
21872188
// parallel bit extract (64-bit): the bits of s at the positions set in m are packed into the low-order bits of the result
#define PEXT(s,m) _pext_u64(s,m)
21882189
// parallel bit deposit (64-bit): the low-order bits of s are scattered to the positions set in m; other result bits are 0
#define PDEP(s,m) _pdep_u64(s,m)
2190+
// zero high bits (64-bit): result is s with bit i and every higher bit cleared
#define BZHI(s,i) _bzhi_u64(s,i)
21892191
#else
21902192
// #define PEXT(s,m) _pext_u32(s,m)
21912193
// #define PDEP(s,m) _pdep_u32(s,m)
@@ -2311,6 +2313,7 @@ static inline UINT _clearfp(void){int r=fetestexcept(FE_ALL_EXCEPT);
23112313
// signed multiply with overflow detection: __builtin_smulll_overflow stores x*y in z and returns nonzero if the product overflowed.
// The negated result is handed to ASSERT together with EVLIMIT - presumably signalling a limit error on overflow; ASSERT is defined elsewhere, confirm there
#define DPMULDE(x,y,z) ASSERT(!__builtin_smulll_overflow(x,y,&z),EVLIMIT)
23122314
// full-width multiply through signed __int128: the low 64 bits of x*y are stored in z, the high 64 bits in h
#define DPUMUL(x,y,z,h) { \
 __int128 _t=(__int128)(x)*(__int128)(y); \
 z=(I)_t; \
 h=(I)(_t>>64); \
}
23132315
// high half only of the full-width multiply through signed __int128: bits 64..127 of x*y are stored in h
#define DPUMULH(x,y,h) { \
 __int128 _t=(__int128)(x)*(__int128)(y); \
 h=(I)(_t>>64); \
}
2316+
// unsigned full-width multiply through __uint128_t: the low 64 bits of x*y are stored in z, the high 64 bits in h
// the operands are converted straight to __uint128_t, so a negative signed argument would be sign-extended first; pass unsigned values
#define DPUMULU(x,y,z,h) { \
 __uint128_t _t; \
 _t=(__uint128_t)(x)*(__uint128_t)(y); \
 z=(UI)_t; \
 h=(UI)(_t>>64); \
}
23142317
#endif
23152318
#else // C_USEMULTINTRINSIC 0 - use standard-C version (64-bit)
23162319
// in this (standard-C, non-intrinsic) branch DPMULDECLS expands to nothing
#define DPMULDECLS

jsrc/mt.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,8 @@ fail:CLRFUTEXWT; R r;} // error return, with our internal errorcode
189189
I jtpthread_mutex_timedlock(J jt,jtpthread_mutex_t *m,UI ns,I self){ //lock m, with a timeout of ns ns. Largely the same as lock
190190
I r; // internal return code in case of error
191191
if(unlikely(!casa((US*)&m->v,&(US){FREE},LOCK))){ //fast and common path: attempt to install LOCK in place of FREE; if so, we have acquired the lock
192-
if(uncommon(m->owner==self)){if(unlikely(!m->recursive))R EVCONCURRENCY; m->ct++;R 0;} //handle deadlock and recursive cases
193192
struct jtimespec tgt=jtmtil(ns);
193+
if(uncommon(m->owner==self)){if(unlikely(!m->recursive))R EVCONCURRENCY; m->ct++;R 0;} //handle deadlock and recursive cases
194194
sta(&jt->futexwt,&m->v); //ensure other threads know how to wake us up for systemlock
195195
while(xchga((US*)&m->v,WAIT)!=FREE){ //exit when _we_ successfully installed WAIT in place of FREE
196196
UI4 waitval=lda(&m->v); C breakb; // get the serial number before we check. Must be atomic; this is supposed to synchronise with writes to the same location via futexwt by wakeall

0 commit comments

Comments
 (0)