Skip to content

Commit f4d6c2d

Browse files
committed
NORMAH8
1 parent c9d78f5 commit f4d6c2d

2 files changed

Lines changed: 15 additions & 13 deletions

File tree

jsrc/j.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -847,11 +847,6 @@ struct jtimespec jmtfclk(void); //'fast clock'; maybe less inaccurate; intended
847847
#define EMSGINVINFL 0x40000 // set to append 'invalid inflection' to msg
848848
#define EMSGNOMSGLINE 0x80000 // NOTE(review): comment was duplicated from EMSGINVINFL; presumably set to suppress the message line - confirm
849849

850-
// debugging AD header length
851-
#ifndef NORMAH8
852-
#define NORMAH8 0
853-
#endif
854-
855850
#ifndef PYXES
856851
#if SY_64
857852
#define PYXES 1
@@ -860,6 +855,15 @@ struct jtimespec jmtfclk(void); //'fast clock'; maybe less inaccurate; intended
860855
#endif
861856
#endif
862857

858+
// debugging AD header length
859+
#ifndef NORMAH8
860+
#define NORMAH8 0
861+
#endif
862+
#if !SY_64 && PYXES
863+
#undef NORMAH8
864+
#define NORMAH8 1
865+
#endif
866+
863867
// if we are not multithreading, report the master thread only
864868
#if !PYXES
865869
#undef MAXTHREADS
@@ -1418,6 +1422,7 @@ if(likely(!((I)jtfg&JTWILLBEOPENED)))z=EPILOGNORET(z); RETF(z); \
14181422
#define ACVCACHEWRITEUNLOCK ++JT(jt,fnasgnct); // increment cache count
14191423
#define ACVCACHECLEAR ++JT(jt,fnasgnct); // increment cache count - used when we aren't freeing the only reference to the acv
14201424
#endif
1425+
#if C_AVX2 || EMU_AVX2
14211426
// Support for int-to-float, in parallel. Input is u, 64-bit int with a type of float; result is 64-bit floats. Define DECLS first.
14221427
// we use initecho() to initialize zero and one because the compiler moves the initialization to inside the loop
14231428
#define CVTEPI64DECLS __m256i magic_i_lo = _mm256_castpd_si256(_mm256_broadcast_sd(&two_52)); /* 2^52 */ \
@@ -1438,6 +1443,7 @@ if(likely(!((I)jtfg&JTWILLBEOPENED)))z=EPILOGNORET(z); RETF(z); \
14381443
__m256d u_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(u_hi), _mm256_castsi256_pd(magic_i_all)); /* Compute in double precision: */ \
14391444
z = _mm256_add_pd(u_hi_dbl, _mm256_castsi256_pd(u_lo));} /* (u_hi - magic_d_all) + u_lo Do not assume associativity of floating point addition !! */
14401445
#endif
1446+
#endif
14411447
// Number of turns through a Duff loop of m1+1 elements, with 1<<lgduff instances in the loop. We assume we are handling [1,NPAR] elements at the end
14421448
#define DUFFLPCTV(m1,lgduff,lgeleperiter) ((((m1)+((((I)1<<(lgduff))-1)<<(lgeleperiter)))>>((lgeleperiter)+(lgduff))))
14431449
#define DUFFLPCT(m1,lgduff) DUFFLPCTV(m1,lgduff,LGNPAR)

jsrc/xdic.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -143,18 +143,14 @@ typedef struct ADic {
143143
// end of second cacheline. Following changed only by put/del
144144
UI cardinality; // number of kvs in the hashtable
145145
UI emptyn; // index to next empty kv/treeslot. EOC loops back on itself. Resize when empty
146+
#if NORMAH8
147+
I filler3[SY_64?5:0]; // pad to cacheline (24 words on each system).
148+
#else
146149
I filler3[SY_64?6:1]; // pad to cacheline (24 words on each system).
150+
#endif
147151
} bloc;
148152
} DIC;
149-
#if 7==NORMAH
150153
_Static_assert(sizeof(DIC)==32*SZI,"DIC not 32 Is");
151-
#else
152-
#if SY_64
153-
_Static_assert(sizeof(DIC)==33*SZI,"DIC not 33 Is");
154-
#else
155-
_Static_assert(sizeof(DIC)==34*SZI,"DIC not 34 Is");
156-
#endif
157-
#endif
158154
int getsize_xdic_DIC(void){return (int)sizeof(DIC);} // temp for debugging
159155

160156
#define ST UI4 // type of hash slot

0 commit comments

Comments
 (0)