@@ -847,11 +847,6 @@ struct jtimespec jmtfclk(void); //'fast clock'; maybe less inaccurate; intended
847847#define EMSGINVINFL 0x40000 // set to append 'invalid inflection' to msg
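848848#define EMSGNOMSGLINE 0x80000 // NOTE(review): this comment previously duplicated EMSGINVINFL's ("set to append 'invalid inflection' to msg"); the name suggests this flag controls omission of the message line - confirm and document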
849849
850- // debugging AD header length
851- #ifndef NORMAH8
852- #define NORMAH8 0
853- #endif
854-
855850#ifndef PYXES
856851#if SY_64
857852#define PYXES 1
@@ -860,6 +855,15 @@ struct jtimespec jmtfclk(void); //'fast clock'; maybe less inaccurate; intended
860855#endif
861856#endif
862857
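858+ // debugging AD header length: NORMAH8 defaults to 0 unless predefined, but is forced to 1 below when PYXES is enabled on a non-SY_64 build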
859+ #ifndef NORMAH8
860+ #define NORMAH8 0
861+ #endif
862+ #if !SY_64 && PYXES
863+ #undef NORMAH8
864+ #define NORMAH8 1
865+ #endif
866+
863867// if we are not multithreading, report the master thread only
864868#if !PYXES
865869#undef MAXTHREADS
@@ -1418,6 +1422,7 @@ if(likely(!((I)jtfg&JTWILLBEOPENED)))z=EPILOGNORET(z); RETF(z); \
14181422#define ACVCACHEWRITEUNLOCK ++JT(jt,fnasgnct); // increment cache count
14191423#define ACVCACHECLEAR ++JT(jt,fnasgnct); // increment cache count - used when we aren't freeing the only reference to the acv
14201424#endif
1425+ #if C_AVX2 || EMU_AVX2
14211426// Support for int-to-float, in parallel. Input is u, 64-bit int with a type of float; result is 64-bit floats. Define DECLS first.
14221427// we use initecho() to initialize zero and one because the compiler moves the initialization to inside the loop
14231428#define CVTEPI64DECLS __m256i magic_i_lo = _mm256_castpd_si256(_mm256_broadcast_sd(&two_52)); /* 2^52 */ \
@@ -1438,6 +1443,7 @@ if(likely(!((I)jtfg&JTWILLBEOPENED)))z=EPILOGNORET(z); RETF(z); \
14381443 __m256d u_hi_dbl = _mm256_sub_pd (_mm256_castsi256_pd (u_hi ), _mm256_castsi256_pd (magic_i_all )); /* Compute in double precision: */ \
14391444 z = _mm256_add_pd (u_hi_dbl , _mm256_castsi256_pd (u_lo ));} /* (u_hi - magic_d_all) + u_lo Do not assume associativity of floating point addition !! */
14401445#endif
1446+ #endif
14411447// Number of turns through a Duff loop of m1+1 elements, with 1<<lgduff instances in the loop. We assume we are handling [1,NPAR] elements at the end
14421448#define DUFFLPCTV (m1 ,lgduff ,lgeleperiter ) ((((m1)+((((I)1<<(lgduff))-1)<<(lgeleperiter)))>>((lgeleperiter)+(lgduff))))
14431449#define DUFFLPCT (m1 ,lgduff ) DUFFLPCTV(m1,lgduff,LGNPAR)
0 commit comments