You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
// GAE0(v,t,n,r,erraction): allocate a block through jtga0 and leave it in v.
//   v         - lvalue that receives the result of jtga0(jt,(I)(t),(I)(r),(I)(n))  (note: r is passed before n)
//   t, n, r   - forwarded to jtga0 after a cast to I; n is additionally stored into AN(v)
//   erraction - statement executed when jtga0 returns 0
// HISTOCALL is expanded before the allocation.  The shape is NOT written here.
// Used when shape=0 and rank is never 1, or the shape will always be filled in by the user even if rank is 1.
#define GAE0(v,t,n,r,erraction) {HISTOCALL if(unlikely(!(v=jtga0(jt,(I)(t),(I)(r),(I)(n)))))erraction; AN(v)=(n);}
// GA00(v,t,n,r): GAE0 with the error action fixed to `R 0`.
// NOTE(review): an identical GA00 definition appears again a few lines below with a comment that says "shape" instead of "rank";
// this copy looks like the pre-change side of a diff - confirm which one is live.
#defineGA00(v,t,n,r) {GAE0(v,t,n,r,R 0)} // used when rank will always be filled in by user. Default error action is to exit
1236
+
// GA00(v,t,n,r): GAE0 with the error action fixed to `R 0`, i.e. the default error action is to exit.
// The shape is not written here; used when the shape will always be filled in by the user.
#define GA00(v,t,n,r) {GAE0(v,t,n,r,R 0)}
1237
1237
// GA(v,t,n,r,s): allocate with GA00(v,t,n,r), then run MCISH(AS(v),(I*)(s),(r)) to fill in the shape.
//   s points to the shape; it is cast to I* before being handed to MCISH.
//   NOTE(review): MCISH presumably copies r shape items from s into AS(v) - confirm against its definition.
#define GA(v,t,n,r,s) {GA00(v,t,n,r) MCISH(AS(v),(I*)(s),(r))}
1238
1238
// GA0(v,t,n,r): allocate with GA00(v,t,n,r), then store n through AS(v) when r==1,
// or through jt->shapesink for any other r, so the store is unconditional.
// Used when shape=0 but rank may be 1, in which case the shape must be filled in with AN - never for sparse blocks.
#define GA0(v,t,n,r) {GA00(v,t,n,r) *((r)==1?AS(v):jt->shapesink)=(n);}
1239
1239
// GA10(v,t,n): allocate with GA00 using a literal rank of 1, then set AS(v)[0] to n.
// Used when rank is known to be 1.
#define GA10(v,t,n) {GA00(v,t,n,1) AS(v)[0]=(n);}
Copy file name to clipboard. Expand all lines: jsrc/ja.h
+4-1Lines changed: 4 additions & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -428,7 +428,10 @@
428
428
// fplus(x,y): shorthand for jtfplus, supplying the in-scope jt as the first argument.
#define fplus(x,y) jtfplus(jt,(x),(y))
429
429
// fpoly(x,y): shorthand for jtfpoly, supplying the in-scope jt as the first argument.
#define fpoly(x,y) jtfpoly(jt,(x),(y))
430
430
// fpolyc(x): shorthand for jtfpolyc, supplying the in-scope jt as the first argument.
#define fpolyc(x) jtfpolyc(jt,(x))
431
-
// fr(x): does nothing when x is 0.  Otherwise reads AC(x); unless ACISPERM is true for that count, decrements it and
// calls mf(x) when the decremented count is <=0, else stores the decremented count back into AC(x).
// NOTE(review): this line follows a '-' diff marker and a later line defines fr(x) as frcommon(x,mf);
// it looks like the pre-change side of the diff - confirm which definition is live.
#definefr(x) {if(likely((x)!=0)){I Zs = AC(x); if(likely(!ACISPERM(Zs))){if(likely(--Zs<=0))mf(x);else AC(x)=Zs;}}} // use fr for known nonrecursives, and for locales
// frcommon(x,f): shared body of the free macros.
//   x - block to release; nothing happens when x is 0
//   f - name of the function/macro invoked as f(x) to release the block
// Reads AC(x) into a local; if ACISPERM is true for that count nothing is changed.  Otherwise the count is decremented:
// when the result is <=0, f(x) is called (AC(x) is not rewritten), else the decremented count is stored back into AC(x).
// x is evaluated more than once, so it must not have side effects.
// Use for known nonrecursives, and for locales.
#define frcommon(x,f) {if(likely((x)!=0)){I Zs = AC(x); if(likely(!ACISPERM(Zs))){if(likely(--Zs<=0)){f(x);}else AC(x)=Zs;}}}
433
+
// fr(x): frcommon with mf as the release function.
#define fr(x) frcommon(x,mf)
434
+
// frgmp(x): frcommon with gmpmfree as the release function; used to free GMP blocks.
#define frgmp(x) frcommon(x,gmpmfree)
Iit=MAX(AT(a),AT(w)); it=FAV(self)->id==CFIT?FL:it; // if input types are dissimilar, convert to the larger. For +/@:*"1!.0, convert everything to float
1007
+
Iit=MAX(AT(a),AT(w)); it=fit!=0?FL:it; // if input types are dissimilar, convert to the larger. For +/@:*"1!.[01], convert everything to float
1004
1008
if(unlikely(it!=(AT(w)|AT(a)))){
1005
1009
if(TYPESNE(it,AT(a))){RZ(a=cvt(it,a));} // convert to common input type
1006
1010
if(TYPESNE(it,AT(w))){RZ(w=cvt(it,w));}
@@ -1017,9 +1021,9 @@ DF2(jtsumattymes1){
1017
1021
Az;
1018
1022
// if there is frame, create the outer loop values
1019
1023
Infro,nfri; // outer loop counts, and which arg is repeated
1020
-
if(likely(((ar-acr)|(wr-wcr))==0)){ // normal case
1024
+
if(likely(((ar-acr)|(wr-wcr))==0)){ // normal case of no frame
1021
1025
nfro=nfri=1; // no outer loops, repeata immaterial
1022
-
GA(z,FL>>(it&B01),ndpo*ndpi,wcr-1,AS(w)); // type is INT if inputs booleans, otherwise FL
1026
+
GA(z,FL>>(it&B01),(ndpo*ndpi)<<(fit>>1),wcr-1+(fit>>1),AS(w)); // type is INT if inputs booleans, otherwise FL
if(unlikely(fit==2))AS(w)[AR(w)-1]=2; // if +/@:*"1!.1, we store two atoms per sum
1036
1041
1037
-
if(likely(FAV(self)->id!=CFIT)){RZ(jtsumattymesprods(jt,it,voidAV(a),voidAV(w),dplen,nfro,nfri,ndpo,ndpi,voidAV(z))); // eval standard dot-product, check for error
1042
+
if(likely(fit==0)){RZ(jtsumattymesprods(jt,it,voidAV(a),voidAV(w),dplen,nfro,nfri,ndpo,ndpi,voidAV(z))); // eval standard dot-product, check for error
1038
1043
}else{
1039
-
// here for +/@:*"1!.0, double-precision dot product https://www-pequan.lip6.fr/~graillat/papers/IC2012.pdf
1044
+
// here for +/@:*"1!.[01], double-precision dot product https://www-pequan.lip6.fr/~graillat/papers/IC2012.pdf
1040
1045
NAN0;
1041
1046
#if (C_AVX2&&SY_64) ||EMU_AVX2
1042
1047
#if1// higher precision. Required when a large product is added to a small total. Dependency loop for acc is 4 clocks; for c is 4 clocks. Total 12 insts, so unrolled 2 would do
0 commit comments