You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
// create double-precision sum of inputs, where it is not known which is larger NOTE in0 and outhi might be identical. Needs sgnbit.
1907
+
// create double-precision sum of inputs, where it is not known which is larger. NOTE: in0 and outhi might be identical; outlo must not be an input. Needs sgnbit.
if(n2==1)goto oneloop; // if we don't have to loop here, avoid the data-dependent branch and fold the comparisons into the last batch
118
-
if(~_mm256_movemask_epi8(allmatches))goto fail; // if searches are long, kick out when there is a miscompare
118
+
// obsolete if(~_mm256_movemask_epi8(allmatches))goto fail; // if searches are long, kick out when there is a miscompare
119
+
if(!_mm256_testc_si256(allmatches,ones))goto fail; // if searches are long, kick out when there is a miscompare. test is '!(all bits of allmatches =1)'
// obsolete R 0xf!=_mm256_movemask_pd(_mm256_xor_pd(_mm256_cmp_pd(u,_mm256_mul_pd(v,cct),_CMP_GT_OQ),_mm256_cmp_pd(v,_mm256_mul_pd(u,cct),_CMP_LE_OQ)));
228
+
R !_mm256_testc_pd(_mm256_xor_pd(_mm256_cmp_pd(u,_mm256_mul_pd(v,cct),_CMP_GT_OQ),_mm256_cmp_pd(v,_mm256_mul_pd(u,cct),_CMP_LE_OQ)),ones);
b ^= 0xf==_mm256_movemask_pd(_mm256_xor_pd(_mm256_cmp_pd(u,_mm256_mul_pd(v,cct),_CMP_GT_OQ),_mm256_cmp_pd(v,_mm256_mul_pd(u,cct),_CMP_LE_OQ)));
295
+
// obsolete b ^= 0xf==_mm256_movemask_pd(_mm256_xor_pd(_mm256_cmp_pd(u,_mm256_mul_pd(v,cct),_CMP_GT_OQ),_mm256_cmp_pd(v,_mm256_mul_pd(u,cct),_CMP_LE_OQ)));
296
+
b ^= _mm256_testc_pd(_mm256_xor_pd(_mm256_cmp_pd(u,_mm256_mul_pd(v,cct),_CMP_GT_OQ),_mm256_cmp_pd(v,_mm256_mul_pd(u,cct),_CMP_LE_OQ)),ones);
dotprod=_mm256_permute4x64_pd(dotproducth,0b00000000); // copy next value into all lanes
853
+
dotproducth=_mm256_permute4x64_pd(dotproducth,0b11111001); dotproducth=_mm256_blend_pd(dotproducth,_mm256_setzero_pd(),0b1000) // shift down one value for next time
854
+
i=_mm256_extract_epi64(indexes,0); indexes=_mm256_permute4x64_pd(indexes,0b11111001); // get the row number we are trying to swap out; shift row number down for next loop
855
+
PROCESSROWRATIOS
856
+
}
857
+
}
858
+
}
859
+
}while((bvgrd+=NPAR)<bvgrde);
776
860
#else
777
861
// COLLPINIT: declare and initialize the row-scan state that COLLP steps through.
//  bvgrd - cursor into the list of row numbers, starting at bvgrd0
//  i     - current row number; starts at -1 so that the first ++i in COLLP yields row 0
//  mv    - pointer to the current row; starts n elements before mv0 so that the first mv+=n in COLLP lands on mv0
//  bkold, cold, bkle0 - running values initialized to inf, 1.0 and 1 (the code that updates them is outside this macro)
// Expands to several declarations, so it must be used where declarations are legal, at most once per scope.
// NOTE(review): mv0-n is only a starting value that is advanced before use in the sequential path; confirm it is never dereferenced as-is.
#define COLLPINIT I *bvgrd=bvgrd0; I i=-1; D *mv=mv0-n; D bkold=inf, cold=1.0; I bkle0=1;
778
862
// COLLP: open the per-row loop and select the row for this iteration. The do{ opened here is intentionally left unclosed; it is closed by code outside this macro.
//  zv!=0 (marked unlikely): rows are visited sequentially - i is incremented and mv advances by one row (n elements)
//  zv==0: the row number is read from *bvgrd and mv is recomputed as mv0+n*i
// NOTE(review): bvgrd is not advanced here; presumably the loop tail does that - confirm against the closing code.
#define COLLP do{if(unlikely(zv!=0)){++i; mv+=n;}else{i=*bvgrd; mv=mv0+n*i;}  // for each row, i is the row#, mv points to the beginning of the row of M.  If we take the whole col, take it in order for cache.  Prefetch next row?
// rc=5 (not created - means problem is infeasible) rc=6=empty M, problem is malformed
971
1055
// if the exclusion list is given, we stop on the first nonimproving pivot, and the exclusion list is used to prevent repetition of basis
972
1056
// If Frow is empty, we are looking for nonimproving pivots in rows where the selector is 0. In that case the bkgrd puts the bk values in descending order. We return the first column that will make more 0 B rows non0 than non0 B rows 0.
973
-
// If bk is empty, we are counting the #places where c>=PivTol and accumulating into Dpiv under control of Dpivdir (-1=decr, 1=incr; init to 0 if neg)
1057
+
// If bk is empty, we are looking in bkgrd columns and counting the #places where c>=PivTol and accumulating into Dpiv under control of Dpivdir (-1=decr, 1=incr; init to 0 if neg)
0 commit comments