Skip to content

Commit 7c80ff3

Browse files
committed
in load emsg, make [-nnn] 1-origin line#
1 parent c2eba09 commit 7c80ff3

7 files changed

Lines changed: 111 additions & 41 deletions

File tree

jsrc/ar.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ REDUCCPFX(tymesinsO, D, I, TYMESO)
228228
acc1=prim(acc1,acc5); acc2=prim(acc2,acc6); acc3=prim(acc3,acc7); acc0=prim(acc0,acc4); \
229229
acc2=prim(acc2,acc3); acc0=prim(acc0,acc1); acc0=prim(acc0,acc2); /* combine accumulators vertically */ \
230230
acc0=prim(acc0,_mm256_permute4x64_pd(acc0,0b11111110)); acc0=prim(acc0,_mm256_permute_pd(acc0,0xf)); /* combine accumulators horizontally 01+=23, 0+=1 */ \
231-
*z=_mm256_cvtsd_f64(acc0); ++z; /* store the single result from 0 */ \
231+
*(I*)z=_mm256_extract_epi64(_mm256_castpd_si256(acc0),0x0); /* AVX2 *z=_mm256_cvtsd_f64(acc0); */ ++z; /* store the single result */ \
232232
)
233233

234234
// f/ on rank>1, going down columns to save bandwidth
@@ -429,7 +429,7 @@ DF1(jtcompsum){
429429
c0=_mm256_add_pd(c0,_mm256_permute_pd(c0,0xf)); acc1=_mm256_permute_pd(acc0,0xf); // combine c0+c1, acc1<-1
430430
TWOSUM(acc0,acc1,acc0,c1); c0=_mm256_add_pd(c0,c1); // combine 0123, combine all low parts
431431
acc0=_mm256_add_pd(acc0,c0); // add low parts back into high in case there is overlap
432-
*zv=_mm256_cvtsd_f64(acc0); ++zv; // store the single result
432+
*(I*)zv=_mm256_extract_epi64(_mm256_castpd_si256(acc0),0x0); /* AVX2 *zv=_mm256_cvtsd_f64(acc0);*/ ++zv; // store the single result
433433
// obsolete _mm_storel_pd(zv++,_mm256_castpd256_pd128(acc0));
434434
}
435435
}else{

jsrc/d.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ void jtdebdisp(J jt,DC d){A*x,y;I e,t;
171171
switch(t){
172172
case DCPARSE: dhead(3,d); seeparse(d); if(NETX==jt->etxn)--jt->etxn; eputc(CLF); break;
173173
case DCCALL: dhead(0,d); seecall(d); eputc(CLF); break;
174-
case DCSCRIPT: dhead(0,d); efmt("[-"FMTI"] ", d->dcn-1);
174+
case DCSCRIPT: dhead(0,d); efmt("[-"FMTI"] ", d->dcn); // keep the line number as 1-origin since that's what editors do
175175
if(0<=d->dcm){READLOCK(JT(jt,startlock)) y=AAV(JT(jt,slist))[d->dcm]; ep(AN(y),CAV(y)); READUNLOCK(JT(jt,startlock))}
176176
eputc(CLF); break;
177177
}}

jsrc/v1.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ static B eqv(I af,I wf,I m,I n,I k,C* RESTRICT av,C* RESTRICT wv,B* RESTRICT z,B
116116
x+=8*NPAR; y+=8*NPAR;
117117
if(n2==1)goto oneloop; // if we don't have to loop here, avoid the data-dependent branch and fold the comparisons into the last batch
118118
// obsolete if(~_mm256_movemask_epi8(allmatches))goto fail; // if searches are long, kick out when there is a miscompare
119-
if(!_mm256_testc_pd(_mm256_castsi256_pd(allmatches),ones))goto fail; // if searches are long, kick out when there is a miscompare. test is '!(all bits of allmatches =1)'
119+
if(!_mm256_testc_pd(_mm256_castsi256_pd(allmatches),ones))goto fail; // if searches are long, kick out when there is a miscompare. test is '!(all sign bits of allmatches =1)'
120120
}while(--i>0);
121121
}
122122
oneloop:;
@@ -150,7 +150,7 @@ I memcmpne(void *s, void *t, I l){
150150

151151
UI n2=DUFFLPCT(n-1,3); /* # turns through duff loop */
152152
if(n2>0){
153-
__m256i allmatches =ones; // accumuland for compares init to all 1
153+
__m256i allmatches =_mm256_castpd_si256(ones); // accumuland for compares init to all 1
154154
UI backoff=DUFFBACKOFF(n-1,3);
155155
x+=(backoff+1)*NPAR; y+=(backoff+1)*NPAR;
156156
switch(backoff){
@@ -165,7 +165,7 @@ I memcmpne(void *s, void *t, I l){
165165
case -8: u=_mm256_loadu_si256 ((__m256i*)(x+7*NPAR)); v=_mm256_loadu_si256 ((__m256i*)(y+7*NPAR)); allmatches=_mm256_and_si256(allmatches,_mm256_cmpeq_epi64(u,v));
166166
x+=8*NPAR; y+=8*NPAR;
167167
// obsolete if(~_mm256_movemask_epi8(allmatches))R 1;
168-
if(!_mm256_testc_pd(_mm256_castsi256_pd(allmatches),ones))R 1; // test is '!(all bits of allmatches=1)'
168+
if(!_mm256_testc_pd(_mm256_castsi256_pd(allmatches),ones))R 1; // test is '!(all sign bits of allmatches=1)'
169169
}while(--n2>0);
170170
}
171171
}
@@ -272,7 +272,7 @@ static B eqvfl(I af,I wf,I m,I n,I k,D* RESTRICT av,D* RESTRICT wv,B* RESTRICT z
272272
x+=8*NPAR; y+=8*NPAR;
273273
if(n2==1)goto oneloop; // if we don't have to loop here, avoid the data-dependent branch and fold the comparisons into the last batch
274274
// obsolete if(0xf!=_mm256_movemask_pd(allmatches))goto fail;
275-
if(!_mm256_testc_pd(allmatches,ones))goto fail; // test is '!(all bits of allmatches=1)'
275+
if(!_mm256_testc_pd(allmatches,ones))goto fail; // test is '!(all sign bits of allmatches=1)'
276276
}while(--i>0);
277277
}
278278
}

jsrc/va2.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -794,10 +794,10 @@ static A jtva2(J jt,AD * RESTRICT a,AD * RESTRICT w,AD * RESTRICT self,UI allran
794794
ti * RESTRICT wv1=wv+dplen; wv1=j==1?wv:wv1; \
795795
oneprod2 \
796796
if(j>1){--j; _mm_storeu_pd(zv,_mm256_castpd256_pd128 (acc000)); _mm_storeu_pd(zv+ndpi,_mm256_castpd256_pd128 (acc100)); wv+=dplen; zv +=2;} \
797-
else{*zv=_mm256_cvtsd_f64(acc000); *(zv+ndpi)=_mm256_cvtsd_f64(acc100); zv+=1;} \
797+
else{*(I*)zv=_mm256_extract_epi64(_mm256_castpd_si256(acc000),0x0); *(I*)(zv+ndpi)=_mm256_extract_epi64(_mm256_castpd_si256(acc100),0x0); /* AVX2 *zv=_mm256_cvtsd_f64(acc000); *(zv+ndpi)=_mm256_cvtsd_f64(acc100); */ zv+=1;} \
798798
}else{ \
799799
oneprod1 \
800-
*zv=_mm256_cvtsd_f64(acc000); \
800+
*(I*)zv=_mm256_extract_epi64(_mm256_castpd_si256(acc000),0x0); /* AVX2 *zv=_mm256_cvtsd_f64(acc000); */ \
801801
zv+=1; \
802802
} \
803803
if(!--j)break; \
@@ -932,7 +932,7 @@ static A jtva2(J jt,AD * RESTRICT a,AD * RESTRICT w,AD * RESTRICT self,UI allran
932932
acc3=MUL_ACC(acc3,_mm256_maskload_pd(av,endmask),_mm256_maskload_pd(wv,endmask)); av+=((dplen-1)&(NPAR-1))+1; wv+=((dplen-1)&(NPAR-1))+1; \
933933
acc0=_mm256_add_pd(acc0,acc1); acc2=_mm256_add_pd(acc2,acc3); acc0=_mm256_add_pd(acc0,acc2); /* combine accumulators vertically */ \
934934
acc0=_mm256_add_pd(acc0,_mm256_permute4x64_pd(acc0,0b11111110)); acc0=_mm256_add_pd(acc0,_mm256_permute_pd(acc0,0xf)); /* combine accumulators horizontally 01+=23, 0+=1 */ \
935-
*zv=_mm256_cvtsd_f64(acc0); ++zv;
935+
*(I*)zv=_mm256_extract_epi64(_mm256_castpd_si256(acc0),0x0); /* AVX2 *zv=_mm256_cvtsd_f64(acc0); */ ++zv;
936936
#else
937937
#define ONEPRODD D total0=0.0; D total1=0.0; if(dplen&1)total1=(D)*av++*(D)*wv++; DQ(dplen>>1, total0+=(D)*av++*(D)*wv++; total1+=(D)*av++*(D)*wv++;); *zv++=total0+total1;
938938
#endif
@@ -1088,15 +1088,15 @@ DF2(jtsumattymes1){
10881088
// the largest intermediate total encountered; sometimes we get a little more.
10891089
c0=_mm256_add_pd(c0,_mm256_permute4x64_pd(c0,0b11111110)); acc1=_mm256_permute4x64_pd(acc0,0b11111110); // c0: lo01+=lo23, acc1<-hi23
10901090
TWOSUM(acc0,acc1,acc0,c1); c0=_mm256_add_pd(c0,c1); // combine acc0 = hi0+2/1+3, c0 accumulates lo0+lo2+extension0, lo1+lo3+extension1
1091-
c0=_mm256_add_pd(c0,_mm256_permute_pd(c0,0xf)); acc1=_mm256_permute_pd(acc0,0xf); // c0[0] has total of all loe parts, acc1=hi1+hi3
1091+
c0=_mm256_add_pd(c0,_mm256_permute_pd(c0,0xf)); acc1=_mm256_permute_pd(acc0,0xf); // c0[0] has total of all low parts, acc1=hi1+hi3
10921092
TWOSUM(acc0,acc1,acc0,c1); c0=_mm256_add_pd(c0,c1); // acc0 has sum of all hi parts, c1 sum of all low parts+extensions
10931093
if(fit==1){
10941094
// normal result. Just add the extensions into the hi part
10951095
acc0=_mm256_add_pd(acc0,c0); // add low parts back into high in case there is overlap
10961096
}else{
10971097
// extended result. We must preserve the extension bits in the total and write them out
10981098
TWOSUM(acc0,c0,acc0,c1); // extended total
1099-
zv[1]=_mm256_cvtsd_f64(c1); // store it out
1099+
((I*)zv)[1]=_mm256_extract_epi64(_mm256_castpd_si256(c1),0x0); /* AVX2 zv[1]=_mm256_cvtsd_f64(c1); */ // store it out
11001100

11011101
}
11021102
#else // obsolete
@@ -1109,7 +1109,7 @@ DF2(jtsumattymes1){
11091109
acc0=_mm256_add_pd(acc0,_mm256_permute_pd(acc0,0xf));
11101110
acc0=_mm256_add_pd(acc0,c0); // add low parts back into high in case there is overlap
11111111
#endif
1112-
*zv=_mm256_cvtsd_f64(acc0); zv+=fit; // store out high (perhaps only) part
1112+
((I*)zv)[0]=_mm256_extract_epi64(_mm256_castpd_si256(acc0),0x0); /* AVX2 *zv=_mm256_cvtsd_f64(acc0); */ zv+=fit; // store out high (perhaps only) part
11131113
if(!--j)break; av=av0; // repeat a if needed
11141114
}
11151115
}

jsrc/vfrom.c

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ static unsigned char jtmvmsparsex(J jt,void* const ctx,UI4 ti){
725725
if(unlikely(nc==0))R 0; // abort with no action if no columns (possible only for DIP)
726726
if(bv!=0&&prirow>=0)zv=0; // if we have DIP with a priority row, signal to process ALL rows in bk order. It's not really needed but it ensures that if the prirow is tied for pivot in the first
727727
// column, we will take it
728-
if(bv==0)thresh=_mm256_permute4x64_pd(thresh,0b00000000); // for Dpiv, repurpose thresh to all thresholds
728+
if(bv==0)thresh=_mm256_permute4x64_pd(thresh,0b00000000); // for Dpiv or one-column, repurpose thresh to all thresholds
729729

730730
// loop over all columns requested
731731
do{
@@ -871,8 +871,11 @@ static unsigned char jtmvmsparsex(J jt,void* const ctx,UI4 ti){
871871
oldcol=_mm256_castsi256_pd(_mm256_sub_epi64(_mm256_castpd_si256(oldcol),_mm256_castpd_si256(_mm256_cmp_pd(dotproducth,thresh,_CMP_GT_OQ))));
872872
}else{
873873
// one-column mode: just store out the values
874+
__m256d force0=_mm256_cmp_pd(_mm256_andnot_pd(sgnbit,dotproducth),thresh,_CMP_LT_OQ); // 1s where we need to clamp
875+
dotproducth=_mm256_blendv_pd(dotproducth,_mm256_setzero_pd(),force0); // set values < threshold to +0
874876
if(likely(_mm256_testc_pd(endmask,sgnbit)))_mm256_storeu_pd(zv,dotproducth);else _mm256_maskstore_pd(zv,endmask,dotproducth); // store, masking if needed
875877
if(limitrow==-3){
878+
dotproductl=_mm256_blendv_pd(dotproductl,_mm256_setzero_pd(),force0); // set values < threshold to +0
876879
if(likely(_mm256_testc_pd(endmask,sgnbit)))_mm256_storeu_pd(zv+n,dotproductl);else _mm256_maskstore_pd(zv+n,endmask,dotproductl); // repeat for low part
877880
}
878881
zv+=NPAR; // advance to next output location
@@ -1101,11 +1104,12 @@ static unsigned char jtmvmsparsex(J jt,void* const ctx,UI4 ti){
11011104

11021105
// 128!:9 matrix times sparse vector with optional early exit
11031106
// product mode:
1104-
// y is ndx;Ax;Am;Av;(M, shape m,n) where ndx is an atom
1107+
// y is ndx;Ax;Am;Av;(M, shape m,n);threshold where ndx is an atom
11051108
// if ndx<m, the column is ndx {"1 M; otherwise ((((ndx-m){Ax) ];.0 Am) {"1 M) +/@:*"1 ((ndx-m){Ax) ];.0 Av
1106-
// if M has rank 3 (with 2={.$M), do the product in extended precision
1107-
// Result for product mode (exitvec is scalar) is the product, one column of M
1109+
// if M has rank 3 (with 2={.$M), do the product in quad precision
1110+
// Result for product mode (exitvec is scalar) is the product, one column of M. Values closer to 0 than the threshold are clamped to 0
11081111
// DIP/Dpiv mode:
1112+
// Only the high part of M is used if it is quad-precision
11091113
// y is ndx;Ax;Am;Av;(M, shape m,n);bkgrd;(ColThreshold/PivTol,MinPivot,bkmin,NFreeCols,NCols,ImpFac,Virtx/Dpivdir);bk/'';Frow[;exclusion list/Dpiv;Yk]
11101114
// Result is rc,best row,best col,#cols scanned,#dot-products evaluated,best gain (if rc e. 0 1 2)
11111115
// rc,failing column of NTT, an element of ndx (if rc=4)
@@ -1154,7 +1158,7 @@ F1(jtmvmsparse){PROLOG(832);
11541158
I n=AS(C(AAV(w)[4]))[1]; // n=#rows/cols in M
11551159
// convert types as needed; set ?v=pointer to data area for ?
11561160
D *bv; // pointer to b values if there are any
1157-
__m256d thresh; // ColThr Inf bkmin MinPivot validity thresholds, small positive values
1161+
__m256d thresh; // ColThr Inf bkmin MinPivot validity thresholds, small positive values - for one-column mode, all lanes have the threshold for zero-clamp
11581162
I bestcol=1LL<<(BW-1), bestcolrow=0; // col# and row#+mask for best value found from previous column, init to no col found, and best value 'dangerous or not found'
11591163
// obsolete 1LL<<(32+3);
11601164
A z; D *zv; D *Frow; // pointer to output for product mode, Frow
@@ -1166,11 +1170,13 @@ F1(jtmvmsparse){PROLOG(832);
11661170

11671171
if(AR(C(AAV(w)[0]))==0){
11681172
// single index value. set bv=0, zv non0 as a flag that we are storing the column
1169-
bv=0; ASSERT(AN(w)==5,EVLENGTH); // if goodvec is an atom, set bv=0 to indicate that bv is not used and verify no more input
1173+
bv=0; ASSERT(AN(w)==6,EVLENGTH); // if goodvec is an atom, set bv=0 to indicate that bv is not used and verify no more input
11701174
if(unlikely(n==0)){R reshape(drop(num(-1),shape(C(AAV(w)[4]))),zeroionei(0));} // empty M, each product is 0
1175+
ASSERT(AR(C(AAV(w)[5]))==0,EVRANK); ASSERT(AT(C(AAV(w)[5]))&FL,EVDOMAIN); // thresh must be a float atom
11711176
I epcol=AR(C(AAV(w)[4]))==3; // flag if we are doing an extended-precision column fetch
11721177
GATV(z,FL,n<<epcol,1+epcol,AS(C(AAV(w)[4]))); zv=DAV(z); // allocate the result area for column extraction. Set zv nonzero so we use bkgrd of i. #M
11731178
bvgrd0=0; bvgrde=bvgrd0+n; // length of column is #M
1179+
thresh=_mm256_set1_pd(DAV(C(AAV(w)[5]))[0]); // load threshold in all lanes
11741180
}else{
11751181
// A list of index values. We are doing the DIP calculation or Dpiv
11761182
ASSERT(AR(C(AAV(w)[5]))==1,EVRANK); ASSERT(AN(C(AAV(w)[5]))==0||AT(C(AAV(w)[5]))&INT,EVDOMAIN); bvgrd0=IAV(C(AAV(w)[5])); bvgrde=bvgrd0+AN(C(AAV(w)[5])); // bkgrd: the order of processing the rows, and end+1 ptr normally /: bk
@@ -1180,6 +1186,7 @@ F1(jtmvmsparse){PROLOG(832);
11801186
if(AN(C(AAV(w)[8]))==0)Frow=0;else{ASSERT(AT(C(AAV(w)[8]))&FL,EVDOMAIN); ASSERT(AN(C(AAV(w)[8]))==AS(C(AAV(w)[4]))[0]+AS(C(AAV(w)[1]))[0],EVLENGTH); Frow=DAV(C(AAV(w)[8]));} // if Frow omitted we are looking to make bks nonzero
11811187
ASSERT(AR(C(AAV(w)[6]))<=1,EVRANK); ASSERT(AT(C(AAV(w)[6]))&FL,EVDOMAIN); ASSERT(AN(C(AAV(w)[6]))==7,EVLENGTH); // 7 float constants
11821188
if(unlikely(n==0)){RETF(num(6))} // empty M - should not occur, give error result 6
1189+
DO(AN(C(AAV(w)[5])), ASSERT((UI)bvgrd0[i]<(UI)n,EVINDEX); ) // verify bv indexes in bounds if M not empty
11831190
bkmin=DAV(C(AAV(w)[6]))[2];
11841191
thresh=_mm256_set_pd(DAV(C(AAV(w)[6]))[1],bkmin,inf,DAV(C(AAV(w)[6]))[0]); nfreecolsd=(DAV(C(AAV(w)[6]))[3]); ncolsd=(DAV(C(AAV(w)[6]))[4]); impfac=DAV(C(AAV(w)[6]))[5]; prirow=(I)DAV(C(AAV(w)[6]))[6];
11851192
ASSERT(AR(C(AAV(w)[7]))<=1,EVRANK);
@@ -1236,6 +1243,7 @@ struct mvmctx opctx={.ctxlock=0,.abortcolandrow=-1,.bestcolandrow={-1,-1},YC(ndx
12361243
#endif
12371244
}
12381245

1246+
#if C_AVX2
12391247
// everything we need for one core's execution
12401248
struct __attribute__((aligned(CACHELINESIZE))) ekctx {
12411249
I taskmask; // a bit for each task as we take it for work
@@ -1248,8 +1256,6 @@ struct __attribute__((aligned(CACHELINESIZE))) ekctx {
12481256
A newrownon0;
12491257
D relfuzz;
12501258
} ;
1251-
1252-
12531259
// the processing loop for one core. We take a slice of the columns depending on our proc# in the threadpool
12541260
// ti is the job#, not used except to detect error
12551261
static unsigned char jtekupdatex(J jt,void* const ctx,UI4 ti){
@@ -1263,7 +1269,7 @@ static unsigned char jtekupdatex(J jt,void* const ctx,UI4 ti){
12631269
__m256d mrelfuzz=_mm256_set1_pd(relfuzz); // comparison tolerance
12641270
__m256d sgnbit=_mm256_broadcast_sd((D*)&Iimin);
12651271
I dpflag=0; // precision flags: 1=Qk 2=pivotcolnon0 4=newrownon0
1266-
D *qkv=DAV(qk); I qksize=AS(qk)[AR(qk)-1]; I qksizesq=qksize*qksize; dpflag|=AR(qk)>2; // pointer to qk data, length of a row, offset to low part if present
1272+
D *qkv=DAV(qk); I qksize=AS(qk)[AR(qk)-1]; I t=AR(prx)+1; t=(t!=1)?qksize:t; I qksizesq=qksize*t; dpflag|=AR(qk)>AR(prx)+1; // pointer to qk data, length of a row, offset to low part if present. offset is qksize^2, or bksize
12671273
UI rowx=0, rown=AN(prx); I *rowxv=IAV(prx); D *pcn0v=DAV(pivotcolnon0); dpflag|=(AR(pivotcolnon0)>1)<<1; // current row, # rows, address of row indexes, column data
12681274
UI coln=AN(pcx); I *colxv=IAV(pcx); D *prn0v=DAV(newrownon0); dpflag|=(AR(newrownon0)>1)<<2; // # cols, address of col indexes. row data
12691275
// for each row
@@ -1352,28 +1358,33 @@ static unsigned char jtekupdatex(J jt,void* const ctx,UI4 ti){
13521358
}
13531359

13541360
// 128!:12 calculate
1355-
// Qk=: (((<prx;pcx) { Qk) ((~:!.relfuzz) * -) pivotcolnon0 */ newrownon0) (<prx;pcx)} Qk
1361+
// Qk/bk=: (((<prx;pcx) { Qk) ((~:!.relfuzz) * -) pivotcolnon0 */ newrownon0) (<prx;pcx)} Qk/bk
13561362
// with high precision
13571363
// a is prx;pcx;pivotcolnon0;newrownon0;relfuzz
1358-
// w is Qk
1359-
// If Qk has rank 3, or pivotcolnon0/newrownon0 rank 2, calculate them in extended FP precision
1360-
// Qk is modified in place
1364+
// w is Qk or bk. If bk, prx must be scalar 0
1365+
// If Qk has rank > rank newrownon0 + rank prx, or pivotcolnon0/newrownon0 rank 2, calculate them in extended FP precision
1366+
// Qk/bk is modified in place
13611367
F2(jtekupdate){F2PREFIP;
13621368
ARGCHK2(a,w);
13631369
// extract the inputs
1364-
A qk=w; ASSERT(AT(w)&FL,EVDOMAIN) ASSERT(ASGNINPLACESGN(SGNIF(jtinplace,JTINPLACEWX),w),EVNONCE) ASSERT(BETWEENC(AR(w),2,3),EVRANK)
1365-
ASSERT(AR(w)==2||AS(w)[0]==2, EVLENGTH) ASSERT(AS(w)[AR(w)-1]==AS(w)[AR(w)-2],EVLENGTH)
1370+
A qk=w; ASSERT(AT(w)&FL,EVDOMAIN) ASSERT(ASGNINPLACESGN(SGNIF(jtinplace,JTINPLACEWX),w),EVNONCE)
13661371
ASSERT(AT(a)&BOX,EVDOMAIN) ASSERT(AR(a)==1,EVRANK) ASSERT(AN(a)==5,EVLENGTH) // a is 5 boxes
1367-
A prx=AAV(a)[0]; ASSERT(AT(prx)&INT,EVDOMAIN) ASSERT(AR(prx)==1,EVRANK) // prx is integer list
1372+
A prx=AAV(a)[0]; ASSERT(AT(prx)&INT,EVDOMAIN) ASSERT(AR(prx)<=1,EVRANK) // prx is integer list or atom
13681373
A pcx=AAV(a)[1]; ASSERT(AT(pcx)&INT,EVDOMAIN) ASSERT(AR(pcx)==1,EVRANK) // pcx is integer list
13691374
A pivotcolnon0=AAV(a)[2]; ASSERT(AT(pivotcolnon0)&FL,EVDOMAIN) ASSERT(BETWEENC(AR(pivotcolnon0),1,2),EVRANK)
13701375
ASSERT(AR(pivotcolnon0)==1||AS(pivotcolnon0)[0]==2, EVLENGTH) // pivotcolnon0 is float or extended list
13711376
A newrownon0=AAV(a)[3]; ASSERT(AT(newrownon0)&FL,EVDOMAIN) ASSERT(BETWEENC(AR(newrownon0),1,2),EVRANK)
1372-
ASSERT(AR(newrownon0)==1||AS(newrownon0)[0]==2, EVLENGTH) // newrownon0 is float or extended list
1377+
ASSERT(AR(newrownon0)==1||AS(newrownon0)[0]==2,EVLENGTH) // newrownon0 is float or extended list
13731378
A tmp=AAV(a)[4]; if(!(AT(tmp)&FL))RZ(tmp=cvt(FL,tmp)); ASSERT(AR(tmp)==0,EVRANK) D relfuzz=DAV(tmp)[0]; // relfuzz is a float atom
13741379
// agreement
1380+
ASSERT(BETWEENC(AR(w),AR(prx)+1,AR(prx)+2),EVRANK) // Qk is nxn; bk is n, treated as a single row. Each may be quadprec
1381+
ASSERT(AR(w)==AR(prx)+1||AS(w)[0]==2,EVLENGTH)
1382+
if(AR(prx)!=0){ASSERT(AS(w)[AR(w)-1]==AS(w)[AR(w)-2],EVLENGTH) DO(AN(prx), ASSERT(IAV(prx)[i]<AS(w)[AR(w)-2],EVINDEX))} else{ASSERT(IAV(prx)[0]==0,EVINDEX)} // Qk must be square; bk not; valid row indexes
13751383
ASSERT(AN(prx)==AS(pivotcolnon0)[AR(pivotcolnon0)-1],EVLENGTH) ASSERT(AN(pcx)==AS(newrownon0)[AR(newrownon0)-1],EVLENGTH) // indexes and values must agree
1384+
// audit the indexes
1385+
DO(AN(pcx), ASSERT(IAV(pcx)[i]<AS(w)[AR(w)-1],EVINDEX)) // verify valid column indexes
13761386
// do the work
1387+
13771388
#define YC(n) .n=n,
13781389
struct ekctx opctx={YC(prx)YC(qk)YC(pcx)YC(pivotcolnon0)YC(newrownon0)YC(relfuzz)};
13791390

@@ -1382,3 +1393,6 @@ struct ekctx opctx={YC(prx)YC(qk)YC(pcx)YC(pivotcolnon0)YC(newrownon0)YC(relfuzz
13821393

13831394
R qk;
13841395
}
1396+
#else
1397+
F2(jtekupdate){ASSERT(0,EVNONCE);}
1398+
#endif

jsrc/xs.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ void setftype(C*v,OSType type,OSType crea){C p[256];FInfo f;
4343
/* tso: echo to stdout */
4444

4545
#define SEEKLEAK 0
46+
47+
// handler for load command, 0!:0-112
4648
static A jtline(J jt,A w,I si,C ce,B tso){A x=mtv,z;DC d;
4749
#if NAMETRACK
4850
// bring out the name, locale, and script into easy-to-display name
@@ -159,6 +161,7 @@ F1(jtscriptnum){
159161
R rv; // return prev value
160162
}
161163

164+
// entry points for 0!:0-0!:112
162165
F1(jtscm00 ){I r; ARGCHK1(w); r=ISDENSETYPE(AT(w),LIT+C2T+C4T); F1RANK( r,jtscm00, DUMMYSELF); R r?line(w,-1L,0,0):linf(mark,w,0,0);}
163166
F1(jtscm01 ){I r; ARGCHK1(w); r=ISDENSETYPE(AT(w),LIT+C2T+C4T); F1RANK( r,jtscm01, DUMMYSELF); R r?line(w,-1L,0,1):linf(mark,w,0,1);}
164167
F1(jtscm10 ){I r; ARGCHK1(w); r=ISDENSETYPE(AT(w),LIT+C2T+C4T); F1RANK( r,jtscm10, DUMMYSELF); R r?line(w,-1L,1,0):linf(mark,w,1,0);}

0 commit comments

Comments
 (0)