@@ -108,6 +108,7 @@ static NUMH(jtnumj){C*t,*ta;D x,y;Z*v;
108108 R 1 ;
109109}
110110
111+ // return 0 if overflow or not numeric, 1 otherwise
111112static NUMH (jtnumi ){UI neg ;UI j ; // must be UI to avoid signed overflow
112113 neg = '-' == s [0 ]; s += neg ; n -= neg ; if (!n )R 0 ; // extract & skip sign; exit if no digits
113114 for (;* s == '0' ;-- n ,++ s ); // skip leading zeros, even down to nothing, which will be 0 value
@@ -312,7 +313,7 @@ A jtconnum(J jt,I n,C*s){PROLOG(0101);A y,z;B (*f)(J,I,C*,void*),p=1;C c,*v;I d=
312313 k = bpnoun (t ); // size in bytes of 1 result value
313314 GA0 (z ,t ,m ,1 != m ); v = CAVn (1 != m ,z );
314315 if (t == INT ){ // if we think the values are ints, see if they really are
315- DO (m , d = i + i ; e = yv [d ]; if (!numi (yv [1 + d ]- e ,e + s ,v )){t = FL ; break ;} v += k ;); // read all values, stopping if a value overflows
316+ DO (m , d = i + i ; e = yv [d ]; if (!numi (yv [1 + d ]- e ,e + s ,v )){t = FL ; bcvtmask |= 6 ; break ;} v += k ;); // read all values, stopping if a value overflows. In that case, suppress conversion to INT
316317 if (t != INT ){f = jtnumfd ; if (SZI == SZD ){AT (z )= FL ;}else {GATV0 (z ,FL ,m ,1 != m );} v = CAVn (1 != m ,z );} // if there was overflow, repurpose/allocate the input with enough space for floats
317318 }
318319 if (t != INT )DO (m , d = i + i ; e = yv [d ]; ASSERT (f (jt ,yv [1 + d ]- e ,e + s ,v ),EVILNUM ); v += k ;); // read the values as larger-than-int
@@ -348,14 +349,14 @@ static EXEC2F(jtexec2z,numbpx,CMPX,Z)
348349
349350// Try to convert a numeric field to integer. Result is the integer, and *out is the
350351// output pointer after the integer - this will be either the \0 at the end of the field, or
351- // ==in to indicate failure. Result is 0 on failure, and the output pointer equals the input.
352+ // ==in to indicate failure. On failure, the output pointer equals the input; the result is 0 for an invalid number, 1 for overflow.
352353// This routine must recognize all valid ints. We accept at most one sign, followed by any number
353354// of digits or commas, followed optionally by a decimal point, followed optionally by a
354355// string of 0s. So, 123,456.00 is recognized as an integer. If the input overflows an
355356// integer, that is a failure. For compatibility with non-integer code, we allow commas anywhere in
356357// the number except the beginning or end.
357358// This routine assumes 64-bit signed integers.
358- I strtoint (C * in , C * * out ) {
359+ static I strtoint (C * in , C * * out ) {
359360 UI res = 0 ; // init result
360361 I neg , dig ; // negative flag, digit value
361362 * out = in ; // assume failure
@@ -370,7 +371,7 @@ I strtoint(C* in, C** out) {
370371 if ((UI )dig <=(UI )9 ){ // numeric digit. Accept it and check for overflow
371372 if (res >= 1 + IMAX /10 ) R 0 ; // fail if this will overflow for sure. res could be IMIN
372373 res = res * 10 + dig ; // accept the digit. This may overflow, but that's not fatal yet if it overflows to IMIN
373- if ((I )((UI )0 - res ) > 0 )R 0 ; // If result overflowed to neg, fail. We allow IMIN to continue on, representing IMAX+1
374+ if ((I )((UI )0 - res ) > 0 )R 1 ; // If result overflowed to neg, fail. We allow IMIN to continue on, representing IMAX+1
374375 continue ;
375376 }
376377 if (* in == C0 || * in == '.' )break ; // end-of-field or end-of-integer part: exit
@@ -403,9 +404,10 @@ I strtoint(C* in, C** out) {
403404static A jtexec2r (J jt ,A a ,A w ,I n ,I m ,I c ,I fillreqd ){A z ;B b ,e ;C d ,* u ,* uu ,* v ,* x ,* y ;D a0 ,* zv ;I k ,j ,mc ,r ;
404405 B tryingint ; // set if we have to attempt to convert to int before float, if ints can hold
405406 // higher precision than float
406- B valueisint ; // set if the value we are processing is really an int
407+ B valueisint ; // set if the value we are processing is really an int
407408 // Calculate total # result values; set input scan pointer u; set &next row of input y; set end-of-input pointer uu
408409 // set end-of-result-row counter j
410+ B intoflo = 0 ; // if we hit integer overflow, we must return CONJ in type to suppress conversion to INT
409411 k = 0 ; mc = m * c ; u = CAV (w ); y = u + n ; j = c ; uu = u + AN (w );
410412 // Rank of result is rank of w, unless the rows have only 1 value; make rows atoms then, removing them from rank
411413 r = AR (w )- (I )(1 == c ); r = MAX (0 ,r );
@@ -429,11 +431,11 @@ B valueisint; // set if the value we are processing is really an int
429431 if (k >=mc )break ; // exit loop if all inputs processed
430432 // Read a number from the input, leaving v pointing to the character that stopped the conversion
431433 // If we are trying ints first to avoid floating-point truncation, do so
432- if (valueisint = tryingint ) {
433- ((I * )zv )[k ] = strtoint (u ,& v ); // returns an I, which we store into the nominally-D result array
434- if (u == v ){valueisint = 0 ;}
434+ if (valueisint = tryingint ) {I ival ;
435+ ((I * )zv )[k ]= ival = strtoint (u ,& v ); // returns an I, which we store into the nominally-D result array
436+ if (u == v ){valueisint = 0 ; intoflo |= ival ; }
435437 // The conversion to int failed, but that's not enough for us to write off ints. Maybe the
436- // value was invalid, and we will use the default, which is known to be int.
438+ // value was invalid, and we will use the default, which is known to be int. If overflow, remember that fact
437439 }
438440 if (!valueisint )zv [k ]= strtod (u ,(char * * )& v );
439441 // We have read a number, either as an int or a float. Analyze the stopper character
@@ -478,7 +480,7 @@ B valueisint; // set if the value we are processing is really an int
478480 }
479481 }
480482 // All done. If we ended still looking for ints, the whole result must be int, so flag it as such
481- if (tryingint )AT (z ) = INT ;
483+ if (tryingint )AT (z )= INT ; if ( unlikely ( intoflo )) AT ( z )|= CONJ ; // set CONJ as flag in type if integer overflow, which must suppress conversion to INT
482484 R z ;
483485}
484486
@@ -524,6 +526,7 @@ F2(jtexec2){F12IP;A z;B b,p;C d,*v;I at,c,i,k,m,n,r,*s;
524526 // Select the precision to use: the smallest that can hold the data, but never less than the precision of x
525527 C cvtmask = (~AT (a )& B01 )<<1 ; // if x is not B01, set mask to suppress conversion to B01
526528 cvtmask = AT (a )& B01 + INT ?cvtmask :6 ; // if not B01 or INT, suppress conversion to INT (but it may be INT already)
529+ if (unlikely (AT (z )& CONJ )){cvtmask = 6 ; AT (z )&=~CONJ ;} // if there was integer overflow, suppress conversion to INT/B01
527530 cvtmask = AT (a )& B01 + INT + FL ?cvtmask :14 ; // if not B01/INT/FL, suppress conversion to FL
528531 R bcvt (cvtmask ,z );
529532}
0 commit comments