Skip to content

Commit 7c55463

Browse files
committed
Update dtype for NaN values to use float32 instead of string
1 parent 199cdab commit 7c55463

4 files changed

Lines changed: 148 additions & 271 deletions

File tree

danfojs-browser/src/core/utils.js

Lines changed: 63 additions & 129 deletions
Original file line number | Diff line number | Diff line change
@@ -210,148 +210,82 @@ export class Utils {
210210
}
211211
}
212212

213-
// infer types from an array of arrays
214-
__get_t(arr_val) {
215-
if (this.__is_1D_array(arr_val)) {
216-
let dtypes = [];
217-
let int_tracker = [];
218-
let float_tracker = [];
219-
let string_tracker = [];
220-
let bool_tracker = [];
221-
let lim;
222-
223-
//remove NaNs from array
224-
let arr = [];
225-
arr_val.map((val) => {
226-
if (!(isNaN(val) && typeof val != "string")) {
227-
arr.push(val);
228-
}
229-
});
230-
231-
if (arr.length == 0){
232-
dtypes.push("string");
233-
}
213+
__checker(arr_val){
214+
let dtypes = [];
215+
let lim;
216+
let int_tracker = [];
217+
let float_tracker = [];
218+
let string_tracker = [];
219+
let bool_tracker = [];
220+
221+
if (arr_val.length == 0){
222+
dtypes.push("string");
223+
}
234224

235-
if (arr.length < config.get_dtype_test_lim) {
236-
lim = arr.length - 1;
237-
} else {
238-
lim = config.get_dtype_test_lim - 1;
239-
}
240-
arr.forEach((ele, indx) => {
241-
let count = indx;
242-
if (typeof ele == "boolean") {
243-
float_tracker.push(false);
225+
if (arr_val.length < config.get_dtype_test_lim) {
226+
lim = arr_val.length - 1;
227+
} else {
228+
lim = config.get_dtype_test_lim - 1;
229+
}
230+
arr_val.forEach((ele, indx) => {
231+
let count = indx;
232+
if (typeof ele == "boolean") {
233+
float_tracker.push(false);
234+
int_tracker.push(false);
235+
string_tracker.push(false);
236+
bool_tracker.push(true);
237+
} else if (isNaN(ele) && typeof ele != "string"){
238+
float_tracker.push(true);
239+
int_tracker.push(false);
240+
string_tracker.push(false);
241+
bool_tracker.push(false);
242+
} else if (!isNaN(Number(ele))) {
243+
if (ele.toString().includes(".")) {
244+
float_tracker.push(true);
244245
int_tracker.push(false);
245246
string_tracker.push(false);
246-
bool_tracker.push(true);
247-
} else if (!isNaN(Number(ele))) {
248-
if (ele.toString().includes(".")) {
249-
float_tracker.push(true);
250-
int_tracker.push(false);
251-
string_tracker.push(false);
252-
bool_tracker.push(false);
253-
} else {
254-
float_tracker.push(false);
255-
int_tracker.push(true);
256-
string_tracker.push(false);
257-
bool_tracker.push(false);
258-
}
247+
bool_tracker.push(false);
259248
} else {
260249
float_tracker.push(false);
261-
int_tracker.push(false);
262-
string_tracker.push(true);
250+
int_tracker.push(true);
251+
string_tracker.push(false);
263252
bool_tracker.push(false);
264253
}
254+
} else {
255+
float_tracker.push(false);
256+
int_tracker.push(false);
257+
string_tracker.push(true);
258+
bool_tracker.push(false);
259+
}
265260

266-
if (count == lim) {
261+
if (count == lim) {
267262
// if at least one string appears, return the string dtype
268-
const even = (element) => element == true;
269-
if (string_tracker.some(even)) {
270-
dtypes.push("string");
271-
} else if (float_tracker.some(even)) {
272-
dtypes.push("float32");
273-
} else if (int_tracker.some(even)) {
274-
dtypes.push("int32");
275-
} else if (bool_tracker.some(even)) {
276-
dtypes.push("boolean");
277-
} else {
278-
dtypes.push("undefined");
279-
}
280-
}
281-
});
282-
283-
return dtypes;
284-
} else {
285-
let dtypes = [];
286-
let lim;
287-
288-
arr_val.forEach((ele) => {
289-
let int_tracker = [];
290-
let float_tracker = [];
291-
let string_tracker = [];
292-
let bool_tracker = [];
293-
294-
//remove NaNs from array before checking dtype
295-
let arr = [];
296-
ele.map((val) => {
297-
if (!(isNaN(val) && typeof val != "string")) {
298-
arr.push(val);
299-
}
300-
});
301-
302-
if (arr.length == 0){
303-
dtypes.push("string");
304-
}
305-
306-
if (arr.length < config.get_dtype_test_lim) {
307-
lim = arr.length - 1;
263+
const even = (element) => element == true;
264+
if (string_tracker.some(even)) {
265+
dtypes = "string";
266+
} else if (float_tracker.some(even)) {
267+
dtypes = "float32";
268+
} else if (int_tracker.some(even)) {
269+
dtypes = "int32";
270+
} else if (bool_tracker.some(even)) {
271+
dtypes = "boolean";
308272
} else {
309-
lim = config.get_dtype_test_lim - 1;
273+
dtypes = "undefined";
310274
}
311-
arr.forEach((ele, indx) => {
312-
let count = indx;
313-
if (typeof ele == "boolean") {
314-
float_tracker.push(false);
315-
int_tracker.push(false);
316-
string_tracker.push(false);
317-
bool_tracker.push(true);
318-
} else if (!isNaN(Number(ele))) {
319-
if (ele.toString().includes(".")) {
320-
float_tracker.push(true);
321-
int_tracker.push(false);
322-
string_tracker.push(false);
323-
bool_tracker.push(false);
324-
} else {
325-
float_tracker.push(false);
326-
int_tracker.push(true);
327-
string_tracker.push(false);
328-
bool_tracker.push(false);
329-
}
330-
} else {
331-
float_tracker.push(false);
332-
int_tracker.push(false);
333-
string_tracker.push(true);
334-
bool_tracker.push(false);
335-
}
275+
}
276+
});
336277

337-
if (count == lim) {
338-
// if at least one string appears, return the string dtype
339-
const even = (element) => element == true;
340-
if (string_tracker.some(even)) {
341-
dtypes.push("string");
342-
} else if (float_tracker.some(even)) {
343-
dtypes.push("float32");
344-
} else if (int_tracker.some(even)) {
345-
dtypes.push("int32");
346-
} else if (bool_tracker.some(even)) {
347-
dtypes.push("boolean");
348-
} else {
349-
dtypes.push("undefined");
350-
}
351-
}
352-
});
278+
return dtypes;
279+
}
280+
// infer types from an array of arrays
281+
__get_t(arr_val) {
282+
const self = this;
283+
if (this.__is_1D_array(arr_val)) {
284+
return [ this.__checker(arr_val) ];
285+
} else {
286+
const dtypes = arr_val.map((arr) => {
287+
return self.__checker(arr);
353288
});
354-
355289
return dtypes;
356290
}
357291
}

danfojs-browser/tests/core/utils.js

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,18 @@ describe("Utils Functions", function () {
7070
let result = [ 'float32' ];
7171
assert.deepEqual(utils.__get_t(data), result);
7272
});
73-
73+
it("Returns float type when NaN is present in an 1D array", function () {
74+
let data = [ 1, 2, 3, 45, NaN ];
75+
let result = [ 'float32' ];
76+
assert.deepEqual(utils.__get_t(data), result);
77+
});
78+
it("Returns correct dtype if NaN present in data", function () {
79+
let data = [
80+
[ 18.7, 17.4, 18, NaN, 19.3 ],
81+
[ 20, NaN, 19, 18, 20 ] ];
82+
let result = [ 'float32', 'float32' ];
83+
assert.deepEqual(utils.__get_t(data), result);
84+
});
7485
it("Returns the data type present in an 2D array", function () {
7586
let data = [ [ 'Alice', 'Boy', 'Girl', "39" ], [ 2, 5, 30, 89 ], [ 3.1, 6.1, 40.1, 78.2 ] ];
7687
let result = [ 'string', 'int32', 'float32' ];
@@ -91,16 +102,9 @@ describe("Utils Functions", function () {
91102
let result = [ 'boolean', 'string', 'int32' ];
92103
assert.deepEqual(utils.__get_t(data), result);
93104
});
94-
it("Returns string type if values are NaN", function () {
105+
it("Returns string type if values are all NaN", function () {
95106
let data = [ [ true, false, true ], [ "boy", "girl", "boy" ], [ NaN, undefined, NaN ] ];
96-
let result = [ 'boolean', 'string', 'string' ];
97-
assert.deepEqual(utils.__get_t(data), result);
98-
});
99-
it("Returns correct dtype if NaN present in data", function () {
100-
let data = [
101-
[ 18.7, 17.4, 18, NaN, 19.3 ],
102-
[ 20, NaN, 19, 18, 20 ] ];
103-
let result = [ 'float32', 'int32' ];
107+
let result = [ 'boolean', 'string', 'float32' ];
104108
assert.deepEqual(utils.__get_t(data), result);
105109
});
106110
});

0 commit comments

Comments
 (0)