Skip to content

Commit db3a551

Browse files
authored
Merge pull request #147 from opensource9ja/fix/dtype-check
Fix dtype check bug
2 parents d19f267 + 7c55463 commit db3a551

4 files changed

Lines changed: 157 additions & 252 deletions

File tree

danfojs-browser/src/core/utils.js

Lines changed: 65 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -210,145 +210,87 @@ export class Utils {
210210
}
211211
}
212212

213-
// Infer the dtype of a 1D array, or of each inner array of an array of arrays
214-
__get_t(arr_val) {
215-
if (this.__is_1D_array(arr_val)) {
216-
const dtypes = [];
217-
let int_tracker = [];
218-
let float_tracker = [];
219-
let string_tracker = [];
220-
let bool_tracker = [];
221-
let lim;
222-
223-
//remove NaNs from array
224-
let arr = [];
225-
arr_val.map((val) => {
226-
if (!(isNaN(val) && typeof val != "string")) {
227-
arr.push(val);
228-
}
229-
});
213+
__checker(arr_val){
214+
let dtypes = [];
215+
let lim;
216+
let int_tracker = [];
217+
let float_tracker = [];
218+
let string_tracker = [];
219+
let bool_tracker = [];
220+
221+
if (arr_val.length == 0){
222+
dtypes.push("string");
223+
}
230224

231-
if (arr.length < config.get_dtype_test_lim) {
232-
lim = arr.length - 1;
233-
} else {
234-
lim = config.get_dtype_test_lim - 1;
235-
}
236-
arr.forEach((ele, indx) => {
237-
let count = indx;
238-
if (typeof ele == "boolean") {
239-
float_tracker.push(false);
225+
if (arr_val.length < config.get_dtype_test_lim) {
226+
lim = arr_val.length - 1;
227+
} else {
228+
lim = config.get_dtype_test_lim - 1;
229+
}
230+
arr_val.forEach((ele, indx) => {
231+
let count = indx;
232+
if (typeof ele == "boolean") {
233+
float_tracker.push(false);
234+
int_tracker.push(false);
235+
string_tracker.push(false);
236+
bool_tracker.push(true);
237+
} else if (isNaN(ele) && typeof ele != "string"){
238+
float_tracker.push(true);
239+
int_tracker.push(false);
240+
string_tracker.push(false);
241+
bool_tracker.push(false);
242+
} else if (!isNaN(Number(ele))) {
243+
if (ele.toString().includes(".")) {
244+
float_tracker.push(true);
240245
int_tracker.push(false);
241246
string_tracker.push(false);
242-
bool_tracker.push(true);
243-
} else if (!isNaN(Number(ele))) {
244-
if (ele.toString().includes(".")) {
245-
float_tracker.push(true);
246-
int_tracker.push(false);
247-
string_tracker.push(false);
248-
bool_tracker.push(false);
249-
} else {
250-
float_tracker.push(false);
251-
int_tracker.push(true);
252-
string_tracker.push(false);
253-
bool_tracker.push(false);
254-
}
247+
bool_tracker.push(false);
255248
} else {
256249
float_tracker.push(false);
257-
int_tracker.push(false);
258-
string_tracker.push(true);
250+
int_tracker.push(true);
251+
string_tracker.push(false);
259252
bool_tracker.push(false);
260253
}
261-
262-
if (count == lim) {
263-
// If at least one string appears, the dtype is string; otherwise float32, int32, then boolean are tried in that order
264-
const even = (element) => element == true;
265-
if (string_tracker.some(even)) {
266-
dtypes.push("string");
267-
} else if (float_tracker.some(even)) {
268-
dtypes.push("float32");
269-
} else if (int_tracker.some(even)) {
270-
dtypes.push("int32");
271-
} else if (bool_tracker.some(even)) {
272-
dtypes.push("boolean");
273-
} else {
274-
dtypes.push("undefined");
275-
}
276-
}
277-
});
278-
279-
return dtypes;
280-
} else {
281-
const dtypes = [];
282-
let lim;
283-
if (arr_val[0].length < config.get_dtype_test_lim) {
284-
lim = arr_val[0].length - 1;
285254
} else {
286-
lim = config.get_dtype_test_lim - 1;
255+
float_tracker.push(false);
256+
int_tracker.push(false);
257+
string_tracker.push(true);
258+
bool_tracker.push(false);
287259
}
288-
arr_val.forEach((ele) => {
289-
let int_tracker = [];
290-
let float_tracker = [];
291-
let string_tracker = [];
292-
let bool_tracker = [];
293-
294-
// Replace NaNs with the string "NaN" before checking dtype (they are not removed here)
295-
let arr = [];
296-
ele.map((val) => {
297-
if (!(isNaN(val) && typeof val != "string")) {
298-
arr.push(val);
299-
} else {
300-
arr.push("NaN"); //set NaN to string and return dtype ""string". The caller should explicitly convert the dtype
301-
}
302-
});
303260

304-
arr.forEach((ele, indx) => {
305-
let count = indx;
306-
if (typeof ele == "boolean") {
307-
float_tracker.push(false);
308-
int_tracker.push(false);
309-
string_tracker.push(false);
310-
bool_tracker.push(true);
311-
} else if (!isNaN(Number(ele))) {
312-
if (ele.toString().includes(".")) {
313-
float_tracker.push(true);
314-
int_tracker.push(false);
315-
string_tracker.push(false);
316-
bool_tracker.push(false);
317-
} else {
318-
float_tracker.push(false);
319-
int_tracker.push(true);
320-
string_tracker.push(false);
321-
bool_tracker.push(false);
322-
}
323-
} else {
324-
float_tracker.push(false);
325-
int_tracker.push(false);
326-
string_tracker.push(true);
327-
bool_tracker.push(false);
328-
}
261+
if (count == lim) {
262+
// If at least one string appears, the dtype is string; otherwise float32, int32, then boolean are tried in that order
263+
const even = (element) => element == true;
264+
if (string_tracker.some(even)) {
265+
dtypes = "string";
266+
} else if (float_tracker.some(even)) {
267+
dtypes = "float32";
268+
} else if (int_tracker.some(even)) {
269+
dtypes = "int32";
270+
} else if (bool_tracker.some(even)) {
271+
dtypes = "boolean";
272+
} else {
273+
dtypes = "undefined";
274+
}
275+
}
276+
});
329277

330-
if (count == lim) {
331-
// If at least one string appears, the dtype is string; otherwise float32, int32, then boolean are tried in that order
332-
const even = (element) => element == true;
333-
if (string_tracker.some(even)) {
334-
dtypes.push("string");
335-
} else if (float_tracker.some(even)) {
336-
dtypes.push("float32");
337-
} else if (int_tracker.some(even)) {
338-
dtypes.push("int32");
339-
} else if (bool_tracker.some(even)) {
340-
dtypes.push("boolean");
341-
} else {
342-
dtypes.push("undefined");
343-
}
344-
}
345-
});
278+
return dtypes;
279+
}
280+
// Infer the dtype of a 1D array, or of each inner array of an array of arrays
281+
__get_t(arr_val) {
282+
const self = this;
283+
if (this.__is_1D_array(arr_val)) {
284+
return [ this.__checker(arr_val) ];
285+
} else {
286+
const dtypes = arr_val.map((arr) => {
287+
return self.__checker(arr);
346288
});
347-
348289
return dtypes;
349290
}
350291
}
351292

293+
352294
__unique(data) {
353295
let unique = new Set();
354296

danfojs-browser/tests/core/utils.js

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,18 @@ describe("Utils Functions", function () {
7070
let result = [ 'float32' ];
7171
assert.deepEqual(utils.__get_t(data), result);
7272
});
73-
73+
it("Returns float type when NaN is present in an 1D array", function () {
74+
let data = [ 1, 2, 3, 45, NaN ];
75+
let result = [ 'float32' ];
76+
assert.deepEqual(utils.__get_t(data), result);
77+
});
78+
it("Returns correct dtype if NaN present in data", function () {
79+
let data = [
80+
[ 18.7, 17.4, 18, NaN, 19.3 ],
81+
[ 20, NaN, 19, 18, 20 ] ];
82+
let result = [ 'float32', 'float32' ];
83+
assert.deepEqual(utils.__get_t(data), result);
84+
});
7485
it("Returns the data type present in an 2D array", function () {
7586
let data = [ [ 'Alice', 'Boy', 'Girl', "39" ], [ 2, 5, 30, 89 ], [ 3.1, 6.1, 40.1, 78.2 ] ];
7687
let result = [ 'string', 'int32', 'float32' ];
@@ -91,9 +102,9 @@ describe("Utils Functions", function () {
91102
let result = [ 'boolean', 'string', 'int32' ];
92103
assert.deepEqual(utils.__get_t(data), result);
93104
});
94-
it("Returns string type if values are NaN", function () {
105+
it("Returns string type if values are all NaN", function () {
95106
let data = [ [ true, false, true ], [ "boy", "girl", "boy" ], [ NaN, undefined, NaN ] ];
96-
let result = [ 'boolean', 'string', 'string' ];
107+
let result = [ 'boolean', 'string', 'float32' ];
97108
assert.deepEqual(utils.__get_t(data), result);
98109
});
99110
});

0 commit comments

Comments
 (0)