Skip to content

Commit 499f1d7

Browse files
authored
Merge pull request #148 from opensource9ja/fix/reader
Fix reader params and test
2 parents db3a551 + c3b452e commit 499f1d7

5 files changed

Lines changed: 138 additions & 71 deletions

File tree

danfojs-browser/lib/bundle.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danfojs-browser/lib/bundle.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danfojs-browser/src/io/reader.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ export const read_json = async (source) => {
4747
*
4848
* * @param {kwargs} kwargs --> {
4949
* source : string, URL or local file path to retrieve Excel file.
50-
* sheet_name : string, (Optional) Name of the sheet which u want to parse. Default will be the first sheet.
50+
* sheet : string, (Optional) Name of the sheet which you want to parse. Default will be the first sheet.
5151
* header_index : int, (Optional) Index of the row which represents the header(columns) of the data. Default will be the first non empty row.
5252
* data_index : int, (Optional) Index of the row from which actual data(content) starts. Default will be the next row of `header_index`
5353
* }
5454
* @returns {Promise} DataFrame structure of parsed Excel data
5555
*/
56-
export const read_excel = async (kwargs) => {
57-
let { source, sheet_name, header_index, data_index } = kwargs;
56+
export const read_excel = async (source, configs = {}) => {
57+
let { sheet, header_index, data_index } = configs;
5858
let workbook;
5959
if (!header_index) {
6060
//default header_index
@@ -72,7 +72,7 @@ export const read_excel = async (kwargs) => {
7272

7373

7474
// Parse worksheet from workbook
75-
const worksheet = workbook.Sheets[sheet_name || workbook.SheetNames[0]];
75+
const worksheet = workbook.Sheets[sheet || workbook.SheetNames[0]];
7676
let range = XLSX.utils.decode_range(worksheet["!ref"]);
7777
let column_names = [],
7878
data = [];

danfojs-browser/tests/io/reader.js

Lines changed: 19 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -12,44 +12,25 @@ describe("read_csv", async function () {
1212
});
1313
});
1414

15-
// describe("read_json", async function () {
16-
// this.timeout(10000); // all tests in this suite get 10 seconds before timeout
17-
// it("reads a json file from source over the internet", async function () {
18-
// const jUrl =
19-
// "https://raw.githubusercontent.com/risenW/Tensorflowjs_Projects/master/recommender-sys/Python-Model/web_book_data.json";
20-
21-
// dfd.read_json(jUrl).then((df) => {
22-
// const num_of_columns = df.column_names.length;
23-
// assert.equal(num_of_columns, 4);
24-
// });
25-
// });
26-
27-
// it("reads a json file from source from local disk", async function () {
28-
// const jUrl = "danfojs/tests/samples/book.json";
15+
describe("read_json", async function () {
  // Fetching over the network can be slow, so every test in this suite
  // is allowed 10 seconds before mocha reports a timeout.
  this.timeout(10000);

  it("reads a json file from source over the internet", async function () {
    const jUrl =
      "https://raw.githubusercontent.com/risenW/Tensorflowjs_Projects/master/recommender-sys/Python-Model/web_book_data.json";

    // The parsed frame is expected to expose exactly four columns.
    const { column_names } = await dfd.read_json(jUrl);
    assert.equal(column_names.length, 4);
  });
});
4627

47-
// it("reads an excel file from source from local disk", async function () {
48-
// const file_url = "danfojs/tests/samples/SampleData.xlsx";
49-
// dfd.read_excel({ source: file_url, header_index: 7, data_index: 8 }).then(
50-
// (df) => {
51-
// assert(df.columns.length, 4);
52-
// }
53-
// );
54-
// });
55-
// });
28+
describe("read_excel", async function () {
  // Downloading the workbook can be slow, so every test in this suite
  // is allowed 10 seconds before mocha reports a timeout.
  this.timeout(10000);

  it("reads an excel file from source over the internet", async function () {
    const remote_url =
      "https://file-examples-com.github.io/uploads/2017/02/file_example_XLS_100.xls";
    const df = await dfd.read_excel(remote_url);

    // `assert(value, 8)` only checks that `value` is truthy and uses 8 as the
    // failure message, so it passed for any non-zero column count. Compare
    // the count explicitly instead.
    assert.equal(df.columns.length, 8);
  });
});

danfojs-node/dist/core/utils.js

Lines changed: 113 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -171,35 +171,120 @@ class Utils {
171171
}
172172
}
173173

174+
__checker(arr_val) {
175+
let dtypes = [];
176+
let lim;
177+
let int_tracker = [];
178+
let float_tracker = [];
179+
let string_tracker = [];
180+
let bool_tracker = [];
181+
182+
if (arr_val.length == 0) {
183+
dtypes.push("string");
184+
}
185+
186+
if (arr_val.length < config.get_dtype_test_lim) {
187+
lim = arr_val.length - 1;
188+
} else {
189+
lim = config.get_dtype_test_lim - 1;
190+
}
191+
192+
arr_val.forEach((ele, indx) => {
193+
let count = indx;
194+
195+
if (typeof ele == "boolean") {
196+
float_tracker.push(false);
197+
int_tracker.push(false);
198+
string_tracker.push(false);
199+
bool_tracker.push(true);
200+
} else if (isNaN(ele) && typeof ele != "string") {
201+
float_tracker.push(true);
202+
int_tracker.push(false);
203+
string_tracker.push(false);
204+
bool_tracker.push(false);
205+
} else if (!isNaN(Number(ele))) {
206+
if (ele.toString().includes(".")) {
207+
float_tracker.push(true);
208+
int_tracker.push(false);
209+
string_tracker.push(false);
210+
bool_tracker.push(false);
211+
} else {
212+
float_tracker.push(false);
213+
int_tracker.push(true);
214+
string_tracker.push(false);
215+
bool_tracker.push(false);
216+
}
217+
} else {
218+
float_tracker.push(false);
219+
int_tracker.push(false);
220+
string_tracker.push(true);
221+
bool_tracker.push(false);
222+
}
223+
224+
if (count == lim) {
225+
const even = element => element == true;
226+
227+
if (string_tracker.some(even)) {
228+
dtypes = "string";
229+
} else if (float_tracker.some(even)) {
230+
dtypes = "float32";
231+
} else if (int_tracker.some(even)) {
232+
dtypes = "int32";
233+
} else if (bool_tracker.some(even)) {
234+
dtypes = "boolean";
235+
} else {
236+
dtypes = "undefined";
237+
}
238+
}
239+
});
240+
return dtypes;
241+
}
242+
174243
__get_t(arr_val) {
244+
const self = this;
245+
175246
if (this.__is_1D_array(arr_val)) {
176-
const dtypes = [];
247+
return [this.__checker(arr_val)];
248+
} else {
249+
const dtypes = arr_val.map(arr => {
250+
return self.__checker(arr);
251+
});
252+
return dtypes;
253+
}
254+
}
255+
256+
__get_tt(arr_val) {
257+
if (this.__is_1D_array(arr_val)) {
258+
let dtypes = [];
177259
let int_tracker = [];
178260
let float_tracker = [];
179261
let string_tracker = [];
180262
let bool_tracker = [];
181263
let lim;
182-
let arr = [];
183-
arr_val.map(val => {
184-
if (!(isNaN(val) && typeof val != "string")) {
185-
arr.push(val);
186-
}
187-
});
188264

189-
if (arr.length < config.get_dtype_test_lim) {
190-
lim = arr.length - 1;
265+
if (arr_val.length == 0) {
266+
dtypes.push("string");
267+
}
268+
269+
if (arr_val.length < config.get_dtype_test_lim) {
270+
lim = arr_val.length - 1;
191271
} else {
192272
lim = config.get_dtype_test_lim - 1;
193273
}
194274

195-
arr.forEach((ele, indx) => {
275+
arr_val.forEach((ele, indx) => {
196276
let count = indx;
197277

198278
if (typeof ele == "boolean") {
199279
float_tracker.push(false);
200280
int_tracker.push(false);
201281
string_tracker.push(false);
202282
bool_tracker.push(true);
283+
} else if (isNaN(ele) && typeof ele != "string") {
284+
float_tracker.push(true);
285+
int_tracker.push(false);
286+
string_tracker.push(false);
287+
bool_tracker.push(false);
203288
} else if (!isNaN(Number(ele))) {
204289
if (ele.toString().includes(".")) {
205290
float_tracker.push(true);
@@ -237,28 +322,24 @@ class Utils {
237322
});
238323
return dtypes;
239324
} else {
240-
const dtypes = [];
325+
let dtypes = [];
241326
let lim;
242-
243-
if (arr_val[0].length < config.get_dtype_test_lim) {
244-
lim = arr_val[0].length - 1;
245-
} else {
246-
lim = config.get_dtype_test_lim - 1;
247-
}
248-
249-
arr_val.forEach(ele => {
327+
arr_val.forEach(arr => {
250328
let int_tracker = [];
251329
let float_tracker = [];
252330
let string_tracker = [];
253331
let bool_tracker = [];
254-
let arr = [];
255-
ele.map(val => {
256-
if (!(isNaN(val) && typeof val != "string")) {
257-
arr.push(val);
258-
} else {
259-
arr.push("NaN");
260-
}
261-
});
332+
333+
if (arr.length == 0) {
334+
dtypes.push("string");
335+
}
336+
337+
if (arr.length < config.get_dtype_test_lim) {
338+
lim = arr.length - 1;
339+
} else {
340+
lim = config.get_dtype_test_lim - 1;
341+
}
342+
262343
arr.forEach((ele, indx) => {
263344
let count = indx;
264345

@@ -279,6 +360,11 @@ class Utils {
279360
string_tracker.push(false);
280361
bool_tracker.push(false);
281362
}
363+
} else if (isNaN(ele) && typeof ele != "string") {
364+
float_tracker.push(true);
365+
int_tracker.push(false);
366+
string_tracker.push(false);
367+
bool_tracker.push(false);
282368
} else {
283369
float_tracker.push(false);
284370
int_tracker.push(false);

0 commit comments

Comments
 (0)