Skip to content

Commit e15aff1

Browse files
authored
Merge pull request #186 from opensource9ja/bugzy
Groupby
2 parents 69873ad + 432f984 commit e15aff1

13 files changed

Lines changed: 83 additions & 63 deletions

File tree

danfojs-browser/lib/bundle.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danfojs-browser/lib/bundle.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

danfojs-browser/src/core/frame.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,10 @@ export class DataFrame extends Ndframe {
11121112
let column_names = this.column_names;
11131113
let col_dict = {};
11141114
let key_column = null;
1115+
let col_index = col.map((val) => column_names.indexOf(val));
1116+
let col_dtype = this.dtypes.filter((val, index) => {
1117+
return col_index.includes(index);
1118+
});
11151119

11161120
if (col.length == 2) {
11171121
if (column_names.includes(col[0])) {
@@ -1174,7 +1178,8 @@ export class DataFrame extends Ndframe {
11741178
col_dict,
11751179
key_column,
11761180
this.values,
1177-
column_names
1181+
column_names,
1182+
col_dtype
11781183
).group();
11791184

11801185
return groups;

danfojs-browser/src/core/groupby.js

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@ const utils = new Utils;
1212
* @param {column_name} Array of all column names in the dataframe.
1313
*/
1414
export class GroupBy {
15-
constructor(col_dict, key_col, data, column_name) {
15+
constructor(col_dict, key_col, data, column_name, col_dtype) {
1616

1717
this.key_col = key_col;
1818
this.col_dict = col_dict;
1919
this.data = data;
2020
this.column_name = column_name;
2121
this.data_tensors = {}; //store the tensor version of the groupby data
22+
this.col_dtype = col_dtype;
2223

2324
}
2425

@@ -150,7 +151,8 @@ export class GroupBy {
150151
null,
151152
this.key_col,
152153
null,
153-
col_names
154+
col_names,
155+
this.col_dtype
154156
);
155157

156158
gp.group_col = group_col;
@@ -376,8 +378,8 @@ export class GroupBy {
376378

377379
if (typeof key_data[j] === "undefined" ){
378380
key_data[j] = [];
379-
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
380-
key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
381+
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
382+
key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
381383
key_data[j].push(col_data[j]);
382384
} else {
383385
key_data[j].push(col_data[j]);
@@ -387,8 +389,8 @@ export class GroupBy {
387389
df_data.push(...key_data);
388390

389391
} else {
390-
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
391-
key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
392+
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
393+
key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
392394
key_data.push(...k_data);
393395
df_data.push(key_data);
394396
}
@@ -422,17 +424,17 @@ export class GroupBy {
422424

423425
if (typeof key_data[j] === "undefined" ){
424426
key_data[j] = [];
425-
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
427+
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
426428
key_data[j].push(col_data[j]);
427429
} else {
428430
key_data[j].push(col_data[j]);
429431
}
430432
}
431-
df_data.push(...key_data);
433+
// df_data.push(...key_data);
432434
}
433-
435+
df_data.push(...key_data);
434436
} else {
435-
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
437+
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
436438
key_data.push(...key_val);
437439
df_data.push(key_data);
438440
}
@@ -465,10 +467,11 @@ export class GroupBy {
465467
let data = [];
466468
let count_group = {};
467469
if (this.key_col.length == 2) {
468-
469-
for (let key in this.data_tensors) {
470+
471+
for (let key in df_data) {
470472
count_group[key] = {};
471-
for (let key2 in this.data_tensors[key]) {
473+
for (let key2 in df_data[key]) {
474+
let index;
472475
count_group[key][key2] = [];
473476
for (let i = 0; i < df_data[key][key2].length; i++ ) {
474477
let callable_rslt = callable(df_data[key][key2][i]);

danfojs-browser/src/io/reader.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import { DataFrame } from "../core/frame";
1919
*/
2020
export const read_csv = async (source, configs = {}) => {
2121
let { start, end } = configs;
22-
if (!(source.startsWith("https") || source.startsWith("http"))) {
22+
if (!(source.startsWith("https") || source.startsWith("http") || source.startsWith("blob"))) {
2323
throw new Error("Cannot read local file in browser environment");
2424
}
2525
let tfdata = [];

danfojs-browser/tests/core/groupby.js

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -260,14 +260,14 @@ describe("groupby", function () {
260260
let df = new dfd.DataFrame(data);
261261
let group_df = df.groupby([ "A"]);
262262
let rslt = [
263-
[ 'foo', 5, 3 ], [ 'foo', 6, 4 ],
264-
[ 'foo', 7, 7 ], [ 'foo', 9, 8 ],
265-
[ 'foo', 10, 9 ], [ 'foo', 5, 3 ],
266-
[ 'foo', 6, 4 ], [ 'foo', 7, 7 ],
267-
[ 'foo', 9, 8 ], [ 'foo', 10, 9 ],
268-
[ 'bar', 4, 5 ], [ 'bar', 3, 6 ],
269-
[ 'bar', 8, 4 ], [ 'bar', 4, 5 ],
270-
[ 'bar', 3, 6 ], [ 'bar', 8, 4 ]
263+
[ 'foo', 5, 3 ],
264+
[ 'foo', 6, 4 ],
265+
[ 'foo', 7, 7 ],
266+
[ 'foo', 9, 8 ],
267+
[ 'foo', 10, 9 ],
268+
[ 'bar', 4, 5 ],
269+
[ 'bar', 3, 6 ],
270+
[ 'bar', 8, 4 ]
271271
];
272272
assert.deepEqual(group_df.col(['D', 'C']).apply((x) => x.add(2)).values, rslt);
273273
});

danfojs-node/dist/core/frame.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,10 @@ class DataFrame extends _generic.default {
928928
let column_names = this.column_names;
929929
let col_dict = {};
930930
let key_column = null;
931+
let col_index = col.map(val => column_names.indexOf(val));
932+
let col_dtype = this.dtypes.filter((val, index) => {
933+
return col_index.includes(index);
934+
});
931935

932936
if (col.length == 2) {
933937
if (column_names.includes(col[0])) {
@@ -986,7 +990,7 @@ class DataFrame extends _generic.default {
986990
}
987991
}
988992

989-
let groups = new _groupby.GroupBy(col_dict, key_column, this.values, column_names).group();
993+
let groups = new _groupby.GroupBy(col_dict, key_column, this.values, column_names, col_dtype).group();
990994
return groups;
991995
}
992996

danfojs-node/dist/core/groupby.js

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,13 @@ var _series = require("./series");
1414
const utils = new _utils.Utils();
1515

1616
class GroupBy {
17-
constructor(col_dict, key_col, data, column_name) {
17+
constructor(col_dict, key_col, data, column_name, col_dtype) {
1818
this.key_col = key_col;
1919
this.col_dict = col_dict;
2020
this.data = data;
2121
this.column_name = column_name;
2222
this.data_tensors = {};
23+
this.col_dtype = col_dtype;
2324
}
2425

2526
group() {
@@ -118,7 +119,7 @@ class GroupBy {
118119
}
119120
}
120121

121-
const gp = new GroupBy(null, this.key_col, null, col_names);
122+
const gp = new GroupBy(null, this.key_col, null, col_names, this.col_dtype);
122123
gp.group_col = group_col;
123124
gp.group_col_name = col_names;
124125
return gp;
@@ -315,8 +316,8 @@ class GroupBy {
315316
for (let j = 0; j < col_data.length; j++) {
316317
if (typeof key_data[j] === "undefined") {
317318
key_data[j] = [];
318-
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
319-
key_data[j][1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
319+
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
320+
key_data[j][1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
320321
key_data[j].push(col_data[j]);
321322
} else {
322323
key_data[j].push(col_data[j]);
@@ -326,8 +327,8 @@ class GroupBy {
326327

327328
df_data.push(...key_data);
328329
} else {
329-
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
330-
key_data[1] = isNaN(parseInt(key_2)) ? key_2 : parseInt(key_2);
330+
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
331+
key_data[1] = this.col_dtype[1] === "string" ? key_2 : parseInt(key_2);
331332
key_data.push(...k_data);
332333
df_data.push(key_data);
333334
}
@@ -360,17 +361,17 @@ class GroupBy {
360361
for (let j = 0; j < col_data.length; j++) {
361362
if (typeof key_data[j] === "undefined") {
362363
key_data[j] = [];
363-
key_data[j][0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
364+
key_data[j][0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
364365
key_data[j].push(col_data[j]);
365366
} else {
366367
key_data[j].push(col_data[j]);
367368
}
368369
}
369-
370-
df_data.push(...key_data);
371370
}
371+
372+
df_data.push(...key_data);
372373
} else {
373-
key_data[0] = isNaN(parseInt(key_1)) ? key_1 : parseInt(key_1);
374+
key_data[0] = this.col_dtype[0] === "string" ? key_1 : parseInt(key_1);
374375
key_data.push(...key_val);
375376
df_data.push(key_data);
376377
}
@@ -408,10 +409,11 @@ class GroupBy {
408409
let count_group = {};
409410

410411
if (this.key_col.length == 2) {
411-
for (let key in this.data_tensors) {
412+
for (let key in df_data) {
412413
count_group[key] = {};
413414

414-
for (let key2 in this.data_tensors[key]) {
415+
for (let key2 in df_data[key]) {
416+
let index;
415417
count_group[key][key2] = [];
416418

417419
for (let i = 0; i < df_data[key][key2].length; i++) {

danfojs-node/dist/io/reader.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ const read_csv = async (source, configs = {}) => {
2525
end
2626
} = configs;
2727

28-
if (!(source.startsWith("file://") || source.startsWith("http"))) {
28+
if (!(source.startsWith("file://") || source.startsWith("http") || source.startsWith("blob"))) {
2929
source = source.startsWith("/") ? `file://${source}` : `file://${process.cwd()}/${source}`;
3030
}
3131

danfojs-node/src/core/frame.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,10 @@ export class DataFrame extends Ndframe {
11101110
let column_names = this.column_names;
11111111
let col_dict = {};
11121112
let key_column = null;
1113-
1113+
let col_index = col.map((val) => column_names.indexOf(val));
1114+
let col_dtype = this.dtypes.filter((val, index) => {
1115+
return col_index.includes(index);
1116+
});
11141117
if (col.length == 2) {
11151118
if (column_names.includes(col[0])) {
11161119
// eslint-disable-next-line no-unused-vars
@@ -1172,7 +1175,8 @@ export class DataFrame extends Ndframe {
11721175
col_dict,
11731176
key_column,
11741177
this.values,
1175-
column_names
1178+
column_names,
1179+
col_dtype
11761180
).group();
11771181

11781182
return groups;

0 commit comments

Comments
 (0)