Skip to content

Commit 9ef8935

Browse files
authored
Merge pull request #196 from sponsfreixes/many_column_grouby
Generalize groupby to support more than two columns
2 parents 2827bbf + c95c201 commit 9ef8935

4 files changed

Lines changed: 289 additions & 705 deletions

File tree

danfojs-browser/src/core/frame.js

Lines changed: 27 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -1105,84 +1105,51 @@ export class DataFrame extends Ndframe {
11051105

11061106
/**
11071107
*
1108-
* @param {col} col is a list of column with maximum length of two
1108+
* @param {col} col is a list of columns
11091109
*/
11101110
groupby(col) {
1111-
let len = this.shape[0];
1112-
let column_names = this.column_names;
1113-
let col_dict = {};
1114-
let key_column = null;
1115-
let col_index = col.map((val) => column_names.indexOf(val));
1116-
let col_dtype = this.dtypes.filter((val, index) => {
1111+
const len = this.shape[0];
1112+
const column_names = this.column_names;
1113+
const col_index = col.map((val) => column_names.indexOf(val));
1114+
const col_dtype = this.dtypes.filter((val, index) => {
11171115
return col_index.includes(index);
11181116
});
11191117

1120-
if (col.length == 2) {
1121-
if (column_names.includes(col[0])) {
1122-
// eslint-disable-next-line no-unused-vars
1123-
var [ data1, col_name1 ] = indexLoc(this, {
1118+
const self = this;
1119+
const data = col.map(
1120+
(column_name) => {
1121+
if (!(column_names.includes(column_name)))
1122+
throw new Error(`column ${column_name} does not exist`);
1123+
const [ column_data, _ ] = indexLoc(self, {
11241124
rows: [ `0:${len}` ],
1125-
columns: [ `${col[0]}` ],
1125+
columns: [ `${column_name}` ],
11261126
type: "loc"
11271127
});
1128-
} else {
1129-
throw new Error(`column ${col[0]} does not exist`);
1130-
}
1131-
if (column_names.includes(col[1])) {
1132-
// eslint-disable-next-line no-unused-vars
1133-
var [ data2, col_name2 ] = indexLoc(this, {
1134-
rows: [ `0:${len}` ],
1135-
columns: [ `${col[1]}` ],
1136-
type: "loc"
1137-
});
1138-
} else {
1139-
throw new Error(`column ${col[1]} does not exist`);
1128+
return column_data;
11401129
}
1130+
);
11411131

1142-
key_column = [ col[0], col[1] ];
1143-
var column_1_Unique = utils.__unique(data1);
1144-
var column_2_unique = utils.__unique(data2);
1145-
1146-
for (var i = 0; i < column_1_Unique.length; i++) {
1147-
let col_value = column_1_Unique[i];
1148-
col_dict[col_value] = {};
1132+
const unique_columns = data.map((column_data) => utils.__unique(column_data));
11491133

1150-
for (var j = 0; j < column_2_unique.length; j++) {
1151-
let col2_value = column_2_unique[j];
1152-
col_dict[col_value][col2_value] = [];
1153-
}
1154-
}
1155-
} else {
1156-
if (column_names.includes(col[0])) {
1157-
// eslint-disable-next-line no-redeclare
1158-
var [ data1, col_name1 ] = indexLoc(this, {
1159-
rows: [ `0:${len}` ],
1160-
columns: [ `${col[0]}` ],
1161-
type: "loc"
1162-
});
1163-
// console.log(data1)
1164-
} else {
1165-
throw new Error(`column ${col[0]} does not exist`);
1166-
}
1167-
key_column = [ col[0] ];
1168-
1169-
var column_Unique = utils.__unique(data1);
1170-
1171-
for (let i = 0; i < column_Unique.length; i++) {
1172-
let col_value = column_Unique[i];
1173-
col_dict[col_value] = [];
1174-
}
1134+
function getRecursiveDict(uniq_columns) {
1135+
const first_uniq_columns = uniq_columns[0];
1136+
const remaining_columns = uniq_columns.slice(1);
1137+
const c_dict = {};
1138+
if (!remaining_columns.length)
1139+
first_uniq_columns.forEach((col_value) => c_dict[col_value] = []);
1140+
else
1141+
first_uniq_columns.forEach((col_value) => c_dict[col_value] = getRecursiveDict(remaining_columns));
1142+
return c_dict;
11751143
}
1144+
const col_dict = getRecursiveDict(unique_columns);
11761145

1177-
let groups = new GroupBy(
1146+
return new GroupBy(
11781147
col_dict,
1179-
key_column,
1148+
col,
11801149
this.values,
11811150
column_names,
11821151
col_dtype
11831152
).group();
1184-
1185-
return groups;
11861153
}
11871154

11881155
/**

0 commit comments

Comments
 (0)