Skip to content

Commit 1eeb39f

Browse files
committed
accept Series as input in encodings
1 parent fdf94ae commit 1eeb39f

3 files changed

Lines changed: 90 additions & 72 deletions

File tree

danfojs/src/preprocessing/encodings.js

Lines changed: 28 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,32 @@
11
// import * as tf from '@tensorflow/tfjs-node'
22
import { Series } from "../core/series"
3-
import { Utils} from "../core/utils"
3+
import { Utils } from "../core/utils"
44
import { DataFrame } from "../core/frame"
55

66
const utils = new Utils
77

8-
export class LabelEncoder{
8+
export class LabelEncoder {
99

1010
/**
1111
*
1212
* @param {data} data [Array|Series]
1313
* @returns Array.
1414
*/
15-
fit(data){
15+
fit(data) {
1616
let in_data = null;
17-
if(Array.isArray(data)){
17+
if (Array.isArray(data)) {
1818
in_data = data;
19-
}
20-
else if(data instanceof Series){
19+
} else if (data instanceof Series) {
2120
in_data = data.values;
22-
}else{
23-
throw new Error("data must be an array")
21+
} else {
22+
throw new Error("data must be an array or a Series")
2423
}
2524

2625
let data_set = new Set(in_data);
2726
this.label = Array.from(data_set);
2827

2928
let self = this;
30-
let output_data = in_data.map((x)=>{
29+
let output_data = in_data.map((x) => {
3130
return self.label.indexOf(x)
3231
});
3332

@@ -39,19 +38,18 @@ export class LabelEncoder{
3938
* @param {data} data [Array|Series]
4039
* @returns Array
4140
*/
42-
transform(data){
41+
transform(data) {
4342
let in_data = null;
44-
if(Array.isArray(data)){
43+
if (Array.isArray(data)) {
4544
in_data = data;
46-
}
47-
else if(in_data instanceof Series){
48-
in_data = in_data.values;
49-
}else{
50-
throw new Error("data must be an array")
45+
} else if (data instanceof Series) {
46+
in_data = data.values;
47+
} else {
48+
throw new Error("data must be an array or a Series")
5149
}
5250

5351
let self = this;
54-
let output_data = in_data.map((x)=>{
52+
let output_data = in_data.map((x) => {
5553
return self.label.indexOf(x)
5654
});
5755
return new Series(output_data)
@@ -60,14 +58,13 @@ export class LabelEncoder{
6058

6159
export class OneHotEncoder {
6260

63-
fit(data){
61+
fit(data) {
6462
let in_data = null;
65-
if(Array.isArray(data)){
63+
if (Array.isArray(data)) {
6664
in_data = data;
67-
}
68-
else if(data instanceof Series){
65+
} else if (data instanceof Series) {
6966
in_data = data.values;
70-
}else{
67+
} else {
7168
throw new Error("data must be an array")
7269
}
7370

@@ -76,38 +73,37 @@ export class OneHotEncoder {
7673

7774
let onehot_data = utils.__zeros(in_data.length, this.label.length)
7875

79-
for(let i=0; i < in_data.length; i++){
76+
for (let i = 0; i < in_data.length; i++) {
8077

8178
let elem = in_data[i]
8279
let elem_index = this.label.indexOf(elem)
8380
onehot_data[i][elem_index] = 1
8481
}
8582

86-
return new DataFrame(onehot_data,{columns: this.label});
83+
return new DataFrame(onehot_data, { columns: this.label });
8784

8885
}
8986

90-
transform(data){
87+
transform(data) {
9188
let in_data = null;
92-
if(Array.isArray(data)){
89+
90+
if (Array.isArray(data)) {
9391
in_data = data;
94-
}
95-
else if(data instanceof Series){
92+
} else if (data instanceof Series) {
9693
in_data = data.values;
97-
}else{
94+
} else {
9895
throw new Error("data must be an array")
9996
}
10097

10198
let onehot_data = utils.__zeros(in_data.length, this.label.length)
10299

103-
for(let i=0; i < in_data.length; i++){
104-
100+
for (let i = 0; i < in_data.length; i++) {
105101
let elem = in_data[i]
106102
let elem_index = this.label.indexOf(elem)
107103
onehot_data[i][elem_index] = 1
108104
}
109105

110-
return new DataFrame(onehot_data,{columns: this.label});
106+
return new DataFrame(onehot_data, { columns: this.label });
111107

112108
}
113109
}

danfojs/tests/preprocessing/encodings.js

Lines changed: 61 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -2,72 +2,94 @@ import { assert } from "chai"
22
import { LabelEncoder, OneHotEncoder } from "../../src/preprocessing/encodings"
33
import { Series } from "../../src/core/series"
44

5-
describe("Encodings", function(){
5+
describe("Encodings", function () {
66

7-
describe("LabelEncoder", function(){
7+
describe("LabelEncoder", function () {
88

9-
it("test the label encoding on array", function(){
10-
let data = ["dog","cat","man","dog","cat","man","man","cat"]
9+
it("test the label encoding on array", function () {
10+
let data = ["dog", "cat", "man", "dog", "cat", "man", "man", "cat"]
1111
let encode = new LabelEncoder()
1212
let fit_data = [
1313
0, 1, 2, 0,
1414
1, 2, 2, 1
15-
]
16-
assert.deepEqual(encode.fit(data).values,fit_data)
17-
assert.deepEqual(encode.transform(["dog","man"]).values,[0,2])
15+
]
16+
assert.deepEqual(encode.fit(data).values, fit_data)
17+
assert.deepEqual(encode.transform(["dog", "man"]).values, [0, 2])
1818
});
19-
it("test the label encoding on Series", function(){
20-
let data = ["dog","cat","man","dog","cat","man","man","cat"]
19+
it("test the label encoding on Series", function () {
20+
let data = ["dog", "cat", "man", "dog", "cat", "man", "man", "cat"]
2121
let series = new Series(data)
2222
let encode = new LabelEncoder()
2323
let fit_data = [
2424
0, 1, 2, 0,
2525
1, 2, 2, 1
26-
]
27-
assert.deepEqual(encode.fit(series).values,fit_data)
28-
assert.deepEqual(encode.transform(["dog","man"]).values,[0,2])
26+
]
27+
assert.deepEqual(encode.fit(series).values, fit_data)
28+
assert.deepEqual(encode.transform(["dog", "man"]).values, [0, 2])
2929
});
30+
it("label encoding directly from a Series", function () {
31+
let data = new Series(["dog", "cat", "man", "dog", "cat", "man", "man", "cat"])
32+
let to_label_encode = new Series(["dog", "man"])
33+
let encode = new LabelEncoder()
34+
let fit_data = [
35+
0, 1, 2, 0,
36+
1, 2, 2, 1
37+
]
38+
assert.deepEqual(encode.fit(data).values, fit_data)
39+
assert.deepEqual(encode.transform(to_label_encode).values, [0, 2])
40+
});
41+
// it("Label encoding on Series", function () {
42+
// let data = ["dog", "cat", "man", "dog", "cat", "man", "man", "cat"]
43+
// let series = new Series(data)
44+
// let encode = new LabelEncoder()
45+
// let fit_data = [
46+
// 0, 1, 2, 0,
47+
// 1, 2, 2, 1
48+
// ]
49+
// assert.deepEqual(encode.fit(series).values, fit_data)
50+
// assert.deepEqual(encode.transform(["dog", "man"]).values, [0, 2])
51+
// });
3052

3153
})
3254

33-
describe("OneHotEncoder", function(){
55+
describe("OneHotEncoder", function () {
3456

35-
it("test onehotencoding on array", function(){
36-
let data = ["dog","cat","man","dog","cat","man","man","cat"]
57+
it("test onehotencoding on array", function () {
58+
let data = ["dog", "cat", "man", "dog", "cat", "man", "man", "cat"]
3759
let encode = new OneHotEncoder()
3860
let fit_data = [
39-
[ 1, 0, 0 ],
40-
[ 0, 1, 0 ],
41-
[ 0, 0, 1 ],
42-
[ 1, 0, 0 ],
43-
[ 0, 1, 0 ],
44-
[ 0, 0, 1 ],
45-
[ 0, 0, 1 ],
46-
[ 0, 1, 0 ]
47-
]
48-
let transform_data = [ [ 0, 0, 1 ], [ 0, 1, 0 ] ]
61+
[1, 0, 0],
62+
[0, 1, 0],
63+
[0, 0, 1],
64+
[1, 0, 0],
65+
[0, 1, 0],
66+
[0, 0, 1],
67+
[0, 0, 1],
68+
[0, 1, 0]
69+
]
70+
let transform_data = [[0, 0, 1], [0, 1, 0]]
4971

5072
assert.deepEqual(encode.fit(data).values, fit_data);
51-
assert.deepEqual(encode.transform(["man","cat"]).values, transform_data)
73+
assert.deepEqual(encode.transform(["man", "cat"]).values, transform_data)
5274
})
53-
it("test onehotencoding on Series", function(){
54-
let data = ["dog","cat","man","dog","cat","man","man","cat"]
75+
it("test onehotencoding on Series", function () {
76+
let data = ["dog", "cat", "man", "dog", "cat", "man", "man", "cat"]
5577
let series = new Series(data)
5678
let encode = new OneHotEncoder()
5779
let fit_data = [
58-
[ 1, 0, 0 ],
59-
[ 0, 1, 0 ],
60-
[ 0, 0, 1 ],
61-
[ 1, 0, 0 ],
62-
[ 0, 1, 0 ],
63-
[ 0, 0, 1 ],
64-
[ 0, 0, 1 ],
65-
[ 0, 1, 0 ]
66-
]
67-
let transform_data = [ [ 0, 0, 1 ], [ 0, 1, 0 ] ]
80+
[1, 0, 0],
81+
[0, 1, 0],
82+
[0, 0, 1],
83+
[1, 0, 0],
84+
[0, 1, 0],
85+
[0, 0, 1],
86+
[0, 0, 1],
87+
[0, 1, 0]
88+
]
89+
let transform_data = [[0, 0, 1], [0, 1, 0]]
6890

6991
assert.deepEqual(encode.fit(series).values, fit_data);
70-
assert.deepEqual(encode.transform(["man","cat"]).values, transform_data)
92+
assert.deepEqual(encode.transform(new Series(["man", "cat"])).values, transform_data)
7193
})
7294
});
7395

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
"table": "^5.4.6"
2424
},
2525
"scripts": {
26-
"test": "nyc mocha --require @babel/register danfojs/tests/core/generic",
26+
"test": "nyc mocha --require @babel/register danfojs/tests/preprocessing/encodings",
2727
"dev": "npm run lint && babel ./danfojs/src -d dist --no-comments",
2828
"build": "babel ./danfojs/src -d ./dist --no-comments",
2929
"lint": "eslint ./danfojs/src",

0 commit comments

Comments
 (0)