11// import { Series } from "./series"
2- import { DataFrame } from "./frame"
2+ import { DataFrame } from "./frame"
33import { Utils } from "./utils"
44
// Shared helper instance used by the encoding functions in this module.
const utils = new Utils();
66
7- function oneHot ( in_data , prefix , prefix_sep ) {
7+ function oneHot ( in_data , prefix , prefix_sep ) {
88 let data_set = new Set ( in_data ) ;
99 let labels = Array . from ( data_set ) ;
1010 let prefix_labels = null ;
11- if ( prefix ) {
12- prefix_labels = labels . map ( ( x ) => {
11+ if ( prefix ) {
12+ prefix_labels = labels . map ( ( x ) => {
1313 return prefix + prefix_sep + x
1414 } )
15- } else {
16- prefix_labels = labels . map ( ( x ) => {
15+ } else {
16+ prefix_labels = labels . map ( ( x ) => {
1717 return x
1818 } )
1919 }
20-
20+
2121 let onehot_data = utils . __zeros ( in_data . length , labels . length )
2222
23- for ( let i = 0 ; i < in_data . length ; i ++ ) {
23+ for ( let i = 0 ; i < in_data . length ; i ++ ) {
2424
2525 let elem = in_data [ i ]
2626 let elem_index = labels . indexOf ( elem )
@@ -39,104 +39,104 @@ function oneHot(in_data, prefix, prefix_sep){
3939 * columns: [Array] columns to be encoded in DataFrame.
4040 * }
4141 */
function get_dummy(kwargs = {}) {
    // One-hot encodes `kwargs.data`.
    //
    // kwargs.data       Array, DataFrame, or any object exposing `.values`
    //                   (presumably a Series -- confirm against callers).
    // kwargs.prefix     Non-DataFrame input: value prepended to every label.
    //                   DataFrame input: Array with one prefix per encoded column;
    //                   defaults to the encoded column names.
    // kwargs.prefix_sep Separator placed between prefix and label (default "_").
    // kwargs.columns    DataFrame input only: names of the columns to encode;
    //                   defaults to every column whose col_type is "string".
    //
    // Returns a new DataFrame. For DataFrame input the encoded columns are removed
    // and the generated indicator columns are appended after the remaining ones.
    //
    // Throws Error when `data` is missing, when a requested column does not exist,
    // or when `prefix` for a DataFrame is not an Array of matching length.
    utils.__in_object(kwargs, "data", "data not provided");

    const data = kwargs["data"];
    let prefix = kwargs["prefix"] || null;
    // A plain string; the previous array default ["_"] only worked through
    // implicit array-to-string coercion during concatenation.
    const prefix_sep = kwargs["prefix_sep"] || "_";
    let columns = kwargs["columns"] || null;

    const is_dataframe = !Array.isArray(data) && data instanceof DataFrame;

    if (!is_dataframe) {
        const values = Array.isArray(data) ? data : data.values;
        const [onehot_data, prefix_labels] = oneHot(values, prefix, prefix_sep);
        return new DataFrame(onehot_data, { columns: prefix_labels });
    }

    // Positions (within the DataFrame) of the columns that will be encoded.
    const column_index = [];
    if (!columns) {
        columns = [];
        data.col_types.forEach((col_type, i) => {
            if (col_type === "string") {
                columns.push(data.columns[i]);
                column_index.push(i);
            }
        });
    } else {
        columns.forEach((name) => {
            // Look the name up in the DataFrame's columns, not in the requested
            // list itself (which would always yield 0, 1, 2, ...).
            const col_idx = data.columns.indexOf(name);
            if (col_idx === -1) {
                throw new Error(`column "${name}" not found in DataFrame`);
            }
            column_index.push(col_idx);
        });
    }

    if (prefix) {
        if (!Array.isArray(prefix)) {
            throw new Error("prefix for dataframe must be an array");
        }
        if (prefix.length !== columns.length) {
            throw new Error("prefix must be the same length with the number of onehot encoding column");
        }
    } else {
        prefix = columns;
    }

    const col_data = data.col_data;
    const one_hotColumns = [];
    const one_hotData = [];

    column_index.forEach((col_idx, i) => {
        const [onehot_data, prefix_labels] = oneHot(col_data[col_idx], prefix[i], prefix_sep);
        one_hotColumns.push(...prefix_labels);

        if (one_hotData.length === 0) {
            // First encoded column seeds the rows.
            onehot_data.forEach((row) => {
                one_hotData.push(row);
            });
        } else {
            // Later columns are appended to the right of each existing row.
            onehot_data.forEach((row, r) => {
                one_hotData[r].push(...row);
            });
        }
    });

    const final_data = data.values.map((row, i) => {
        const kept = utils.__remove_arr(row.slice(), column_index);
        // one_hotData is empty when nothing was selected for encoding.
        kept.push(...(one_hotData[i] || []));
        return kept;
    });

    const final_columns = utils.__remove_arr(data.columns, column_index);
    final_columns.push(...one_hotColumns);

    return new DataFrame(final_data, { columns: final_columns });
}
142142
0 commit comments