@@ -17,17 +17,20 @@ using Tables: materializer
1717
1818export add_label_column!, pivot, subsetMD, top_n_values
1919
# Column identifier accepted by the public functions: a column name given
# either as a String or as a Symbol.
const COLUMN_TYPES = Union{String, Symbol}
# Same as COLUMN_TYPES, but additionally admits `nothing`, which the functions
# in this file use as the "argument not supplied" default.
const OPTIONAL_COLUMN_TYPES = Union{COLUMN_TYPES, Nothing}
22+
2023"""
21- function add_label_column!(to_df, from_df, new_col_name [, id])::Nothing
24+ function add_label_column!(feature_df, source_df, new_column [, id])
2225
2326Add column to a DataFrame based on symbol presence in the target DataFrame
2427
2528# Arguments
2629- `
27- - `to_df ::AbstractDataFrame`: feature DataFrame to which label column is added
28- - `from_df ::AbstractDataFrame`: DataFrame containing the label column
29- - `new_col_name `: name assigned to label column
30- - `id=nothing `: row IDs (Default: first column)
30+ - `feature_df ::AbstractDataFrame`: feature DataFrame to which label column is added
31+ - `source_df ::AbstractDataFrame`: DataFrame containing the label column
32+ - `new_column::Union{String, Symbol} `: name assigned to label column
33+ - `id::Union{Nothing, String, Symbol} `: row IDs (Default: first column)
3134
3235# Examples
3336```jldoctest
5659
5760```
5861"""
59- function add_label_column! (to_df:: AbstractDataFrame , from_df:: AbstractDataFrame , new_col_name, id= nothing ):: Nothing
62+ function add_label_column! (
63+ feature_df:: AbstractDataFrame ,
64+ source_df:: AbstractDataFrame ,
65+ new_column:: COLUMN_TYPES ,
66+ id:: OPTIONAL_COLUMN_TYPES = nothing ,
67+ ):: Nothing
6068
6169 # Error checks
62- for arg in [to_df, from_df ]
70+ for arg in [feature_df, source_df ]
6371 if size (arg)[1 ] < 1
6472 # @warn "DataFrame must have at least 1 row"
6573 throw (DomainError (arg))
@@ -72,31 +80,30 @@ function add_label_column!(to_df::AbstractDataFrame, from_df::AbstractDataFrame,
7280
7381 # Assign missing arguments
7482 if isnothing (id)
75- id = names (to_df )[1 ]
83+ id = names (feature_df )[1 ]
7684 end
7785
7886 # Add column
79- # insertcols!(to_df, new_col_name => [x[id] in from_df [!,id] for x in eachrow(to_df )])
80- insertcols! (to_df, new_col_name => map (x -> x in from_df [! , id], to_df [! , id]))
87+ # insertcols!(feature_df, new_column => [x[id] in source_df [!,id] for x in eachrow(feature_df )])
88+ insertcols! (feature_df, new_column => map (x -> x in source_df [! , id], feature_df [! , id]))
8189
82- coerce! (to_df, new_col_name => OrderedFactor{2 })
90+ coerce! (feature_df, new_column => OrderedFactor{2 })
8391 return nothing
8492end
# Generic-table method of `add_label_column!`.
#
# Checks that both inputs are tables, converts each of them to a `DataFrame`,
# and forwards to the `AbstractDataFrame` method.
#
# Arguments
# - `feature_table`: table to which the label column is added
# - `source_table`: table containing the label column
# - `new_column`: name assigned to the label column (required, see below)
# - `id`: row-ID column; `nothing` is forwarded unchanged to the DataFrame method
#
# Throws `ArgumentError` when `new_column` is `nothing`; `assert_is_table`
# throws `ArgumentError` when an input is not a table.
#
# The positional order (`new_column` before `id`) is the order in which the
# arguments have always been forwarded to the DataFrame method; the parameter
# names now reflect that, so existing positional calls behave the same.
#
# NOTE(review): the label column is inserted into the `DataFrame` built here,
# and this method returns `nothing`; whether the caller's `feature_table`
# observes the new column depends on `DataFrame(feature_table)` sharing
# storage with it -- confirm this is the intended contract.
function add_label_column!(
    feature_table::Any,
    source_table::Any,
    new_column::OPTIONAL_COLUMN_TYPES=nothing,
    id::OPTIONAL_COLUMN_TYPES=nothing,
)::Nothing
    assert_is_table(feature_table)
    assert_is_table(source_table)

    # Without a column name the forwarded call below cannot match the
    # DataFrame method (it requires a String or Symbol) and would dispatch
    # back to this method indefinitely, so fail early with a clear error.
    if isnothing(new_column)
        throw(ArgumentError("new_column must be a String or Symbol, got nothing"))
    end

    feature_df = DataFrame(feature_table)
    # Build the label source from `source_table`, not from `feature_table`.
    source_df = DataFrame(source_table)

    return add_label_column!(feature_df, source_df, new_column, id)
end
98105
99- function assert_is_table (x)
106+ function assert_is_table (x:: Any ) :: Nothing
100107 if ! istable (x)
101108 msg = " Input must be a table, but $(typeof (x)) is not a table"
102109 throw (ArgumentError (msg))
@@ -131,7 +138,11 @@ pivot(df)
131138
132139```
133140"""
134- function pivot (df:: AbstractDataFrame , newcols= nothing , y= nothing ):: AbstractDataFrame
141+ function pivot (
142+ df:: AbstractDataFrame ,
143+ newcols:: OPTIONAL_COLUMN_TYPES = nothing ,
144+ y:: OPTIONAL_COLUMN_TYPES = nothing ,
145+ ):: AbstractDataFrame
135146
136147 # Error checks
137148 if size (df)[1 ] < 1
@@ -169,7 +180,7 @@ function pivot(df::AbstractDataFrame, newcols=nothing, y=nothing)::AbstractDataF
169180 end
170181 return B
171182end
172- function pivot (obj)
183+ function pivot (obj:: Any ) :: Any
173184 assert_is_table (obj)
174185 df = DataFrame (obj):: DataFrame
175186 df:: DataFrame
@@ -238,7 +249,12 @@ subsetMD(X,Y)
238249
239250```
240251"""
241- function subsetMD (main_df:: AbstractDataFrame , check_df:: AbstractDataFrame , main_id= nothing , check_id= nothing ):: AbstractDataFrame
252+ function subsetMD (
253+ main_df:: AbstractDataFrame ,
254+ check_df:: AbstractDataFrame ,
255+ main_id:: OPTIONAL_COLUMN_TYPES = nothing ,
256+ check_id:: OPTIONAL_COLUMN_TYPES = nothing ,
257+ ):: AbstractDataFrame
242258
243259 # Assign missing arguments
244260 if isnothing (main_id)
@@ -257,11 +273,11 @@ end
257273=#
258274
"""
    top_n_values(df::AbstractDataFrame, col::Union{String, Symbol}, n::Integer)::AbstractDataFrame

Find the top `n` values of column `col` by occurrence.

The rows of `df` are grouped by `col`, the rows of each group are counted, and
the groups are sorted by that count in descending order; at most the first `n`
rows of the sorted result are returned.

Useful for initial feasibility checks, but medical codes are not considered.

# Arguments
- `df::AbstractDataFrame`: DataFrame whose rows are counted
- `col::Union{String, Symbol}`: name of the column to group by
- `n::Integer`: number of leading rows to keep

# Returns
- `AbstractDataFrame`: one row per group, with the group size in column `nrow`
"""
function top_n_values(
    df::AbstractDataFrame,
    col::COLUMN_TYPES,
    n::Integer,
)::AbstractDataFrame
    # One row per distinct value of `col`, with the group size in column "nrow"
    counts = combine(nrow, groupby(df, col))
    # Largest groups first
    ranked = sort(counts, "nrow"; rev=true)
    return first(ranked, n)
end
267283
0 commit comments