-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAttribute_Value_Counts_adjunct_pass2.py
More file actions
45 lines (32 loc) · 1.45 KB
/
Attribute_Value_Counts_adjunct_pass2.py
File metadata and controls
45 lines (32 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 5 11:57:42 2021
@author: xcxg109
"""
import pandas as pd
def attr_values(df):
    """Summarise, per attribute ID, every normalized value with its row count.

    Rows of *df* are counted per (``WS_Attr_ID``, ``WS_Attribute_Name``,
    ``Normalized_Value``) combination.  For each attribute ID the
    combinations are ordered by descending count and rendered as
    ``value[count]`` entries joined with ``'; '``.

    Args:
        df: DataFrame containing at least the columns ``WS_Attr_ID``,
            ``WS_Attribute_Name`` and ``Normalized_Value``.  It is copied,
            never modified in place.

    Returns:
        A DataFrame with the columns ``WS_Attr_ID`` and ``WS ALL Values``,
        one row per attribute ID that has at least one countable row.  Each
        row keeps the index label of that attribute's highest-count row in
        the grouped table.  When nothing can be counted, an empty DataFrame
        without columns is returned.

    Note:
        ``groupby`` leaves out rows whose grouping keys are missing (pandas
        default), so an attribute whose rows all lack a name or a normalized
        value produces no output row.  Such attributes are skipped.
    """
    func_df = df.copy()
    func_df['Count'] = 1
    func_df['Comma Separated Values'] = ''
    # Debug trace kept so the function's console output stays unchanged.
    print('func_df = ', func_df.columns)

    atts = func_df['WS_Attr_ID'].unique().tolist()

    grouping_cols = ['WS_Attr_ID', 'WS_Attribute_Name', 'Normalized_Value']
    vals = func_df.groupby(grouping_cols)['Count'].sum().reset_index()

    summary_rows = []
    summary_index = []
    for attribute in atts:
        temp_df_att = vals.loc[vals['WS_Attr_ID'] == attribute]
        if temp_df_att.empty:
            # Every row of this attribute was dropped by the groupby (missing
            # keys), or the ID itself is NaN: there is nothing to report.
            continue

        # Stable sort: equal counts keep the groupby order, so the rendered
        # string is deterministic.
        temp_df_att = temp_df_att.sort_values(
            by=['Count'], ascending=[False], kind='mergesort'
        )

        val_counts = [
            f'{value}[{count}]'
            for value, count in zip(
                temp_df_att['Normalized_Value'], temp_df_att['Count']
            )
        ]
        summary_rows.append(
            {'WS_Attr_ID': attribute, 'WS ALL Values': '; '.join(val_counts)}
        )
        summary_index.append(temp_df_att.index[0])

    if not summary_rows:
        return pd.DataFrame()

    return pd.DataFrame(
        summary_rows,
        index=summary_index,
        columns=['WS_Attr_ID', 'WS ALL Values'],
    )