This repository was archived by the owner on Feb 21, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 58
Expand file tree
/
Copy pathMeetupDF.py
More file actions
67 lines (52 loc) · 2.58 KB
/
MeetupDF.py
File metadata and controls
67 lines (52 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import html
import os
import re
from datetime import datetime

import pandas as pd
# Escalate pandas' chained-assignment check from a warning to an exception,
# so a write that would land on a temporary copy fails loudly instead of
# being silently lost.
pd.options.mode.chained_assignment = 'raise'
class MeetupDF(pd.DataFrame):
    """A ``pandas.DataFrame`` of Meetup groups with tidy-up and export helpers.

    ``edit_df`` expects one row per group carrying at least the columns in
    ``_BASE_COLUMNS``; ``next_event`` / ``last_event`` cells are expected to
    hold a dict describing the event, or a null value when there is none.
    """

    # Columns that edit_df always keeps, in output order.
    _BASE_COLUMNS = (
        'name', 'members', 'city', 'description',
        'next_event', 'join_mode', 'link', 'score',
    )

    @property
    def _constructor(self):
        # Make pandas operations (slicing, rename, sort, ...) return MeetupDF
        # instead of falling back to a plain DataFrame.
        return MeetupDF

    @staticmethod
    def clean_description(desc, max_len=285):
        """Return a short plain-text summary of an HTML description.

        HTML tags are stripped, HTML entities are decoded and every run of
        whitespace (newlines and non-breaking spaces included) is collapsed
        to a single space. Whole sentences (text up to and including a
        ``'.'``) are then kept until the summary is at least ``max_len``
        characters long, so the result may exceed ``max_len`` by up to one
        sentence but is never cut mid-sentence.

        Args:
            desc: Raw description. Anything that is not a string (``None``,
                NaN, ...) is treated as an empty description.
            max_len: Length at which no further sentences are appended.

        Returns:
            The cleaned summary; ``''`` for an empty or missing description.
        """
        if not isinstance(desc, str):
            return ''

        # Strip tags first so that escaped markup (e.g. "&lt;b&gt;") survives
        # as literal text once the entities are decoded.
        text = html.unescape(re.sub(r'<[^>]+>', '', desc))
        # str.split() with no argument splits on any Unicode whitespace,
        # which also covers the U+00A0 produced by "&nbsp;".
        text = ' '.join(text.split())

        fragments = text.split('.')
        last_index = len(fragments) - 1
        kept = []
        length = 0
        for i, fragment in enumerate(fragments):
            # The first sentence is always kept, whatever its length.
            if i > 0 and length >= max_len:
                break
            # Re-attach the '.' consumed by split(); the final fragment had
            # no '.' after it, so none is invented for it.
            piece = fragment if i == last_index else fragment + '.'
            kept.append(piece)
            length += len(piece)
        return ''.join(kept).strip()

    @staticmethod
    def parse_extra_col_names(extra_fields):
        """Split a comma-separated string of column names into a list.

        All spaces are removed and empty entries (from a trailing or doubled
        comma) are dropped.
        """
        return [name for name in extra_fields.replace(' ', '').split(',') if name]

    @staticmethod
    def _event_field(event, key, default=-1):
        """Return ``event[key]``, or ``default`` if the event or key is missing."""
        if isinstance(event, dict):
            return event.get(key, default)
        return default

    @staticmethod
    def _event_time(event, default=-1):
        """Return the event's ``'time'`` as a local ``datetime``, else ``default``."""
        if isinstance(event, dict) and 'time' in event:
            # The value is divided by 1000 because it is treated as epoch
            # milliseconds, while fromtimestamp() expects seconds.
            return datetime.fromtimestamp(event['time'] / 1000)
        return default

    def edit_df(self, extra_fields=None):
        """Return a trimmed, flattened copy sorted by member count.

        The result keeps the base columns plus any ``extra_fields``, renames
        ``score`` to ``relevancy_score``, replaces ``description`` with its
        cleaned summary, and expands the event dict columns into scalar
        columns (``next_event_name``, ``next_event_time``,
        ``next_event_rsvp_count`` and, when ``last_event`` was requested,
        ``prev_event_name`` and ``prev_event_rsvp_count``). A missing event
        or event field is recorded as ``-1``. The frame this is called on is
        not modified.

        Args:
            extra_fields: Optional comma-separated string of additional
                column names to keep.

        Returns:
            A new ``MeetupDF`` sorted by ``members`` in descending order.

        Raises:
            KeyError: If a base or requested column is absent.
        """
        columns = list(self._BASE_COLUMNS)
        if extra_fields is not None:
            # Skip names that are already selected so that no column ends up
            # duplicated in the result.
            for name in self.parse_extra_col_names(extra_fields):
                if name not in columns:
                    columns.append(name)

        # rename() hands back a fresh frame, so the assignments below never
        # write into a slice of the caller's data.
        df = self[columns].rename(columns={'score': 'relevancy_score'})
        df['description'] = df['description'].apply(self.clean_description)

        if 'last_event' in columns:
            last_event = df['last_event']
            df['prev_event_name'] = last_event.apply(
                lambda ev: self._event_field(ev, 'name'))
            df['prev_event_rsvp_count'] = last_event.apply(
                lambda ev: self._event_field(ev, 'yes_rsvp_count'))
            df = df.drop(columns=['last_event'])

        next_event = df['next_event']
        df['next_event_name'] = next_event.apply(
            lambda ev: self._event_field(ev, 'name'))
        df['next_event_time'] = next_event.apply(self._event_time)
        df['next_event_rsvp_count'] = next_event.apply(
            lambda ev: self._event_field(ev, 'yes_rsvp_count'))
        df = df.drop(columns=['next_event'])

        return df.sort_values(['members'], ascending=False)

    def save_wb(self, path=os.path.expanduser('~/Documents/'), title='Meetup Groups'):
        """Write the frame to ``<path>/<YYYY-MM-DD> <title>.xlsx``.

        Args:
            path: Directory to write into (a trailing slash is optional).
            title: Used in the file name and, cut to Excel's 31-character
                sheet-name limit, as the sheet name.
        """
        print('\nsaving excel file to {}'.format(path))
        filename = datetime.now().strftime("%Y-%m-%d ") + title + '.xlsx'
        # The context manager saves and closes the workbook on exit;
        # ExcelWriter.save() no longer exists in pandas 2.x.
        with pd.ExcelWriter(
            os.path.join(path, filename),
            engine='xlsxwriter',
            date_format="m/d/yyy",
            datetime_format="m/d/yyy",
        ) as writer:
            self.to_excel(writer, sheet_name=title[:31], merge_cells=False, index=False)