"""Process D3-style JSON graph data read from standard input into Multinet CSV files."""
2-
32import csv
43import json
54import sys
65
76
def add_key(rec, idx):
    """Add a key value to the character records.

    The record is modified in place and the same object is returned.

    Args:
        rec: Mutable mapping describing one node. Must contain the keys
            ``id``, ``influential`` and ``original``.
        idx: Position of the record in the input list. Accepted for
            interface compatibility; the key is taken from ``rec['id']``,
            not from this index.

    Returns:
        ``rec`` itself, with ``_key`` set to the former ``id`` value, the
        two flag fields rewritten as ``'true'``/``'false'``, and the ``id``
        and ``utc_offset`` entries removed.

    Raises:
        KeyError: If ``id``, ``influential`` or ``original`` is missing.
    """
    rec['_key'] = rec['id']

    # Only the exact string 'False' maps to 'false'; every other value
    # (including 'True') is written out as 'true'.
    rec['influential'] = 'false' if rec['influential'] == 'False' else 'true'
    rec['original'] = 'false' if rec['original'] == 'False' else 'true'

    # 'utc_offset' is only being discarded, so its absence is not an error.
    rec.pop('utc_offset', None)
    del rec['id']

    return rec
1917
2018
def convert_link(link, idx):
    """Convert the D3 JSON link data into a Multinet-style record.

    Args:
        link: Mapping with ``source`` and ``target`` entries identifying the
            two endpoints of the link. Any other entries are ignored.
        idx: Position of the link in the input list; its string form becomes
            the record's ``_key``.

    Returns:
        A new dict with ``_key``, ``_from`` and ``_to``. The endpoint values
        are prefixed with ``people/``.

    Raises:
        KeyError: If ``link`` has no ``source`` or ``target`` entry.
    """
    return {
        '_key': str(idx),
        '_from': f"people/{link['source']}",
        '_to': f"people/{link['target']}",
    }
2926
3027
3128def write_csv (data , fields , filename ):
3229 """Write a CSV file from data and field names."""
33-
34- with open (filename , "w" ) as f :
30+ with open (filename , 'w' ) as f :
3531 writer = csv .DictWriter (f , fieldnames = fields )
3632
3733 writer .writeheader ()
@@ -41,17 +37,16 @@ def write_csv(data, fields, filename):
4137
def main():
    """Run main function.

    Read a JSON document with ``nodes`` and ``links`` lists from standard
    input, reduce it to a small sample, and write the result to
    ``people.csv`` and ``connections.csv`` in the current working directory.

    Returns:
        0, which the script entry point passes to ``sys.exit``.
    """
    data = json.loads(sys.stdin.read())

    # Prepare the node data by giving each record a Multinet key (add_key
    # takes it from the record's own ``id`` field).
    nodes = [add_key(record, index) for (index, record) in enumerate(data['nodes'])]

    # Convert the link data to Multinet form. The ``source``/``target`` values
    # are used unchanged, so they are expected to match the node keys set
    # above.
    links = [convert_link(link, index) for (index, link) in enumerate(data['links'])]

    # Reduce the total number of nodes by truncating to the first 100.
    # Slicing (rather than indexing 0..99) also accepts shorter inputs.
    nodes = nodes[:100]

    # Keep only the links whose two endpoints both survived the truncation.
    # The retained references are collected once so each link costs two set
    # lookups instead of two scans over the node list.
    retained = {f"people/{node['_key']}" for node in nodes}
    links = [
        link
        for link in links
        if link['_from'] in retained and link['_to'] in retained
    ]

    # Thin the remaining links by keeping every tenth one.
    links = links[::10]

    write_csv(
        nodes,
        [
            '_key',
            'followers_count',
            'query_tweet_count',
            'friends_count',
            'statuses_count',
            'listed_count',
            'favourites_count',
            'count_followers_in_query',
            'screen_name',
            'profile_image_url',
            'influential',
            'original',
        ],
        'people.csv',
    )
    write_csv(links, ['_key', '_from', '_to'], 'connections.csv')

    return 0
9287
9388
# Run the conversion only when executed as a script, and use main()'s return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
0 commit comments