1010import requests
1111from bs4 import BeautifulSoup
1212from contextlib import contextmanager
13+ try :
14+ import pandas as pd
15+ except ImportError :
16+ pd = None
1317
1418
1519class TestLuminati (unittest .TestCase ):
@@ -140,6 +144,21 @@ def setUpClass(cls):
140144
141145 scholarly .use_proxy (proxy_generator , secondary_proxy_generator )
142146
147+ # Try storing the file temporarily as `scholarly.csv` and delete it.
148+ # If there exists already a file with that name, generate a random name
149+ # that does not exist yet, so we can safely delete it.
150+ cls .mandates_filename = "scholarly.csv"
151+ while os .path .exists (cls .mandates_filename ):
152+ cls .mandates_filename = '' .join (random .choices ('abcdefghijklmnopqrstuvwxyz' , k = 10 )) + ".csv"
153+
@classmethod
def tearDownClass(cls):
    """
    Clean up the mandates CSV file downloaded by the tests.

    Removes the file named by ``cls.mandates_filename``. A file that is
    already absent is not treated as an error.
    """
    # Remove directly instead of checking for existence first, so there is
    # no window between the check and the removal.
    try:
        os.remove(cls.mandates_filename)
    except FileNotFoundError:
        pass
161+
143162 @staticmethod
144163 @contextmanager
145164 def suppress_stdout ():
@@ -632,53 +651,79 @@ def test_pubs_custom_url(self):
632651 self .assertGreaterEqual (pub ['num_citations' ], 581 )
633652
def test_download_mandates_csv(self):
    """
    Test that we can download the mandates CSV and read it.

    The file is downloaded to ``self.mandates_filename`` only when it is not
    already on disk. Its policy and percentage columns are then compared
    with hard-coded expectations and with the live leaderboard page.
    """
    if not os.path.exists(self.mandates_filename):
        text = scholarly.download_mandates_csv(self.mandates_filename)
        self.assertGreater(len(text), 0)

    funder, policy, percentage2020, percentageOverall = [], [], [], []
    # "utf-8-sig" strips a leading byte-order mark, so the first column is
    # addressed as plain "Funder" instead of a BOM-prefixed key, and the
    # result no longer depends on the platform's default encoding.
    # newline="" is the mode the csv module documents for its readers.
    with open(self.mandates_filename, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            funder.append(row['Funder'])
            policy.append(row['Policy'])
            percentage2020.append(row['2020'])
            percentageOverall.append(row['Overall'])

    agency_policy = {
        "US National Science Foundation": "https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
        "Department of Science & Technology, India": "http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf",
        "Swedish Research Council": "https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html",
        "Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": "",
    }
    agency_2020 = {
        "US National Science Foundation": "87%",
        "Department of Science & Technology, India": "49%",
        "Swedish Research Council": "89%",
        "Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning": "88%",
    }

    # A timeout keeps the test from hanging forever on an unresponsive server.
    response = requests.get(
        "https://scholar.google.com/citations?view_op=mandates_leaderboard&hl=en",
        timeout=30,
    )
    soup = BeautifulSoup(response.text, "html.parser")
    agency_overall = soup.find_all("td", class_="gsc_mlt_n gsc_mlt_bd")

    # NOTE(review): the indices look like each funder's 1-based position on
    # the leaderboard page, shifted to 0-based -- confirm against the page.
    for agency, index in zip(agency_policy, [4 - 1, 10 - 1, 19 - 1, 64 - 1]):
        agency_index = funder.index(agency)
        self.assertEqual(policy[agency_index], agency_policy[agency])
        # Check that the percentage values from CSV and on the page agree.
        self.assertEqual(percentageOverall[agency_index], agency_overall[index].text)
        # The percentage fluctuates, so we can't check the exact value.
        self.assertAlmostEqual(
            int(percentage2020[agency_index][:-1]),
            int(agency_2020[agency][:-1]),
            delta=2,
        )
693+
@unittest.skipIf(pd is None, reason="pandas is not installed")
def test_download_mandates_csv_with_pandas(self):
    """
    Test that we can use pandas to read the CSV file.

    The file is downloaded to ``self.mandates_filename`` only when it is not
    already on disk. Four known funders are then looked up by name and their
    policy URLs and percentages are compared with the expected values.
    """
    if not os.path.exists(self.mandates_filename):
        text = scholarly.download_mandates_csv(self.mandates_filename)
        self.assertGreater(len(text), 0)
    df = pd.read_csv(self.mandates_filename, usecols=["Funder", "Policy", "2020", "Overall"]).fillna("")
    self.assertGreater(len(df), 0)

    funders = [
        "US National Science Foundation",
        "Department of Science & Technology, India",
        "Swedish Research Council",
        "Swedish Research Council for Environment, Agricultural Sciences and Spatial Planning",
    ]
    policies = [
        "https://www.nsf.gov/pubs/2015/nsf15052/nsf15052.pdf",
        "http://www.dst.gov.in/sites/default/files/APPROVED%20OPEN%20ACCESS%20POLICY-DBT%26DST%2812.12.2014%29_1.pdf",
        "https://www.vr.se/english/applying-for-funding/requirements-terms-and-conditions/publishing-open-access.html",
        "",
    ]
    percentage_overall = [84, 54, 83, 83]
    percentage_2020 = [87, 49, 89, 88]

    rows = df["Funder"].isin(funders)
    self.assertEqual(rows.sum(), len(funders))

    # Select by label in the order of `funders`, so the comparisons below do
    # not depend on where each funder currently sits in the CSV.
    selected = df[rows].set_index("Funder").loc[funders]
    self.assertEqual(selected["Policy"].tolist(), policies)

    observed = zip(selected["Overall"], selected["2020"], percentage_overall, percentage_2020)
    for overall, in_2020, expected_overall, expected_2020 in observed:
        # Values are strings ending in "%"; they fluctuate, so allow a
        # tolerance of 2 percentage points.
        self.assertAlmostEqual(int(overall[:-1]), expected_overall, delta=2)
        self.assertAlmostEqual(int(in_2020[:-1]), expected_2020, delta=2)
682727
683728 def test_save_journal_leaderboard (self ):
684729 """
@@ -694,7 +739,6 @@ def test_save_journal_leaderboard(self):
694739 with open (filename , "r" ) as f :
695740 csv_reader = csv .DictReader (f )
696741 for row in csv_reader :
697- #import pdb; pdb.set_trace()
698742 self .assertEqual (row ['Publication' ], 'The Astrophysical Journal' )
699743 self .assertEqual (row ['h5-index' ], '161' )
700744 self .assertEqual (row ['h5-median' ], '239' )
0 commit comments