@@ -535,10 +535,11 @@ def test_source_code_body(self):
535535 actual_code = code_info .fileJson [0 ]["body" ]["source_code" ]
536536 assert expected_code == actual_code
537537
538+
538539 def test_license_detection (self ):
539540 input_paths = ["./test_files/Chowlk" , "./test_files/pylops" , "./test_files/somef" ]
540541 output_dir = "./output_dir"
541-
542+ fig = False
542543 ignore_dir_pattern = ["." , "__pycache__" ]
543544 ignore_file_pattern = ["." , "__pycache__" ]
544545 requirements = False
@@ -555,14 +556,42 @@ def test_license_detection(self):
555556 expected_liceses = ['Apache-2.0' , 'LGPL-3.0' , 'MIT' ]
556557 first_rank_licenses = []
557558 for input_path in input_paths :
558- dir_info = invoke_inspector (input_path , output_dir , ignore_dir_pattern , ignore_file_pattern , requirements ,
559- call_list , control_flow , directory_tree , software_invocation , abstract_syntax_tree ,
560- source_code , license_detection , readme , metadata )
561- first_rank_licenses .append (next (iter (dir_info ["detected_license" ][0 ])))
559+ dir_info = invoke_inspector (input_path , output_dir , ignore_dir_pattern ,
560+ ignore_file_pattern , requirements ,
561+ call_list , control_flow , directory_tree ,
562+ software_invocation , abstract_syntax_tree ,
563+ source_code , license_detection , readme , metadata )
564+ first_rank_licenses .append (next (iter (dir_info ["license" ]["detected_type" ][0 ])))
562565 shutil .rmtree (output_dir )
563-
566+
564567 assert first_rank_licenses == expected_liceses
565568
def test_license_text_extraction(self):
    """Check that the inspector reports the raw license text of a repository.

    Runs ``invoke_inspector`` on the ``test_license_extraction`` fixture with
    only license detection switched on, then asserts that the text stored
    under ``dir_info["license"]["extracted_text"]`` matches the fixture's
    expected license text.
    """
    expected_text = "A random license."
    input_path = "./test_files/test_license_extraction"
    output_dir = "./output_dir"
    ignore_dir_pattern = [".", "__pycache__"]
    ignore_file_pattern = [".", "__pycache__"]

    # Every optional analysis is disabled except license detection, so the
    # test exercises the license code path only.
    requirements = False
    call_list = False
    control_flow = False
    directory_tree = False
    software_invocation = False
    abstract_syntax_tree = False
    source_code = False
    license_detection = True
    readme = False
    metadata = False

    try:
        dir_info = invoke_inspector(
            input_path, output_dir, ignore_dir_pattern, ignore_file_pattern,
            requirements, call_list, control_flow, directory_tree,
            software_invocation, abstract_syntax_tree, source_code,
            license_detection, readme, metadata,
        )
    finally:
        # test_license_detection removes output_dir after each run; do the
        # same here so this test leaves nothing behind for later tests.
        # ignore_errors covers the case where the directory was never
        # created (NOTE(review): whether invoke_inspector always creates it
        # is not visible from this test).
        shutil.rmtree(output_dir, ignore_errors=True)

    assert dir_info["license"]["extracted_text"] == expected_text
594+
566595
567596 def test_readme (self ):
568597 input_path = "./test_files/test_readme"
@@ -701,9 +730,13 @@ def invoke_inspector(input_path, output_dir, ignore_dir_pattern, ignore_file_pat
701730 # Extract the first for software type.
702731 dir_info ["software_type" ] = rank_software_invocation (soft_invocation_info_list )
703732 if license_detection :
704- licenses_path = os .path .join (os .path .dirname (os .path .abspath (__file__ )), "../inspect4py/licenses" )
705- rank_list = detect_license (input_path , licenses_path )
706- dir_info ["detected_license" ] = [{k : f"{ v :.1%} " } for k , v in rank_list ]
733+ licenses_path = os .path .join (os .path .dirname (os .path .abspath (__file__ )),
734+ "../inspect4py/licenses" )
735+ license_text = extract_license (input_path )
736+ rank_list = detect_license (license_text , licenses_path )
737+ dir_info ["license" ] = {}
738+ dir_info ["license" ]["detected_type" ] = [{k : f"{ v :.1%} " } for k , v in rank_list ]
739+ dir_info ["license" ]["extracted_text" ] = license_text
707740 if readme :
708741 dir_info ["readme_files" ] = extract_readme (input_path )
709742 if metadata :
0 commit comments