@@ -55,7 +55,8 @@ def extract_directory_tree(input_path, ignore_dirs, ignore_files, visual=0):
5555 ignore_set = tuple (list (ignore_dirs ) + list (ignore_files ) + ignore_set )
5656 if visual :
5757 paths = DisplayablePath .make_tree (Path (input_path ), criteria = lambda
58- path : True if path .name not in ignore_set and not os .path .join ("../" , path .name ).endswith (".pyc" ) else False )
58+ path : True if path .name not in ignore_set and not os .path .join ("../" , path .name ).endswith (
59+ ".pyc" ) else False )
5960 for path in paths :
6061 print (path .displayable ())
6162 return get_directory_structure (input_path , ignore_set )
@@ -75,7 +76,7 @@ def prune_json(json_dict):
7576 else :
7677 for a , b in json_dict .items ():
7778 if a == "ast" and b :
78- final_dict [a ] = b # Avoid pruning AST fields
79+ final_dict [a ] = b # Avoid pruning AST fields
7980 continue
8081 if b or isinstance (b , bool ):
8182 if isinstance (b , dict ):
@@ -100,14 +101,13 @@ def extract_requirements(input_path):
100101 # Answering yes (echo y), we allow searching for PyPI
101102 # for the missing modules and filter some unnecessary modules.
102103
103-
104- #print(sys.version_info)
105- if sys .version_info [0 ] <= 3 and sys .version_info [1 ]<= 9 :
104+ # print(sys.version_info)
105+ if sys .version_info [0 ] <= 3 and sys .version_info [1 ] <= 9 :
106106 cmd = 'echo y | pigar -P ' + input_path + ' -p ' + file_name
107107 else :
108108 cmd = ' pigar generate ' + input_path + ' -f ' + file_name + ' --question-answer yes --auto-select'
109-
110- #print("-----> cmd: %s" %cmd)
109+
110+ # print("-----> cmd: %s" %cmd)
111111 proc = subprocess .Popen (cmd .encode ('utf-8' ), shell = True , stdin = subprocess .PIPE ,
112112 stdout = subprocess .PIPE , stderr = subprocess .PIPE )
113113 stdout , stderr = proc .communicate ()
@@ -118,7 +118,7 @@ def extract_requirements(input_path):
118118 for line in lines :
119119 try :
120120 if line != "\n " :
121- if " == " in line :
121+ if " == " in line :
122122 splitLine = line .split (" == " )
123123 else :
124124 splitLine = line .split ("==" )
@@ -128,8 +128,8 @@ def extract_requirements(input_path):
128128
129129 # Note: Pigar requirement file is being deleted
130130 # in the future we might want to keep it (just commenting the line below)
131- #os.system('rm ' + file_name)
132- #print("Exracted requirements :%s" %req_dict)
131+ # os.system('rm ' + file_name)
132+ # print("Exracted requirements :%s" %req_dict)
133133 return req_dict
134134
135135 except :
@@ -175,7 +175,7 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
175175 body_only_files = []
176176 flag_service_main = 0
177177 for key in dir_info : # filter (lambda key: key not in "directory_tree", dir_info):
178- if key != "requirements" and key != "directory_tree" : # Note: We need to filter out directory_tree
178+ if key != "requirements" and key != "directory_tree" : # Note: We need to filter out directory_tree
179179 for elem in dir_info [key ]:
180180 if elem ["main_info" ]["main_flag" ]:
181181 flag_service_main = 0
@@ -188,7 +188,7 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
188188 try :
189189 # 2. Exploration for services in files with "mains"
190190 flag_service , software_invocation_info = service_check (elem , software_invocation_info ,
191- server_dependencies , "main" , readme )
191+ server_dependencies , "main" , readme )
192192 except :
193193 main_files .append (elem ["file" ]["path" ])
194194
@@ -209,19 +209,19 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
209209
210210 # this list (of lists) stores the mains that each main import
211211 import_mains = []
212-
212+
213213 # this list (of lists) stores the mains that each main is imported by
214- imported_by = [None ]* len (main_files )
214+ imported_by = [None ] * len (main_files )
215215
216216 # 3. Exploration for main scripts
217217 for m in range (0 , len (main_files )):
218218 m_calls = find_file_calls (main_files [m ], call_list )
219219 # HERE I STORE WHICH OTHER MAIN FILES CALLS EACH "M" MAIN_FILE
220220 m_imports = extract_relations (main_files [m ], m_calls , main_files , call_list )
221-
221+
222222 # storing those m_imports in the import_mains[m]
223223 import_mains .append (m_imports )
224-
224+
225225 for m_i in m_imports :
226226 m_secondary [main_files .index (m_i )] = 1
227227
@@ -286,7 +286,6 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
286286 return software_invocation_info
287287
288288
289-
290289def generate_output_html (pruned_json , output_file_html ):
291290 """
292291 Method to generate a simple HTML view of the obtained JSON.
@@ -331,9 +330,9 @@ def list_functions_classes_from_module(m, path):
331330
332331 type = "internal"
333332 except :
334-
335- #module = __import__(m)
336- #functions = dir(module)
333+
334+ # module = __import__(m)
335+ # functions = dir(module)
337336 type = "external"
338337 return functions , classes , type
339338
@@ -352,22 +351,22 @@ def type_module(m, i, path):
352351 return "internal"
353352 else :
354353 if m :
355- m = m .replace ("." , "/" )
356- file_module = abs_repo_path + "/" + m + ".py"
357- file_module_path = Path (file_module )
358- if file_module_path .is_file ():
359- return "internal"
360- else :
361- file_module = abs_repo_path + "/" + m + "/main.py"
362- file_module_path = Path (file_module )
363- if file_module_path .is_file ():
364- return "internal"
365- else :
366- return "external"
354+ m = m .replace ("." , "/" )
355+ file_module = abs_repo_path + "/" + m + ".py"
356+ file_module_path = Path (file_module )
357+ if file_module_path .is_file ():
358+ return "internal"
359+ else :
360+ file_module = abs_repo_path + "/" + m + "/main.py"
361+ file_module_path = Path (file_module )
362+ if file_module_path .is_file ():
363+ return "internal"
364+ else :
365+ return "external"
367366 else :
368367 dir_module = abs_repo_path + "/" + i
369368 if os .path .exists (dir_module ):
370- return "internal"
369+ return "internal"
371370 else :
372371 return "external"
373372
@@ -419,7 +418,8 @@ def call_list_dir(dir_info):
419418 call_list [dir ][file_path ]["body" ] = extract_call_functions (file_info , body = 1 )
420419 call_list [dir ][file_path ]["classes" ] = {}
421420 for class_n in file_info ["classes" ]:
422- call_list [dir ][file_path ]["classes" ][class_n ] = extract_call_methods (file_info ["classes" ][class_n ]["methods" ])
421+ call_list [dir ][file_path ]["classes" ][class_n ] = extract_call_methods (
422+ file_info ["classes" ][class_n ]["methods" ])
423423 return call_list
424424
425425
@@ -433,8 +433,8 @@ def find_file_calls(file_name, call_list):
def find_module_calls(module, call_list):
    """
    Return the call information of the first file whose path matches a module name.

    :param module: module name to look for (compared as the substring "/<module>.")
    :param call_list: nested mapping ``{directory: {file_path: call_info}}``
    :return: the ``call_info`` stored for the first file path that contains
        ``"/" + module + "."``, or ``None`` when no path matches
    """
    for dir_name, files in call_list.items():
        for file_path, call_info in files.items():
            # Plain substring test: any path containing "/<module>." is accepted
            if "/" + module + "." in file_path:
                return call_info
    # No file matched the requested module
    return None
439439
440440 # DFS algorithm - Allowing up to 2 levels of depth.
@@ -457,7 +457,7 @@ def file_in_call(base, call, file, m_imports, call_list, orig_base, level):
457457 elif orig_base in call :
458458 return 0
459459
460- elif level < level_depth and call != "" :
460+ elif level < level_depth and call != "" :
461461 m_calls_extern = {}
462462 module_base = call .split ("." )[0 ]
463463 module_base = module_base + "."
@@ -522,7 +522,7 @@ def extract_relations(file_name, m_calls, main_files, call_list):
522522 level = 0
523523 flag_found = extract_data (base , m_calls [m_c ], file , m_imports , flag_found , call_list , orig_base , level )
524524 if flag_found :
525- #return m_imports
525+ # return m_imports
526526 break
527527
528528 return m_imports
@@ -622,6 +622,7 @@ def rank_software_invocation(soft_invocation_info_list):
622622 entry ["ranking" ] = position
623623 return soft_invocation_info_list
624624
625+
625626def ast_to_json (ast_obj ):
626627 """
627628 Function to convert the AST object into JSON format.
@@ -631,6 +632,7 @@ def ast_to_json(ast_obj):
631632 ast_generator .tree = ast_obj
632633 return ast_generator .generate_ast ()
633634
635+
634636def ast_to_source_code (ast_obj ):
635637 """
636638 Function to convert the AST object into source code.
@@ -650,8 +652,8 @@ def dice_coefficient(a, b):
650652 if len (b ) == 1 :
651653 b = b + u"."
652654
653- a_bigrams = {a [i : i + 2 ] for i in range (len (a ) - 1 )}
654- b_bigrams = {b [i : i + 2 ] for i in range (len (b ) - 1 )}
655+ a_bigrams = {a [i : i + 2 ] for i in range (len (a ) - 1 )}
656+ b_bigrams = {b [i : i + 2 ] for i in range (len (b ) - 1 )}
655657
656658 overlap = len (a_bigrams & b_bigrams )
657659 dice_coeff = overlap * 2.0 / (len (a_bigrams ) + len (b_bigrams ))
@@ -727,6 +729,7 @@ def detect_license(license_text, licenses_path, threshold=0.9):
727729
728730 return sorted (rank_list , key = lambda t : t [1 ], reverse = True )
729731
732+
730733def extract_readme (input_path : str , output_dir : str ) -> dict :
731734 """
732735 Function to extract content of all readme file under the input directory.
@@ -744,6 +747,7 @@ def extract_readme(input_path: str, output_dir: str) -> dict:
744747
745748 return readme_files
746749
750+
747751def get_github_metadata (input_path : str ) -> dict :
748752 """
749753 Function to extract metadata from the remote repository using Github api.
@@ -773,8 +777,9 @@ def get_github_metadata(input_path: str) -> dict:
773777
774778 return github_metadata
775779
780+
776781def find_index_init (depInfo , calls , class_init ):
777- index_remove = []
782+ index_remove = []
778783 for dep in depInfo :
779784 if dep ["type_element" ] == "class" :
780785 if dep ["import" ] in calls :
@@ -786,19 +791,21 @@ def find_index_init(depInfo, calls, class_init):
786791 index_remove .append (calls .index (i ))
787792 return index_remove
788793
794+
def update_list_calls(info, index_remove):
    """
    Return the calls of ``info`` without the given positions and without duplicates.

    :param info: dictionary holding a ``"calls"`` list
    :param index_remove: iterable of positions in ``info["calls"]`` to drop
    :return: new list with the remaining calls; duplicates are removed and the
        order of first appearance is kept. ``info`` itself is not modified.
    """
    # A set makes the per-position membership test O(1) instead of a list scan
    skip = set(index_remove)
    res = []
    for position, call in enumerate(info["calls"]):
        if position in skip:
            continue
        # Remove duplicate calls while keeping the first occurrence
        if call not in res:
            res.append(call)
    return res
801807
808+
802809def tree_to_variable_index (root_node , index_to_code ):
803810 if (len (root_node .children ) == 0 or root_node .type == 'string' ) and root_node .type != 'comment' :
804811 index = (root_node .start_point , root_node .end_point )
@@ -813,6 +820,7 @@ def tree_to_variable_index(root_node, index_to_code):
813820 code_tokens += tree_to_variable_index (child , index_to_code )
814821 return code_tokens
815822
823+
816824def DFG_python (root_node , index_to_code , states ):
817825 assignment = ['assignment' , 'augmented_assignment' , 'for_in_clause' ]
818826 if_statement = ['if_statement' ]
@@ -979,7 +987,8 @@ def DFG_python(root_node, index_to_code, states):
979987 temp , states = DFG_python (child , index_to_code , states )
980988 DFG += temp
981989
982- return sorted (DFG ,key = lambda x :x [1 ]),states
990+ return sorted (DFG , key = lambda x : x [1 ]), states
991+
983992
984993def tree_to_variable_index (root_node , index_to_code ):
985994 if (len (root_node .children ) == 0 or root_node .type == 'string' ) and root_node .type != 'comment' :
@@ -1009,6 +1018,7 @@ def index_to_code_token(index, code):
10091018 s += code [end_point [0 ]][:end_point [1 ]]
10101019 return s
10111020
1021+
10121022def tree_to_token_index (root_node ):
10131023 if (len (root_node .children ) == 0 or root_node .type == 'string' ) and root_node .type != 'comment' :
10141024 return [(root_node .start_point , root_node .end_point )]
@@ -1018,35 +1028,36 @@ def tree_to_token_index(root_node):
10181028 code_tokens += tree_to_token_index (child )
10191029 return code_tokens
10201030
def extract_dataflow(code, parser, lang):
    """
    Tokenize ``code`` and extract its data-flow graph (DFG).

    :param code: source code as a string
    :param parser: indexable pair; ``parser[0].parse(bytes)`` must return a tree
        exposing ``root_node`` and ``parser[1](root_node, index_to_code, {})`` must
        return a ``(DFG, states)`` tuple
    :param lang: language name; when it is ``"php"`` the code is wrapped in
        ``<?php ... ?>`` before parsing
    :return: tuple ``(code_tokens, dfg)``. ``dfg`` keeps only the DFG entries whose
        index takes part in at least one dependency. If parsing or tokenization
        fails, the values computed so far are returned (empty lists by default).
    """
    # obtain dataflow
    if lang == "php":
        code = "<?php" + code + "?>"

    # Initialise both results so that a failure before tokenization still
    # returns a well-formed pair instead of raising UnboundLocalError.
    code_tokens = []
    dfg = []
    try:
        tree = parser[0].parse(bytes(code, 'utf8'))
        root_node = tree.root_node
        tokens_index = tree_to_token_index(root_node)
        code_lines = code.split('\n')
        code_tokens = [index_to_code_token(x, code_lines) for x in tokens_index]

        # Map every token position to (token order, token text)
        index_to_code = {}
        for idx, (index, token) in enumerate(zip(tokens_index, code_tokens)):
            index_to_code[index] = (idx, token)

        try:
            DFG, _ = parser[1](root_node, index_to_code, {})
        except Exception:
            # Best effort: a failing DFG extractor yields an empty graph
            DFG = []
        DFG = sorted(DFG, key=lambda x: x[1])

        # Collect the indexes that take part in some dependency
        indexs = set()
        for d in DFG:
            if len(d[-1]) != 0:
                indexs.add(d[1])
            for x in d[-1]:
                indexs.add(x)

        dfg = [d for d in DFG if d[1] in indexs]
    except Exception:
        dfg = []
    return code_tokens, dfg
0 commit comments