1+ import ast
12import json
23import tokenize
34import types
45import builtins
56import click
67from docstring_parser import parse as doc_parse
8+ from tree_sitter import Language , Parser
79
810from inspect4py import __version__
911from inspect4py .staticfg import builder
1012from inspect4py .utils import *
13+ # from utils import *
1114
1215"""
1316Code Inspector
2629
2730
2831class CodeInspection :
29- def __init__ (self , path , out_control_flow_path , out_json_path , control_flow , abstract_syntax_tree , source_code ):
32+ def __init__ (self , path , out_control_flow_path , out_json_path , control_flow , abstract_syntax_tree , source_code , data_flow , parser ):
3033 """ init method initializes the Code_Inspection object
3134 :param self self: represent the instance of the class
3235 :param str path: the file to inspect
@@ -41,6 +44,8 @@ def __init__(self, path, out_control_flow_path, out_json_path, control_flow, abs
4144 self .out_json_path = out_json_path
4245 self .abstract_syntax_tree = abstract_syntax_tree
4346 self .source_code = source_code
47+ self .data_flow = data_flow
48+ self .parser = parser
4449 self .tree = self .parser_file ()
4550 if self .tree != "AST_ERROR" :
4651 self .nodes = self .walk ()
@@ -51,13 +56,14 @@ def __init__(self, path, out_control_flow_path, out_json_path, control_flow, abs
5156 self .bodyInfo = self .inspect_body ()
5257 if control_flow :
5358 self .out_control_flow_path = out_control_flow_path
54- self .controlFlowInfo = self .inspect_controlflow ()
59+ self .controlFlowInfo = self .inspect_controlflow ("png" )
5560 else :
5661 self .controlFlowInfo = {}
5762 self .fileJson = self .file_json ()
5863 else :
5964 self .fileJson = {}
6065
66+
6167 def find_classDef (self ):
6268 classDef_nodes = [node for node in self .nodes if isinstance (node , ast .ClassDef )]
6369 class_init = []
@@ -466,6 +472,13 @@ def file_json(self):
466472 json .dump (prune_json (file_dict ), outfile )
467473 return [file_dict , json_file ]
468474
475+ # def get_parser_data_flow(self):
476+ # parser = Parser()
477+ # LANGUAGE = Language(self.symbol_table, "python")
478+ # parser.set_language(LANGUAGE)
479+ # parser = [parser, DFG_python]
480+ # return parser
481+
469482 def _f_definitions (self , functions_definitions ):
470483 """_f_definitions extracts the name, args, docstring
471484 returns, raises of a list of functions or a methods.
@@ -477,11 +490,15 @@ def _f_definitions(self, functions_definitions):
477490 :param list functions_definitions: represent a list with all functions or methods nodes
478491 :return dictionary: a dictionary with the all the information at function/method level
479492 """
480-
493+ # print(functions_definitions)
481494 funcs_info = {}
482495 for f in functions_definitions :
496+ # for node in ast.walk(f):
497+ # print(node.name)
498+
483499 funcs_info [f .name ] = {}
484500 ds_f = ast .get_docstring (f )
501+ # print(ds_f)
485502 try :
486503 docstring = doc_parse (ds_f )
487504 funcs_info [f .name ]["doc" ] = {}
@@ -577,7 +594,10 @@ def _f_definitions(self, functions_definitions):
577594 funcs_info [f .name ]["ast" ] = ast_to_json (f )
578595 if self .source_code :
579596 funcs_info [f .name ]["source_code" ] = ast_to_source_code (f )
580-
597+ if self .data_flow :
598+ code_tokens , dfg = extract_dataflow (funcs_info [f .name ]["source_code" ], self .parser , "python" )
599+ funcs_info [f .name ]["data_flow" ] = dfg
600+ funcs_info [f .name ]["code_tokens" ] = code_tokens
581601 return funcs_info
582602
583603 def _skip_dynamic_calls (self , funcs_info , classes_info , check_name , name , var_name ):
@@ -1204,6 +1224,7 @@ def create_output_dirs(output_dir, control_flow):
12041224@click .option ('-i' , '--input_path' , type = str , required = True , help = "input path of the file or directory to inspect." )
12051225@click .option ('-o' , '--output_dir' , type = str , default = "output_dir" ,
12061226 help = "output directory path to store results. If the directory does not exist, the tool will create it." )
1227+ @click .option ('-st' ,'--symbol_table' , type = str , default = "my_language.so" , help = "symbol table for the target function" )
12071228@click .option ('-ignore_dir' , '--ignore_dir_pattern' , multiple = True , default = ["." , "__pycache__" ],
12081229 help = "ignore directories starting with a certain pattern. This parameter can be provided multiple times "
12091230 "to ignore multiple directory patterns." )
@@ -1231,16 +1252,35 @@ def create_output_dirs(output_dir, control_flow):
12311252 help = "extract all readme files in the target repository." )
12321253@click .option ('-md' , '--metadata' , type = bool , is_flag = True ,
12331254 help = "extract metadata of the target repository using Github API. (requires repository to have the .git folder)" )
1255+ @click .option ('-df' , '--data_flow' , type = bool , is_flag = True ,
1256+ help = "extract data flow graph of every function in the target repository" )
1257+
12341258def main (input_path , output_dir , ignore_dir_pattern , ignore_file_pattern , requirements , html_output , call_list ,
12351259 control_flow , directory_tree , software_invocation , abstract_syntax_tree , source_code , license_detection , readme ,
1236- metadata ):
1260+ metadata , data_flow , symbol_table ):
1261+ if data_flow :
1262+ if symbol_table == "my_language.so" : # default option
1263+ path_to_languages = str (Path (__file__ ).parent / "resources" )
1264+ if sys .platform .startswith ("win" ) or sys .platform .startswith ("cygwin" ):
1265+ language = Language (path_to_languages + os .path .sep + "python_win.so" , "python" )
1266+ else :
1267+ language = Language (path_to_languages + os .path .sep + "python_unix.so" , "python" )
1268+ else :
1269+ language = Language (symbol_table , "python" )
1270+ parser = Parser ()
1271+ parser .set_language (language )
1272+ parser = [parser , DFG_python ]
1273+ else :
1274+ parser = []
1275+
1276+ # print(parsers)
12371277 if (not os .path .isfile (input_path )) and (not os .path .isdir (input_path )):
12381278 print ('The file or directory specified does not exist' )
12391279 sys .exit ()
12401280
12411281 if os .path .isfile (input_path ):
12421282 cf_dir , json_dir = create_output_dirs (output_dir , control_flow )
1243- code_info = CodeInspection (input_path , cf_dir , json_dir , control_flow , abstract_syntax_tree , source_code )
1283+ code_info = CodeInspection (input_path , cf_dir , json_dir , control_flow , abstract_syntax_tree , source_code , data_flow , parser )
12441284
12451285 # Generate the call list of a file
12461286 call_list_data = call_list_file (code_info )
@@ -1279,18 +1319,20 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir
12791319 for f in files :
12801320 if ".py" in f and not f .endswith (".pyc" ):
12811321 try :
1322+
12821323 path = os .path .join (subdir , f )
12831324 relative_path = Path (subdir ).relative_to (Path (input_path ).parent )
12841325 out_dir = str (Path (output_dir ) / relative_path )
12851326 cf_dir , json_dir = create_output_dirs (out_dir , control_flow )
1286- code_info = CodeInspection (path , cf_dir , json_dir , control_flow , abstract_syntax_tree , source_code )
1327+ code_info = CodeInspection (path , cf_dir , json_dir , control_flow , abstract_syntax_tree , source_code , data_flow , parser )
1328+ # print(parsers)
12871329 if code_info .fileJson :
12881330 if out_dir not in dir_info :
12891331 dir_info [out_dir ] = [code_info .fileJson [0 ]]
12901332 else :
12911333 dir_info [out_dir ].append (code_info .fileJson [0 ])
12921334 except :
1293- print ("Error when processing " + f + ": " , sys .exc_info ()[ 0 ] )
1335+ print ("Error when processing " + f + ": " , sys .exc_info ())
12941336 continue
12951337
12961338 # Generate the call list of the Dir
@@ -1332,7 +1374,7 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir
13321374 dir_info ["software_type" ] = "not found"
13331375 if license_detection :
13341376 try :
1335- licenses_path = os . path . join ( os . path . dirname ( os . path . abspath ( __file__ )), "licenses" )
1377+ licenses_path = str ( Path ( __file__ ). parent / "licenses" )
13361378 license_text = extract_license (input_path )
13371379 rank_list = detect_license (license_text , licenses_path )
13381380 dir_info ["license" ] = {}
0 commit comments