Skip to content

Commit 648ff1a

Browse files
committed
Fixing tests so they become platform-independent
1 parent 6dc499b commit 648ff1a

6 files changed

Lines changed: 281 additions & 234 deletions

File tree

inspect4py/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.0.6'
1+
__version__ = '0.0.7'

inspect4py/cli.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,9 +1259,16 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir
12591259
control_flow, directory_tree, software_invocation, abstract_syntax_tree, source_code, license_detection, readme,
12601260
metadata, data_flow, symbol_table):
12611261
if data_flow:
1262-
LANGUAGE = Language(symbol_table, "python")
1262+
if symbol_table == "my_language.so": # default option
1263+
path_to_languages = str(Path(__file__).parent / "resources")
1264+
if sys.platform.startswith("win") or sys.platform.startswith("cygwin"):
1265+
language = Language(path_to_languages + os.path.sep + "python_win.so", "python")
1266+
else:
1267+
language = Language(path_to_languages + os.path.sep + "python_unix.so", "python")
1268+
else:
1269+
language = Language(symbol_table, "python")
12631270
parser = Parser()
1264-
parser.set_language(LANGUAGE)
1271+
parser.set_language(language)
12651272
parser = [parser, DFG_python]
12661273
else:
12671274
parser = []
@@ -1367,7 +1374,7 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir
13671374
dir_info["software_type"] = "not found"
13681375
if license_detection:
13691376
try:
1370-
licenses_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "licenses")
1377+
licenses_path = str(Path(__file__).parent / "licenses")
13711378
license_text = extract_license(input_path)
13721379
rank_list = detect_license(license_text, licenses_path)
13731380
dir_info["license"] = {}
-399 KB
Binary file not shown.

inspect4py/utils.py

Lines changed: 75 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ def extract_directory_tree(input_path, ignore_dirs, ignore_files, visual=0):
5555
ignore_set = tuple(list(ignore_dirs) + list(ignore_files) + ignore_set)
5656
if visual:
5757
paths = DisplayablePath.make_tree(Path(input_path), criteria=lambda
58-
path: True if path.name not in ignore_set and not os.path.join("../", path.name).endswith(".pyc") else False)
58+
path: True if path.name not in ignore_set and not os.path.join("../", path.name).endswith(
59+
".pyc") else False)
5960
for path in paths:
6061
print(path.displayable())
6162
return get_directory_structure(input_path, ignore_set)
@@ -75,7 +76,7 @@ def prune_json(json_dict):
7576
else:
7677
for a, b in json_dict.items():
7778
if a == "ast" and b:
78-
final_dict[a] = b # Avoid pruning AST fields
79+
final_dict[a] = b # Avoid pruning AST fields
7980
continue
8081
if b or isinstance(b, bool):
8182
if isinstance(b, dict):
@@ -100,14 +101,13 @@ def extract_requirements(input_path):
100101
# Answering yes (echo y), we allow searching for PyPI
101102
# for the missing modules and filter some unnecessary modules.
102103

103-
104-
#print(sys.version_info)
105-
if sys.version_info[0] <=3 and sys.version_info[1]<=9:
104+
# print(sys.version_info)
105+
if sys.version_info[0] <= 3 and sys.version_info[1] <= 9:
106106
cmd = 'echo y | pigar -P ' + input_path + ' -p ' + file_name
107107
else:
108108
cmd = ' pigar generate ' + input_path + ' -f ' + file_name + ' --question-answer yes --auto-select'
109-
110-
#print("-----> cmd: %s" %cmd)
109+
110+
# print("-----> cmd: %s" %cmd)
111111
proc = subprocess.Popen(cmd.encode('utf-8'), shell=True, stdin=subprocess.PIPE,
112112
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
113113
stdout, stderr = proc.communicate()
@@ -118,7 +118,7 @@ def extract_requirements(input_path):
118118
for line in lines:
119119
try:
120120
if line != "\n":
121-
if " == " in line:
121+
if " == " in line:
122122
splitLine = line.split(" == ")
123123
else:
124124
splitLine = line.split("==")
@@ -128,8 +128,8 @@ def extract_requirements(input_path):
128128

129129
# Note: Pigar requirement file is being deleted
130130
# in the future we might want to keep it (by just commenting out the line below)
131-
#os.system('rm ' + file_name)
132-
#print("Exracted requirements :%s" %req_dict)
131+
# os.system('rm ' + file_name)
132+
# print("Exracted requirements :%s" %req_dict)
133133
return req_dict
134134

135135
except:
@@ -175,7 +175,7 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
175175
body_only_files = []
176176
flag_service_main = 0
177177
for key in dir_info: # filter (lambda key: key not in "directory_tree", dir_info):
178-
if key!="requirements" and key!="directory_tree": # Note: We need to filter out directory_tree
178+
if key != "requirements" and key != "directory_tree": # Note: We need to filter out directory_tree
179179
for elem in dir_info[key]:
180180
if elem["main_info"]["main_flag"]:
181181
flag_service_main = 0
@@ -188,7 +188,7 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
188188
try:
189189
# 2. Exploration for services in files with "mains"
190190
flag_service, software_invocation_info = service_check(elem, software_invocation_info,
191-
server_dependencies, "main", readme)
191+
server_dependencies, "main", readme)
192192
except:
193193
main_files.append(elem["file"]["path"])
194194

@@ -209,19 +209,19 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
209209

210210
# this list (of lists) stores the mains that each main imports
211211
import_mains = []
212-
212+
213213
# this list (of lists) stores the mains that each main is imported by
214-
imported_by = [None]*len(main_files)
214+
imported_by = [None] * len(main_files)
215215

216216
# 3. Exploration for main scripts
217217
for m in range(0, len(main_files)):
218218
m_calls = find_file_calls(main_files[m], call_list)
219219
# Here we store which other main files are called by each main file "m"
220220
m_imports = extract_relations(main_files[m], m_calls, main_files, call_list)
221-
221+
222222
# storing those m_imports in the import_mains[m]
223223
import_mains.append(m_imports)
224-
224+
225225
for m_i in m_imports:
226226
m_secondary[main_files.index(m_i)] = 1
227227

@@ -286,7 +286,6 @@ def extract_software_invocation(dir_info, dir_tree_info, input_path, call_list,
286286
return software_invocation_info
287287

288288

289-
290289
def generate_output_html(pruned_json, output_file_html):
291290
"""
292291
Method to generate a simple HTML view of the obtained JSON.
@@ -331,9 +330,9 @@ def list_functions_classes_from_module(m, path):
331330

332331
type = "internal"
333332
except:
334-
335-
#module = __import__(m)
336-
#functions = dir(module)
333+
334+
# module = __import__(m)
335+
# functions = dir(module)
337336
type = "external"
338337
return functions, classes, type
339338

@@ -352,22 +351,22 @@ def type_module(m, i, path):
352351
return "internal"
353352
else:
354353
if m:
355-
m = m.replace(".", "/")
356-
file_module = abs_repo_path + "/" + m + ".py"
357-
file_module_path = Path(file_module)
358-
if file_module_path.is_file():
359-
return "internal"
360-
else:
361-
file_module = abs_repo_path + "/" + m + "/main.py"
362-
file_module_path = Path(file_module)
363-
if file_module_path.is_file():
364-
return "internal"
365-
else:
366-
return "external"
354+
m = m.replace(".", "/")
355+
file_module = abs_repo_path + "/" + m + ".py"
356+
file_module_path = Path(file_module)
357+
if file_module_path.is_file():
358+
return "internal"
359+
else:
360+
file_module = abs_repo_path + "/" + m + "/main.py"
361+
file_module_path = Path(file_module)
362+
if file_module_path.is_file():
363+
return "internal"
364+
else:
365+
return "external"
367366
else:
368367
dir_module = abs_repo_path + "/" + i
369368
if os.path.exists(dir_module):
370-
return "internal"
369+
return "internal"
371370
else:
372371
return "external"
373372

@@ -419,7 +418,8 @@ def call_list_dir(dir_info):
419418
call_list[dir][file_path]["body"] = extract_call_functions(file_info, body=1)
420419
call_list[dir][file_path]["classes"] = {}
421420
for class_n in file_info["classes"]:
422-
call_list[dir][file_path]["classes"][class_n] = extract_call_methods(file_info["classes"][class_n]["methods"])
421+
call_list[dir][file_path]["classes"][class_n] = extract_call_methods(
422+
file_info["classes"][class_n]["methods"])
423423
return call_list
424424

425425

@@ -433,8 +433,8 @@ def find_file_calls(file_name, call_list):
433433
def find_module_calls(module, call_list):
434434
for dir in call_list:
435435
for elem in call_list[dir]:
436-
if "/"+module+"." in elem:
437-
#print("---MODULE %s, elem %s, giving call_list[%s][%s]" %(module, elem, dir, elem))
436+
if "/" + module + "." in elem:
437+
# print("---MODULE %s, elem %s, giving call_list[%s][%s]" %(module, elem, dir, elem))
438438
return call_list[dir][elem]
439439

440440
# DFS algorithm - Allowing up to 2 levels of depth.
@@ -457,7 +457,7 @@ def file_in_call(base, call, file, m_imports, call_list, orig_base, level):
457457
elif orig_base in call:
458458
return 0
459459

460-
elif level < level_depth and call!="":
460+
elif level < level_depth and call != "":
461461
m_calls_extern = {}
462462
module_base = call.split(".")[0]
463463
module_base = module_base + "."
@@ -522,7 +522,7 @@ def extract_relations(file_name, m_calls, main_files, call_list):
522522
level = 0
523523
flag_found = extract_data(base, m_calls[m_c], file, m_imports, flag_found, call_list, orig_base, level)
524524
if flag_found:
525-
#return m_imports
525+
# return m_imports
526526
break
527527

528528
return m_imports
@@ -622,6 +622,7 @@ def rank_software_invocation(soft_invocation_info_list):
622622
entry["ranking"] = position
623623
return soft_invocation_info_list
624624

625+
625626
def ast_to_json(ast_obj):
626627
"""
627628
Function to convert the AST object into JSON format.
@@ -631,6 +632,7 @@ def ast_to_json(ast_obj):
631632
ast_generator.tree = ast_obj
632633
return ast_generator.generate_ast()
633634

635+
634636
def ast_to_source_code(ast_obj):
635637
"""
636638
Function to convert the AST object into source code.
@@ -650,8 +652,8 @@ def dice_coefficient(a, b):
650652
if len(b) == 1:
651653
b = b + u"."
652654

653-
a_bigrams = {a[i : i + 2] for i in range(len(a) - 1)}
654-
b_bigrams = {b[i : i + 2] for i in range(len(b) - 1)}
655+
a_bigrams = {a[i: i + 2] for i in range(len(a) - 1)}
656+
b_bigrams = {b[i: i + 2] for i in range(len(b) - 1)}
655657

656658
overlap = len(a_bigrams & b_bigrams)
657659
dice_coeff = overlap * 2.0 / (len(a_bigrams) + len(b_bigrams))
@@ -727,6 +729,7 @@ def detect_license(license_text, licenses_path, threshold=0.9):
727729

728730
return sorted(rank_list, key=lambda t: t[1], reverse=True)
729731

732+
730733
def extract_readme(input_path: str, output_dir: str) -> dict:
731734
"""
732735
Function to extract content of all readme file under the input directory.
@@ -744,6 +747,7 @@ def extract_readme(input_path: str, output_dir: str) -> dict:
744747

745748
return readme_files
746749

750+
747751
def get_github_metadata(input_path: str) -> dict:
748752
"""
749753
Function to extract metadata from the remote repository using Github api.
@@ -773,8 +777,9 @@ def get_github_metadata(input_path: str) -> dict:
773777

774778
return github_metadata
775779

780+
776781
def find_index_init(depInfo, calls, class_init):
777-
index_remove=[]
782+
index_remove = []
778783
for dep in depInfo:
779784
if dep["type_element"] == "class":
780785
if dep["import"] in calls:
@@ -786,19 +791,21 @@ def find_index_init(depInfo, calls, class_init):
786791
index_remove.append(calls.index(i))
787792
return index_remove
788793

794+
789795
def update_list_calls(info, index_remove):
790-
updated_calls=[]
796+
updated_calls = []
791797
for i in range(0, len(info["calls"])):
792798
if i in index_remove:
793799
continue
794800
updated_calls.append(info["calls"][i])
795801
### These lines are for removing duplicate calls
796802
res = []
797-
for i in updated_calls :
803+
for i in updated_calls:
798804
if i not in res:
799805
res.append(i)
800806
return res
801807

808+
802809
def tree_to_variable_index(root_node, index_to_code):
803810
if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
804811
index = (root_node.start_point, root_node.end_point)
@@ -813,6 +820,7 @@ def tree_to_variable_index(root_node, index_to_code):
813820
code_tokens += tree_to_variable_index(child, index_to_code)
814821
return code_tokens
815822

823+
816824
def DFG_python(root_node, index_to_code, states):
817825
assignment = ['assignment', 'augmented_assignment', 'for_in_clause']
818826
if_statement = ['if_statement']
@@ -979,7 +987,8 @@ def DFG_python(root_node, index_to_code, states):
979987
temp, states = DFG_python(child, index_to_code, states)
980988
DFG += temp
981989

982-
return sorted(DFG,key=lambda x:x[1]),states
990+
return sorted(DFG, key=lambda x: x[1]), states
991+
983992

984993
def tree_to_variable_index(root_node, index_to_code):
985994
if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
@@ -1009,6 +1018,7 @@ def index_to_code_token(index, code):
10091018
s += code[end_point[0]][:end_point[1]]
10101019
return s
10111020

1021+
10121022
def tree_to_token_index(root_node):
10131023
if (len(root_node.children) == 0 or root_node.type == 'string') and root_node.type != 'comment':
10141024
return [(root_node.start_point, root_node.end_point)]
@@ -1018,35 +1028,36 @@ def tree_to_token_index(root_node):
10181028
code_tokens += tree_to_token_index(child)
10191029
return code_tokens
10201030

1021-
def extract_dataflow(code, parser,lang):
1022-
#obtain dataflow
1023-
if lang=="php":
1024-
code="<?php"+code+"?>"
1031+
1032+
def extract_dataflow(code, parser, lang):
1033+
# obtain dataflow
1034+
if lang == "php":
1035+
code = "<?php" + code + "?>"
10251036
try:
1026-
tree = parser[0].parse(bytes(code,'utf8'))
1037+
tree = parser[0].parse(bytes(code, 'utf8'))
10271038
root_node = tree.root_node
1028-
tokens_index=tree_to_token_index(root_node)
1029-
code=code.split('\n')
1030-
code_tokens=[index_to_code_token(x,code) for x in tokens_index]
1031-
index_to_code={}
1032-
for idx,(index,code) in enumerate(zip(tokens_index,code_tokens)):
1033-
index_to_code[index]=(idx,code)
1039+
tokens_index = tree_to_token_index(root_node)
1040+
code = code.split('\n')
1041+
code_tokens = [index_to_code_token(x, code) for x in tokens_index]
1042+
index_to_code = {}
1043+
for idx, (index, code) in enumerate(zip(tokens_index, code_tokens)):
1044+
index_to_code[index] = (idx, code)
10341045
try:
1035-
DFG,_=parser[1](root_node,index_to_code,{})
1046+
DFG, _ = parser[1](root_node, index_to_code, {})
10361047
except:
1037-
DFG=[]
1038-
DFG=sorted(DFG,key=lambda x:x[1])
1039-
indexs=set()
1048+
DFG = []
1049+
DFG = sorted(DFG, key=lambda x: x[1])
1050+
indexs = set()
10401051
for d in DFG:
1041-
if len(d[-1])!=0:
1052+
if len(d[-1]) != 0:
10421053
indexs.add(d[1])
10431054
for x in d[-1]:
10441055
indexs.add(x)
1045-
new_DFG=[]
1056+
new_DFG = []
10461057
for d in DFG:
10471058
if d[1] in indexs:
10481059
new_DFG.append(d)
1049-
dfg=new_DFG
1060+
dfg = new_DFG
10501061
except:
1051-
dfg=[]
1052-
return code_tokens, dfg
1062+
dfg = []
1063+
return code_tokens, dfg

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ requires = [
99
"setuptools==54.2.0",
1010
"json2html",
1111
"configparser",
12+
"tree-sitter"
1213
]
1314
build-backend = "setuptools.build_meta"

0 commit comments

Comments
 (0)