Skip to content

Commit 0ab9db5

Browse files
committed
Fix evaluation name invocation
1 parent 3c8e9e9 commit 0ab9db5

7 files changed

Lines changed: 28 additions & 20 deletions

evaluation/software_type/README.md

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,8 @@ Annotated benchmark, curated by hand.
3838
- main_file_paths_3: files that are less relevant for running the tool.
3939
- comments: Comments/discussion that occurred during annotation.
4040

41-
### evaluation_summary.csv:
42-
- date: Date when the experiment was carried out.
43-
- #repositories: Number of valid repositories analyzed.
44-
- precision_avg: Average precision.
45-
- recall_avg: Average recall.
46-
- errors: Repositories where errors occurred (for debugging)
47-
48-
### evaluation_summary_scripts_precision_recall.csv:
41+
### evaluation_summary.csv
42+
File with precision/recall metrics for each category type:
4943
- date: Date when the experiment was carried out
5044
- #repositories: Number of valid repositories analyzed.
5145
- precision_package: Average precision for the package category
@@ -61,7 +55,17 @@ Annotated benchmark, curated by hand.
6155
- errors: Repositories where errors occurred (for debugging)
6256

6357
### evaluation_summary_scripts_ndcg.csv
58+
File with the evaluation of the ranking results, using normalized discounted cumulative gain (NDCG):
6459
- date: Date when the experiment was carried out
6560
- #repositories: Number of valid repositories analyzed.
6661
- ndcg_avg: Average normalized discounted cumulative gain (ranking evaluation)
6762
- errors: Repositories where errors occurred (for debugging)
63+
64+
### evaluation_summary_scripts_precision_recall.csv
65+
File with the precision and recall of all the scripts considered "relevant" in the annotation corpus. **Note: this is ongoing work**.
66+
- date: Date when the experiment was carried out.
67+
- #repositories: Number of valid repositories analyzed.
68+
- precision_avg: Average precision.
69+
- recall_avg: Average recall.
70+
- errors: Repositories where errors occurred (for debugging)
71+

evaluation/software_type/evaluation_summary.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,4 @@ date,#repositories,precision_package,recall_package,precision_library,recall_lib
3333
19/09/2021 18:38:49,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.9090909090909091,0.967741935483871,0.9600313479623824,0.9326406120760959,"['autojump', 'rtdl', 'kgtk-browser', 'DDOS_Python', 'HXL-Data-Science-file-formats']"
3434
19/09/2021 19:02:42,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.967741935483871,0.967741935483871,0.9746941045606229,0.9711021505376344,"['autojump', 'rtdl', 'HXL-Data-Science-file-formats']"
3535
19/09/2021 19:53:02,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.967741935483871,0.967741935483871,0.9746941045606229,0.9711021505376344,"['autojump', 'rtdl', 'HXL-Data-Science-file-formats']"
36+
26/01/2022 14:42:28,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.967741935483871,0.967741935483871,0.9746941045606229,0.9711021505376344,"['autojump', 'rtdl', 'HXL-Data-Science-file-formats']"

evaluation/software_type/evaluation_summary_scripts_ndcg.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ date,#repositories,ndcg_avg,errors
99
19/09/2021 18:38:49,44,0.8654678313783589,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'Morph-OME. PENALIZATION: 1', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'kgtk-browser. ndcg:0.6309297535714575. relevance: [0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']"
1010
19/09/2021 19:02:42,44,0.8698089580019919,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'DDOS_Python. ndcg:0.8494348345648929. relevance: [1, 0, 1, 0, 1, 0, 0, 0, 0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', 'wikidata-property-finder. ndcg:0.9725044904464192. relevance: [3, 1, 2]']"
1111
19/09/2021 19:53:02,44,0.8698089580019919,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'DDOS_Python. ndcg:0.8494348345648929. relevance: [1, 0, 1, 0, 1, 0, 0, 0, 0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', 'wikidata-property-finder. ndcg:0.9725044904464192. relevance: [3, 1, 2]']"
12+
26/01/2022 14:42:28,44,0.8698089580019919,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'DDOS_Python. ndcg:0.8494348345648929. relevance: [1, 0, 1, 0, 1, 0, 0, 0, 0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', 'wikidata-property-finder. ndcg:0.9725044904464192. relevance: [3, 1, 2]']"

evaluation/software_type/evaluation_summary_scripts_precision_recall.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ date,#repositories,precision_avg,recall_avg,errors
88
08/09/2021 23:57,44,0.637374964,0.843398268,"['gtfs-benchP:0.0. R:0.0', 'SOLQP:0.42857142857142855. R:1.0', 'Self-Correction-Human-ParsingP:0.09090909090909091. R:1.0', 'dptkP:1.0. R:0.2', 'graph-neural-pdeP:0.4375. R:1.0', 'DDOS_PythonP:0.4. R:0.8', 'giraffeP:0.0. R:0.0', 'CU-NetP:0.0. R:0.0', 'decision-transformerP:1.0. R:0.3333333333333333', 'wikidata-property-finderP:0.75. R:1.0', 'tada-gamP:0.25. R:1.0', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'lynx-pyP:0.0. R:0.0', 'nominatim-feedback-reporterP:0.5. R:1.0', 'TfaceP:0.23076923076923078. R:1.0', 'CalliarP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'pySRURGSP:0.9. R:0.9', 'morph-csvP:0.6666666666666666. R:1.0', 'ssspotterP:0.5. R:1.0', 'ConSERTP:0.15789473684210525. R:1.0', 'code2vecP:1.0. R:0.4', 'xcitP:0.3333333333333333. R:1.0', 'AliceMindP:0.0625. R:0.8333333333333334', 'deeplab2P:0.014925373134328358. R:0.5', 'face_classificationP:0.25. R:0.14285714285714285']""16/09/2021 23:18:46,44,0.63738540167966,0.8433982683982684,""['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'TfaceP:0.23076923076923078. R:1.0', 'giraffeP:0.0. R:0.0', 'SOLQP:0.42857142857142855. R:1.0', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.15789473684210525. R:1.0', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.4375. R:1.0', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.4', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'deeplab2P:0.015384615384615385. R:0.5', 'nominatim-feedback-reporterP:0.5. R:1.0', 'Self-Correction-Human-ParsingP:0.09090909090909091. R:1.0', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.0625. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.3333333333333333. R:1.0', 'dptkP:1.0. R:0.2', 'wikidata-property-finderP:0.75. R:1.0']"
99
16/09/2021 23:32:14,44,0.63738540167966,0.843398268398268,"['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'TfaceP:0.23076923076923078. R:1.0', 'giraffeP:0.0. R:0.0', 'SOLQP:0.42857142857142855. R:1.0', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.15789473684210525. R:1.0', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.4375. R:1.0', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.4', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'deeplab2P:0.015384615384615385. R:0.5', 'nominatim-feedback-reporterP:0.5. R:1.0', 'Self-Correction-Human-ParsingP:0.09090909090909091. R:1.0', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.0625. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.3333333333333333. R:1.0', 'dptkP:1.0. R:0.2', 'wikidata-property-finderP:0.75. R:1.0']"
1010
19/09/2021 19:53:02,44,0.6382635128716634,0.7522727272727273,"['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'tada-webP:0.6666666666666666. R:1', 'TfaceP:0.18181818181818182. R:0.6666666666666666', 'giraffeP:0.4. R:0.5', 'SOLQP:0.2. R:0.3333333333333333', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.10344827586206896. R:0.5', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.42857142857142855. R:0.8571428571428571', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.2', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'Morph-OMEP:0.3333333333333333. R:1.0', 'deeplab2P:0.1. R:0.5', 'Self-Correction-Human-ParsingP:0.045454545454545456. R:0.3333333333333333', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.08620689655172414. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.5. R:1.0', 'dptkP:1.0. R:0.2']"
11+
26/01/2022 14:42:28,44,0.6382635128716634,0.7522727272727273,"['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'tada-webP:0.6666666666666666. R:1', 'TfaceP:0.18181818181818182. R:0.6666666666666666', 'giraffeP:0.4. R:0.5', 'SOLQP:0.2. R:0.3333333333333333', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.10344827586206896. R:0.5', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.42857142857142855. R:0.8571428571428571', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.2', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'Morph-OMEP:0.3333333333333333. R:1.0', 'deeplab2P:0.1. R:0.5', 'Self-Correction-Human-ParsingP:0.045454545454545456. R:0.3333333333333333', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.08620689655172414. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.5. R:1.0', 'dptkP:1.0. R:0.2']"

inspect4py/evaluation/run_software_invocation_evaluation_deprecated.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def extract_types_from_response(response_data):
6464

6565
for dir_name in os.listdir(repo_path):
6666
print("######## Processing: " + dir_name) # repo_path
67-
cmd = 'code_inspector -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
67+
cmd = 'inspect4py -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
6868
proc = subprocess.Popen(cmd.encode('utf-8'), shell=True, stdin=subprocess.PIPE,
6969
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
7070
stdout, stderr = proc.communicate()

inspect4py/evaluation/run_software_type_evaluation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def main():
9090
total_ndcg_scripts = 0
9191
for dir_name in os.listdir(repo_path):
9292
print("######## Processing: " + dir_name + " Repo no. " + str(num_repos)) # repo_path
93-
cmd = 'code_inspector -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
93+
cmd = 'inspect4py -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
9494
proc = subprocess.Popen(cmd.encode('utf-8'), shell=True, stdin=subprocess.PIPE,
9595
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
9696
stdout, stderr = proc.communicate()

test/test_inspect4py.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -297,17 +297,18 @@ def test_script(self):
297297
# m.document(destination=output_html_file)
298298
# assert "balance between £1,000 and £1,000,000 GBP" in open(output_html_file).read()
299299

300-
def crop_transform68(rimg, landmark, image_size, src):
301-
302-
assert landmark.shape[0] == 68 or landmark.shape[0] == 5
303-
assert landmark.shape[1] == 2
304-
tform = trans.SimilarityTransform()
300+
# def crop_transform68(rimg, landmark, image_size, src):
301+
#
302+
# assert landmark.shape[0] == 68 or landmark.shape[0] == 5
303+
# assert landmark.shape[1] == 2
304+
# tform = trans.SimilarityTransform()
305+
#
306+
# tform.estimate(landmark, src)
307+
# M = tform.params[0:2, :]
308+
# img = cv2.warpAffine(
309+
# rimg, M, (image_size[1], image_size[0]), borderValue=0.0)
310+
# return img
305311

306-
tform.estimate(landmark, src)
307-
M = tform.params[0:2, :]
308-
img = cv2.warpAffine(
309-
rimg, M, (image_size[1], image_size[0]), borderValue=0.0)
310-
return img
311312

312313
def invoke_inspector(input_path, fig, output_dir, ignore_dir_pattern, ignore_file_pattern, requirements,
313314
call_list, control_flow, directory_tree, software_invocation):

0 commit comments

Comments
 (0)