Fix evaluation name invocation

dgarijo · dgarijo · commit 0ab9db580bfe · 2022-01-26T14:49:45.000+01:00
diff --git a/evaluation/software_type/README.md b/evaluation/software_type/README.md
@@ -38,14 +38,8 @@ Annotated benchmark, curated by hand.
 - main_file_paths_3: files that are less relevant for running the tool.
 - comments: Comments/discussion that occurred during annotation.
 
-### evaluation_summary.csv:
-- date: Date when the experiment was carried out.
-- #repositories: Number of valid repositories analyzed.
-- precision_avg: Average precision.
-- recall_avg: Average recall.
-- errors: Repositories where errors occurred (for debugging)
-
-### evaluation_summary_scripts_precision_recall.csv:
+### evaluation_summary.csv
+File with precision/recall metrics for each category type:
 - date: Date when the experiment was carried out
 - #repositories: Number of valid repositories analyzed.
 - precision_package: Average precision for the package category
@@ -61,7 +55,17 @@ Annotated benchmark, curated by hand.
 - errors: Repositories where errors occurred (for debugging)
 
 ### evaluation_summary_scripts_ndcg.csv
+File with the estimation of the ranking results, using normalized discounted cumulative gain (NDCG):
 - date: Date when the experiment was carried out
 - #repositories: Number of valid repositories analyzed.
 - ndcg_avg: Normalized discounted cumulaltive gain (ranking evaluation)
 - errors: Repositories where errors occurred (for debugging)
+
+### evaluation_summary_scripts_precision_recall.csv
+File with the precision and recall of all the scripts considered "relevant" in the annotation corpus. **Note: this is ongoing work**.
+- date: Date when the experiment was carried out.
+- #repositories: Number of valid repositories analyzed.
+- precision_avg: Average precision.
+- recall_avg: Average recall.
+- errors: Repositories where errors occurred (for debugging)
+
diff --git a/evaluation/software_type/evaluation_summary.csv b/evaluation/software_type/evaluation_summary.csv
@@ -33,3 +33,4 @@ date,#repositories,precision_package,recall_package,precision_library,recall_lib
 19/09/2021 18:38:49,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.9090909090909091,0.967741935483871,0.9600313479623824,0.9326406120760959,"['autojump', 'rtdl', 'kgtk-browser', 'DDOS_Python', 'HXL-Data-Science-file-formats']"
 19/09/2021 19:02:42,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.967741935483871,0.967741935483871,0.9746941045606229,0.9711021505376344,"['autojump', 'rtdl', 'HXL-Data-Science-file-formats']"
 19/09/2021 19:53:02,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.967741935483871,0.967741935483871,0.9746941045606229,0.9711021505376344,"['autojump', 'rtdl', 'HXL-Data-Science-file-formats']"
+26/01/2022 14:42:28,95,1.0,0.9166666666666666,0.9310344827586207,1.0,1.0,1.0,0.967741935483871,0.967741935483871,0.9746941045606229,0.9711021505376344,"['autojump', 'rtdl', 'HXL-Data-Science-file-formats']"
diff --git a/evaluation/software_type/evaluation_summary_scripts_ndcg.csv b/evaluation/software_type/evaluation_summary_scripts_ndcg.csv
@@ -9,3 +9,4 @@ date,#repositories,ndcg_avg,errors
 19/09/2021 18:38:49,44,0.8654678313783589,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'Morph-OME. PENALIZATION: 1', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'kgtk-browser. ndcg:0.6309297535714575. relevance: [0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]']"
 19/09/2021 19:02:42,44,0.8698089580019919,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'DDOS_Python. ndcg:0.8494348345648929. relevance: [1, 0, 1, 0, 1, 0, 0, 0, 0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', 'wikidata-property-finder. ndcg:0.9725044904464192. relevance: [3, 1, 2]']"
 19/09/2021 19:53:02,44,0.8698089580019919,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'DDOS_Python. ndcg:0.8494348345648929. relevance: [1, 0, 1, 0, 1, 0, 0, 0, 0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', 'wikidata-property-finder. ndcg:0.9725044904464192. relevance: [3, 1, 2]']"
+26/01/2022 14:42:28,44,0.8698089580019919,"['BoostingMonocularDepth. PENALIZATION: 3', 'gtfs-bench. ndcg:0. relevance: [0]', 'tada-web. PENALIZATION: 1', 'SOLQ. PENALIZATION: 1', 'lynx-py. ndcg:0. relevance: [0, 0, 0, 0, 0, 0, 0]', 'pySRURGS. ndcg:0.9401043262568904. relevance: [3, 2, 0, 1, 2, 2, 2, 2, 2, 2]', 'CU-Net. ndcg:0. relevance: [0, 0]', 'graph-neural-pde. ndcg:0.9428279810283838. relevance: [2, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]', 'Calliar. ndcg:0.6309297535714575. relevance: [0, 1]', 'rtdl. ndcg:0. relevance: [0, 0, 0, 0]', 'tada-gam. ndcg:0.8868854556705132. relevance: [2, 0, 0, 0, 0, 0, 1, 0]', 'deeplab2. ndcg:0.6309297535714575. relevance: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]', 'DDOS_Python. ndcg:0.8494348345648929. relevance: [1, 0, 1, 0, 1, 0, 0, 0, 0, 1]', 'AliceMind. ndcg:0.4179775569776327. relevance: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]', 'wikidata-property-finder. ndcg:0.9725044904464192. relevance: [3, 1, 2]']"
diff --git a/evaluation/software_type/evaluation_summary_scripts_precision_recall.csv b/evaluation/software_type/evaluation_summary_scripts_precision_recall.csv
@@ -8,3 +8,4 @@ date,#repositories,precision_avg,recall_avg,errors
 08/09/2021 23:57,44,0.637374964,0.843398268,"['gtfs-benchP:0.0. R:0.0', 'SOLQP:0.42857142857142855. R:1.0', 'Self-Correction-Human-ParsingP:0.09090909090909091. R:1.0', 'dptkP:1.0. R:0.2', 'graph-neural-pdeP:0.4375. R:1.0', 'DDOS_PythonP:0.4. R:0.8', 'giraffeP:0.0. R:0.0', 'CU-NetP:0.0. R:0.0', 'decision-transformerP:1.0. R:0.3333333333333333', 'wikidata-property-finderP:0.75. R:1.0', 'tada-gamP:0.25. R:1.0', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'lynx-pyP:0.0. R:0.0', 'nominatim-feedback-reporterP:0.5. R:1.0', 'TfaceP:0.23076923076923078. R:1.0', 'CalliarP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'pySRURGSP:0.9. R:0.9', 'morph-csvP:0.6666666666666666. R:1.0', 'ssspotterP:0.5. R:1.0', 'ConSERTP:0.15789473684210525. R:1.0', 'code2vecP:1.0. R:0.4', 'xcitP:0.3333333333333333. R:1.0', 'AliceMindP:0.0625. R:0.8333333333333334', 'deeplab2P:0.014925373134328358. R:0.5', 'face_classificationP:0.25. R:0.14285714285714285']""16/09/2021 23:18:46,44,0.63738540167966,0.8433982683982684,""['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'TfaceP:0.23076923076923078. R:1.0', 'giraffeP:0.0. R:0.0', 'SOLQP:0.42857142857142855. R:1.0', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.15789473684210525. R:1.0', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.4375. R:1.0', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.4', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'deeplab2P:0.015384615384615385. R:0.5', 'nominatim-feedback-reporterP:0.5. R:1.0', 'Self-Correction-Human-ParsingP:0.09090909090909091. R:1.0', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.0625. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.3333333333333333. R:1.0', 'dptkP:1.0. R:0.2', 'wikidata-property-finderP:0.75. R:1.0']"
 16/09/2021 23:32:14,44,0.63738540167966,0.843398268398268,"['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'TfaceP:0.23076923076923078. R:1.0', 'giraffeP:0.0. R:0.0', 'SOLQP:0.42857142857142855. R:1.0', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.15789473684210525. R:1.0', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.4375. R:1.0', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.4', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'deeplab2P:0.015384615384615385. R:0.5', 'nominatim-feedback-reporterP:0.5. R:1.0', 'Self-Correction-Human-ParsingP:0.09090909090909091. R:1.0', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.0625. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.3333333333333333. R:1.0', 'dptkP:1.0. R:0.2', 'wikidata-property-finderP:0.75. R:1.0']"
 19/09/2021 19:53:02,44,0.6382635128716634,0.7522727272727273,"['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'tada-webP:0.6666666666666666. R:1', 'TfaceP:0.18181818181818182. R:0.6666666666666666', 'giraffeP:0.4. R:0.5', 'SOLQP:0.2. R:0.3333333333333333', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.10344827586206896. R:0.5', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.42857142857142855. R:0.8571428571428571', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.2', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'Morph-OMEP:0.3333333333333333. R:1.0', 'deeplab2P:0.1. R:0.5', 'Self-Correction-Human-ParsingP:0.045454545454545456. R:0.3333333333333333', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.08620689655172414. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.5. R:1.0', 'dptkP:1.0. R:0.2']"
+26/01/2022 14:42:28,44,0.6382635128716634,0.7522727272727273,"['face_classificationP:0.25. R:0.14285714285714285', 'BoostingMonocularDepthP:0.5714285714285714. R:1.0', 'gtfs-benchP:0.0. R:0.0', 'tada-webP:0.6666666666666666. R:1', 'TfaceP:0.18181818181818182. R:0.6666666666666666', 'giraffeP:0.4. R:0.5', 'SOLQP:0.2. R:0.3333333333333333', 'lynx-pyP:0.0. R:0.0', 'ConSERTP:0.10344827586206896. R:0.5', 'decision-transformerP:1.0. R:0.3333333333333333', 'pySRURGSP:0.9. R:0.9', 'CU-NetP:0.0. R:0.0', 'graph-neural-pdeP:0.42857142857142855. R:0.8571428571428571', 'CalliarP:0.5. R:1.0', 'code2vecP:1.0. R:0.2', 'ssspotterP:0.5. R:1.0', 'rtdlP:0.0. R:0.0', 'tada-gamP:0.25. R:1.0', 'Morph-OMEP:0.3333333333333333. R:1.0', 'deeplab2P:0.1. R:0.5', 'Self-Correction-Human-ParsingP:0.045454545454545456. R:0.3333333333333333', 'DDOS_PythonP:0.4. R:0.8', 'AliceMindP:0.08620689655172414. R:0.8333333333333334', 'morph-csvP:0.6666666666666666. R:1.0', 'xcitP:0.5. R:1.0', 'dptkP:1.0. R:0.2']"
diff --git a/inspect4py/evaluation/run_software_invocation_evaluation_deprecated.py b/inspect4py/evaluation/run_software_invocation_evaluation_deprecated.py
@@ -64,7 +64,7 @@ def extract_types_from_response(response_data):
 
 for dir_name in os.listdir(repo_path):
     print("######## Processing: " + dir_name) # repo_path
-    cmd = 'code_inspector -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
+    cmd = 'inspect4py -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
     proc = subprocess.Popen(cmd.encode('utf-8'), shell=True, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     stdout, stderr = proc.communicate()
diff --git a/inspect4py/evaluation/run_software_type_evaluation.py b/inspect4py/evaluation/run_software_type_evaluation.py
@@ -90,7 +90,7 @@ def main():
     total_ndcg_scripts = 0
     for dir_name in os.listdir(repo_path):
         print("######## Processing: " + dir_name + " Repo no. " + str(num_repos))  # repo_path
-        cmd = 'code_inspector -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
+        cmd = 'inspect4py -i ' + repo_path + dir_name + " -o ../../output_dir/ -si"
         proc = subprocess.Popen(cmd.encode('utf-8'), shell=True, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         stdout, stderr = proc.communicate()
diff --git a/test/test_inspect4py.py b/test/test_inspect4py.py
@@ -297,17 +297,18 @@ def test_script(self):
 #         m.document(destination=output_html_file)
 #         assert "balance between £1,000 and £1,000,000 GBP" in open(output_html_file).read()
 
-def crop_transform68(rimg, landmark, image_size, src):
-    
-    assert landmark.shape[0] == 68 or landmark.shape[0] == 5
-    assert landmark.shape[1] == 2
-    tform = trans.SimilarityTransform()
+# def crop_transform68(rimg, landmark, image_size, src):
+#
+#     assert landmark.shape[0] == 68 or landmark.shape[0] == 5
+#     assert landmark.shape[1] == 2
+#     tform = trans.SimilarityTransform()
+#
+#     tform.estimate(landmark, src)
+#     M = tform.params[0:2, :]
+#     img = cv2.warpAffine(
+#         rimg, M, (image_size[1], image_size[0]), borderValue=0.0)
+#     return img
 
-    tform.estimate(landmark, src)
-    M = tform.params[0:2, :]
-    img = cv2.warpAffine(
-        rimg, M, (image_size[1], image_size[0]), borderValue=0.0)
-    return img
 
 def invoke_inspector(input_path, fig, output_dir, ignore_dir_pattern, ignore_file_pattern, requirements,
                      call_list, control_flow, directory_tree, software_invocation):