Skip to content

Commit c71ed33

Browse files
authored
Merge branch 'incremental-analysis' into revert-main
2 parents 4125358 + 50df25f commit c71ed33

4 files changed

Lines changed: 80 additions & 40 deletions

File tree

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,15 @@ class JCodeanalyzer:
5151
"""
5252

5353
def __init__(
54-
self,
55-
project_dir: Union[str, Path],
56-
source_code: str | None,
57-
analysis_backend_path: Union[str, Path, None],
58-
analysis_json_path: Union[str, Path, None],
59-
analysis_level: str,
60-
use_graalvm_binary: bool,
61-
eager_analysis: bool,
54+
self,
55+
project_dir: Union[str, Path],
56+
source_code: str | None,
57+
analysis_backend_path: Union[str, Path, None],
58+
analysis_json_path: Union[str, Path, None],
59+
analysis_level: str,
60+
use_graalvm_binary: bool,
61+
eager_analysis: bool,
62+
target_files: List[str] | None
6263
) -> None:
6364
self.project_dir = project_dir
6465
self.source_code = source_code
@@ -67,6 +68,7 @@ def __init__(
6768
self.use_graalvm_binary = use_graalvm_binary
6869
self.eager_analysis = eager_analysis
6970
self.analysis_level = analysis_level
71+
self.target_files = target_files
7072
self.application = self._init_codeanalyzer(
7173
analysis_level=1 if analysis_level == AnalysisLevel.symbol_table else 2)
7274
# Attributes related to the Java code analysis...
@@ -201,8 +203,17 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
201203

202204
if self.analysis_json_path is None:
203205
logger.info("Reading analysis from the pipe.")
204-
codeanalyzer_args = codeanalyzer_exec + shlex.split(
205-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}")
206+
codeanalyzer_args = ''
207+
# If target files are provided, their paths are joined into a single option string and passed to codeanalyzer
208+
if self.target_files:
209+
target_file_options = '-t '.join([s.strip() for s in self.target_files])
210+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
211+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -t {target_file_options}"
212+
)
213+
else:
214+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
215+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
216+
)
206217
try:
207218
logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
208219
console_out: CompletedProcess[str] = subprocess.run(
@@ -222,9 +233,18 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
222233
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
223234
# of the existence of the analysis file.
224235
# Create the executable command for codeanalyzer.
225-
codeanalyzer_args = codeanalyzer_exec + shlex.split(
226-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}")
227-
236+
codeanalyzer_args = ''
237+
# If target files are provided, their paths are joined into a single option string and passed to codeanalyzer
238+
if self.target_files:
239+
target_file_options = '-t '.join([s.strip() for s in self.target_files])
240+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
241+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
242+
f" -o {self.analysis_json_path} -t {target_file_options}"
243+
)
244+
else:
245+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
246+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
247+
)
228248
try:
229249
logger.info(f"Running codeanalyzer subprocess with args {codeanalyzer_args}")
230250
subprocess.run(
@@ -252,7 +272,6 @@ def _codeanalyzer_single_file(self):
252272
JApplication
253273
The application view of the Java code with the analysis results.
254274
"""
255-
# self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
256275
codeanalyzer_exec = self._get_codeanalyzer_exec()
257276
codeanalyzer_args = ["--source-analysis", self.source_code]
258277
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args

cldk/analysis/java/java.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,16 @@
1515
class JavaAnalysis(SymbolTable, CallGraph):
1616

1717
def __init__(
18-
self,
19-
project_dir: str | Path | None,
20-
source_code: str | None,
21-
analysis_backend: str,
22-
analysis_backend_path: str | None,
23-
analysis_json_path: str | Path | None,
24-
analysis_level: str,
25-
use_graalvm_binary: bool,
26-
eager_analysis: bool,
18+
self,
19+
project_dir: str | Path | None,
20+
source_code: str | None,
21+
analysis_backend: str,
22+
analysis_backend_path: str | None,
23+
analysis_json_path: str | Path | None,
24+
analysis_level: str,
25+
target_files: List[str] | None,
26+
use_graalvm_binary: bool,
27+
eager_analysis: bool,
2728
) -> None:
2829
"""
2930
Parameters
@@ -44,7 +45,9 @@ def __init__(
4445
eager_analysis : bool, optional
4546
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
4647
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
47-
48+
target_files : List[str] | None, optional
49+
The target files for which the analysis will run or get modified. Currently, this feature is only supported
50+
with symbol table analysis. In the future, we will add this feature to other analysis levels.
4851
Attributes
4952
----------
5053
analysis_backend : JCodeQL | JApplication
@@ -59,7 +62,8 @@ def __init__(
5962
self.analysis_backend_path = analysis_backend_path
6063
self.eager_analysis = eager_analysis
6164
self.use_graalvm_binary = use_graalvm_binary
62-
self.analysis_backend = analysis_backend
65+
self.analysis_backend = analysis_backend
66+
self.target_files = target_files
6367
# Initialize the analysis analysis_backend
6468
if analysis_backend.lower() == "codeql":
6569
self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
@@ -72,6 +76,7 @@ def __init__(
7276
analysis_json_path=self.analysis_json_path,
7377
use_graalvm_binary=self.use_graalvm_binary,
7478
analysis_backend_path=self.analysis_backend_path,
79+
target_files=self.target_files
7580
)
7681
else:
7782
raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
@@ -442,7 +447,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]:
442447
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
443448
return self.backend.get_implemented_interfaces(qualified_class_name)
444449

445-
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
450+
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
451+
method_signature: str | None = None) -> (List)[
452+
Tuple[JMethodDetail, JMethodDetail]]:
446453
"""
447454
A call graph using symbol table for a given class and a given method.
448455
Args:
@@ -482,7 +489,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
482489
"""
483490
if using_symbol_table:
484491
return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
485-
method_signature=method_signature)
492+
method_signature=method_signature)
486493
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
487494
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
488495
return self.backend.get_class_call_graph(qualified_class_name, method_signature)

cldk/core.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from pathlib import Path
22

3-
43
import logging
4+
from typing import List
55

6+
from cldk.analysis import AnalysisLevel
67
from cldk.analysis.java import JavaAnalysis
78
from cldk.analysis.java.treesitter import JavaSitter
89
from cldk.utils.exceptions import CldkInitializationException
@@ -30,15 +31,16 @@ def __init__(self, language: str):
3031
self.language: str = language
3132

3233
def analysis(
33-
self,
34-
project_path: str | Path | None = None,
35-
source_code: str | None = None,
36-
eager: bool = False,
37-
analysis_backend: str | None = "codeanalyzer",
38-
analysis_level: str = "symbol_table",
39-
analysis_backend_path: str | None = None,
40-
analysis_json_path: str | Path = None,
41-
use_graalvm_binary: bool = False,
34+
self,
35+
project_path: str | Path | None = None,
36+
source_code: str | None = None,
37+
eager: bool = False,
38+
analysis_backend: str | None = "codeanalyzer",
39+
analysis_level: str = AnalysisLevel.symbol_table,
40+
target_files: List[str] | None = None,
41+
analysis_backend_path: str | None = None,
42+
analysis_json_path: str | Path = None,
43+
use_graalvm_binary: bool = False,
4244
) -> JavaAnalysis:
4345
"""
4446
Initialize the preprocessor based on the specified language and analysis_backend.
@@ -65,7 +67,11 @@ def analysis(
6567
eager : bool, optional
6668
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
6769
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
68-
70+
analysis_level: str, optional
71+
Analysis levels. Refer to AnalysisLevel.
72+
target_files: List[str] | None, optional
73+
The target files (paths) for which the analysis will run or get modified. Currently, this feature is only supported
74+
with symbol table analysis. In the future, we will add this feature to other analysis levels.
6975
Returns
7076
-------
7177
JavaAnalysis
@@ -77,13 +83,19 @@ def analysis(
7783
If neither project_path nor source_code is provided.
7884
NotImplementedError
7985
If the specified language is not implemented yet.
86+
87+
Args:
88+
analysis_level:
89+
target_files:
90+
analysis_level:
8091
"""
8192

8293
if project_path is None and source_code is None:
8394
raise CldkInitializationException("Either project_path or source_code must be provided.")
8495

8596
if project_path is not None and source_code is not None:
86-
raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.")
97+
raise CldkInitializationException(
98+
"Both project_path and source_code are provided. Please provide " "only one.")
8799

88100
if self.language == "java":
89101
return JavaAnalysis(
@@ -94,6 +106,7 @@ def analysis(
94106
analysis_backend_path=analysis_backend_path,
95107
analysis_json_path=analysis_json_path,
96108
use_graalvm_binary=use_graalvm_binary,
109+
target_files=target_files,
97110
eager_analysis=eager,
98111
)
99112
else:
@@ -114,7 +127,7 @@ def treesitter_parser(self):
114127
else:
115128
raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")
116129

117-
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]:
130+
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]:
118131
"""
119132
Parse the project using treesitter.
120133

cldk/models/java/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,7 @@ class JCompilationUnit(BaseModel):
341341
comment: str
342342
imports: List[str]
343343
type_declarations: Dict[str, JType]
344+
is_modified: bool = False
344345

345346

346347
class JMethodDetail(BaseModel):

0 commit comments

Comments
 (0)