Skip to content

Commit c5a89c0

Browse files
Rangeet Pan
authored and committed
support for adding incremental analysis
1 parent 681bdda commit c5a89c0

4 files changed

Lines changed: 72 additions & 34 deletions

File tree

cldk/analysis/java/codeanalyzer/codeanalyzer.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def __init__(
6464
analysis_level: str,
6565
use_graalvm_binary: bool,
6666
eager_analysis: bool,
67+
target_files: List[str] | None
6768
) -> None:
6869
self.project_dir = project_dir
6970
self.source_code = source_code
@@ -72,6 +73,7 @@ def __init__(
7273
self.use_graalvm_binary = use_graalvm_binary
7374
self.eager_analysis = eager_analysis
7475
self.analysis_level = analysis_level
76+
self.target_files = target_files
7577
self.application = self._init_codeanalyzer(
7678
analysis_level=1 if analysis_level == "symbol_table" else 2
7779
)
@@ -230,9 +232,17 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
230232

231233
if self.analysis_json_path is None:
232234
logger.info("Reading analysis from the pipe.")
233-
codeanalyzer_args = codeanalyzer_exec + shlex.split(
234-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
235-
)
235+
codeanalyzer_args = ''
236+
# If target file is provided, the input is merged into a single string and passed to codeanalyzer
237+
if self.target_files:
238+
target_file_options = ' '.join([s.strip() for s in self.target_files])
239+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
240+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} --target-files={target_file_options}"
241+
)
242+
else:
243+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
244+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
245+
)
236246
try:
237247
logger.info(f"Running codeanalyzer: {' '.join(codeanalyzer_args)}")
238248
console_out: CompletedProcess[str] = subprocess.run(
@@ -254,10 +264,18 @@ def _init_codeanalyzer(self, analysis_level=1) -> JApplication:
254264
# flag is set, we'll run the analysis every time the object is created. This will happen regardless
255265
# of the existence of the analysis file.
256266
# Create the executable command for codeanalyzer.
257-
codeanalyzer_args = codeanalyzer_exec + shlex.split(
258-
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
259-
)
260-
267+
codeanalyzer_args = ''
268+
# If target file is provided, the input is merged into a single string and passed to codeanalyzer
269+
if self.target_files:
270+
target_file_options = ' '.join([s.strip() for s in self.target_files])
271+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
272+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level}"
273+
f" -o {self.analysis_json_path} --target-files={target_file_options}"
274+
)
275+
else:
276+
codeanalyzer_args = codeanalyzer_exec + shlex.split(
277+
f"-i {Path(self.project_dir)} --analysis-level={analysis_level} -o {self.analysis_json_path}"
278+
)
261279
try:
262280
logger.info(
263281
f"Running codeanalyzer subprocess with args {codeanalyzer_args}"
@@ -289,7 +307,6 @@ def _codeanalyzer_single_file(self):
289307
JApplication
290308
The application view of the Java code with the analysis results.
291309
"""
292-
# self.source_code: str = re.sub(r"[\r\n\t\f\v]+", lambda x: " " if x.group() in "\t\f\v" else " ", self.source_code)
293310
codeanalyzer_exec = self._get_codeanalyzer_exec()
294311
codeanalyzer_args = ["--source-analysis", self.source_code]
295312
codeanalyzer_cmd = codeanalyzer_exec + codeanalyzer_args

cldk/analysis/java/java.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,16 @@
1515
class JavaAnalysis(SymbolTable, CallGraph):
1616

1717
def __init__(
18-
self,
19-
project_dir: str | Path | None,
20-
source_code: str | None,
21-
analysis_backend: str,
22-
analysis_backend_path: str | None,
23-
analysis_json_path: str | Path | None,
24-
analysis_level: str,
25-
use_graalvm_binary: bool,
26-
eager_analysis: bool,
18+
self,
19+
project_dir: str | Path | None,
20+
source_code: str | None,
21+
analysis_backend: str,
22+
analysis_backend_path: str | None,
23+
analysis_json_path: str | Path | None,
24+
analysis_level: str,
25+
target_files: List[str] | None,
26+
use_graalvm_binary: bool,
27+
eager_analysis: bool,
2728
) -> None:
2829
"""
2930
Parameters
@@ -44,7 +45,9 @@ def __init__(
4445
eager_analysis : bool, optional
4546
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
4647
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
47-
48+
target_files : List[str] | None, optional
49+
The target files for which the analysis will run or get modified. Currently, this feature is only supported
50+
with symbol table analysis. In the future, we will add this feature to other analysis levels.
4851
Attributes
4952
----------
5053
analysis_backend : JCodeQL | JApplication
@@ -59,7 +62,8 @@ def __init__(
5962
self.analysis_backend_path = analysis_backend_path
6063
self.eager_analysis = eager_analysis
6164
self.use_graalvm_binary = use_graalvm_binary
62-
self.analysis_backend = analysis_backend
65+
self.analysis_backend = analysis_backend
66+
self.target_files = target_files
6367
# Initialize the analysis backend
6468
if analysis_backend.lower() == "codeql":
6569
self.analysis_backend: JCodeQL = JCodeQL(self.project_dir, self.analysis_json_path)
@@ -72,6 +76,7 @@ def __init__(
7276
analysis_json_path=self.analysis_json_path,
7377
use_graalvm_binary=self.use_graalvm_binary,
7478
analysis_backend_path=self.analysis_backend_path,
79+
target_files=self.target_files
7580
)
7681
else:
7782
raise NotImplementedError(f"Support for {analysis_backend} has not been implemented yet.")
@@ -438,7 +443,9 @@ def get_implemented_interfaces(self, qualified_class_name) -> List[str]:
438443
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
439444
return self.backend.get_implemented_interfaces(qualified_class_name)
440445

441-
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str, method_signature: str | None = None) -> (List)[Tuple[JMethodDetail, JMethodDetail]]:
446+
def __get_class_call_graph_using_symbol_table(self, qualified_class_name: str,
447+
method_signature: str | None = None) -> (List)[
448+
Tuple[JMethodDetail, JMethodDetail]]:
442449
"""
443450
A call graph using symbol table for a given class and a given method.
444451
Args:
@@ -478,7 +485,7 @@ def get_class_call_graph(self, qualified_class_name: str, method_signature: str
478485
"""
479486
if using_symbol_table:
480487
return self.__get_class_call_graph_using_symbol_table(qualified_class_name=qualified_class_name,
481-
method_signature=method_signature)
488+
method_signature=method_signature)
482489
if self.analysis_backend in [AnalysisEngine.CODEQL, AnalysisEngine.TREESITTER]:
483490
raise NotImplementedError(f"Support for this functionality has not been implemented yet.")
484491
return self.backend.get_class_call_graph(qualified_class_name, method_signature)

cldk/core.py

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from pathlib import Path
22

3-
43
import logging
4+
from typing import List
55

6+
from cldk.analysis import AnalysisLevel
67
from cldk.analysis.java import JavaAnalysis
78
from cldk.analysis.java.treesitter import JavaSitter
89
from cldk.utils.exceptions import CldkInitializationException
@@ -30,15 +31,16 @@ def __init__(self, language: str):
3031
self.language: str = language
3132

3233
def analysis(
33-
self,
34-
project_path: str | Path | None = None,
35-
source_code: str | None = None,
36-
eager: bool = False,
37-
analysis_backend: str | None = "codeanalyzer",
38-
analysis_level: str = "symbol_table",
39-
analysis_backend_path: str | None = None,
40-
analysis_json_path: str | Path = None,
41-
use_graalvm_binary: bool = False,
34+
self,
35+
project_path: str | Path | None = None,
36+
source_code: str | None = None,
37+
eager: bool = False,
38+
analysis_backend: str | None = "codeanalyzer",
39+
analysis_level: str = AnalysisLevel.symbol_table,
40+
target_files: List[str] | None = None,
41+
analysis_backend_path: str | None = None,
42+
analysis_json_path: str | Path = None,
43+
use_graalvm_binary: bool = False,
4244
) -> JavaAnalysis:
4345
"""
4446
Initialize the preprocessor based on the specified language and analysis_backend.
@@ -65,7 +67,11 @@ def analysis(
6567
eager : bool, optional
6668
A flag indicating whether to perform eager analysis, defaults to False. If True, the analysis is performed
6769
eagerly. That is, the analysis.json file is created during analysis every time even if it already exists.
68-
70+
analysis_level: str, optional
71+
Analysis levels. Refer to AnalysisLevel.
72+
target_files: List[str] | None, optional
73+
The target files (paths) for which the analysis will run or get modified. Currently, this feature is only supported
74+
with symbol table analysis. In the future, we will add this feature to other analysis levels.
6975
Returns
7076
-------
7177
JavaAnalysis
@@ -77,13 +83,19 @@ def analysis(
7783
If neither project_path nor source_code is provided.
7884
NotImplementedError
7985
If the specified language is not implemented yet.
86+
87+
Args:
88+
analysis_level:
89+
target_files:
90+
analysis_level:
8091
"""
8192

8293
if project_path is None and source_code is None:
8394
raise CldkInitializationException("Either project_path or source_code must be provided.")
8495

8596
if project_path is not None and source_code is not None:
86-
raise CldkInitializationException("Both project_path and source_code are provided. Please provide " "only one.")
97+
raise CldkInitializationException(
98+
"Both project_path and source_code are provided. Please provide " "only one.")
8799

88100
if self.language == "java":
89101
return JavaAnalysis(
@@ -94,6 +106,7 @@ def analysis(
94106
analysis_backend_path=analysis_backend_path,
95107
analysis_json_path=analysis_json_path,
96108
use_graalvm_binary=use_graalvm_binary,
109+
target_files=target_files,
97110
eager_analysis=eager,
98111
)
99112
else:
@@ -114,7 +127,7 @@ def treesitter_parser(self):
114127
else:
115128
raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")
116129

117-
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer| NotImplementedError]:
130+
def tree_sitter_utils(self, source_code: str) -> [TreesitterSanitizer | NotImplementedError]:
118131
"""
119132
Parse the project using treesitter.
120133

cldk/models/java/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ class JType(BaseModel):
291291
is_record_declaration: bool = False
292292
is_concrete_class: bool = False
293293
is_entry_point: bool = False
294+
is_modified: bool = False
294295
comment: str
295296
extends_list: List[str] = []
296297
implements_list: List[str] = []

0 commit comments

Comments
 (0)