-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcommentanalyzer.py
More file actions
325 lines (258 loc) · 14.4 KB
/
commentanalyzer.py
File metadata and controls
325 lines (258 loc) · 14.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# Author: Mostafa Okasha (okashm@mcmaster.ca)
# Affiliation: CapitalOne - Technical Assessment
#
# Please see the README.md or README.pdf document for more details on the inner-workings
# of this module.
''' Quick Overview
CommentAnalyzer must be instantiated with a file name. If the filename is invalid,
file_extension is set to False and no analysis is produced. The analysis_output method can output data in
multiple formats. Every file format can be supported! extensions.py has a list of
all supported file formats and anyone can add to that list as long as you follow
the rules for adding comment syntax and style.
Enjoy! ~~
'''
""" ------CommentAnalyzer Expansion------
Class:
@CommentAnalyzer: This class will hold functions to allow code analysis to determine
quantity of LOC, Total Comments, In-line Comments,
Block Comments and TODOs.
@validate_filename: Validates the filename. Returns False if it starts with '.' or
if the file has no extension; the analysis cannot run in that case.
@set_file_extension: Extracts the file extension if the filename has been validated.
@comment_single: Analyzes comments for languages with single comment only syntax
that have no block comments. See extensions.py for more info.
@comment_block: Analyzes comments for languages with block comment only syntax
that have no single comment syntax. Such as HTML.
@comment_block_single: Analyzes comments for languages with both single comment and
block comment syntax.
@analyze_code: Decides which methods from comment_single,comment_block and
comment_block_single should analyze the code based on extension.
@analysis_output: Outputs the analysis report created by the analyze_code method.
Can output in several formats. See method for more info.
@requirements: extensions.py only. It has important configurations for the
CommentAnalyzer class file extension support and usage.
"""
from extensions import *
class CommentAnalyzer(object):
    """Determine quantitative values for different comment types.

    Counts the LOC (lines of code) of a file and classifies its comment lines
    into single-line/in-line comments, block comments and TODOs to provide a
    quantitative analysis of the code.

    The comment markers and the TODO token are not defined here; they come
    from the names exported by extensions.py (EXTENSIONS_STYLE, STYLE_SYNTAX,
    BLOCK_ONLY, TODO, OUTPUT_FORMAT).
    """

    def __init__(self, filename):
        """Store the filename, derive its extension and zero the counters."""
        self.filename = filename
        self.file_extension = self.set_file_extension()  # False if invalid filename
        self.__check_in_status = False  # True when file analysis completes
        self.counter_dict = self._new_counts()

    @staticmethod
    def _new_counts():
        """Return a fresh, zeroed counter dictionary."""
        return {"loc": 0, "tot_comments": 0, "single_comments": 0,
                "block_comment_lines": 0, "block_comments": 0, "todos": 0}

    @staticmethod
    def _find_unquoted(line, marker):
        """Return the index of the first `marker` outside quotes, or -1.

        Strings are assumed to be delimited by ' or "; escaped quotes are not
        interpreted. An unterminated quote makes the rest of the line count as
        string content, so the scan stops instead of restarting from 0.
        """
        pos = 0
        length = len(line)
        while pos < length:
            char = line[pos]
            if char == "'" or char == '"':
                closing = line.find(char, pos + 1)
                if closing == -1:
                    return -1
                pos = closing + 1
            elif line.startswith(marker, pos):
                return pos
            else:
                pos += 1
        return -1

    @staticmethod
    def _close_comment_run(counts, run_length):
        """Record a finished run of consecutive comment-only lines.

        A run of two or more lines is one block comment; a run of exactly one
        line is a single comment; an empty run records nothing.
        """
        if run_length >= 2:
            counts["block_comments"] += 1
            counts["block_comment_lines"] += run_length
        elif run_length == 1:
            counts["single_comments"] += 1

    @staticmethod
    def _start_block(counts, remainder, block_close):
        """Handle the text following a block opener; return True if still open.

        When the closing delimiter is already present in `remainder`, the
        block is recorded as a complete one-line block comment.
        """
        if block_close in remainder:
            counts["block_comments"] += 1
            counts["block_comment_lines"] += 1
            return False
        return True

    def validate_filename(self) -> bool:
        """Determine file name validity.

        This is a separate function to allow ease of modification in case the
        criteria for acceptable filename formats change.

        Returns:
            True when the filename does not start with '.' and contains a '.'.
        """
        return not self.filename.startswith('.') and '.' in self.filename

    def set_file_extension(self):
        """Extract the file extension (supports filenames with multiple '.'s).

        Returns:
            The text after the last '.', or False when the filename is invalid.
        """
        if not self.validate_filename():
            return False
        return self.filename.split(".")[-1]

    def comment_single(self, comment_syntax):
        """Analyze a file whose language only has single-line comment syntax.

        Consecutive lines that start with the marker are grouped: a run of two
        or more lines counts as one block comment, a lone line as a single
        comment. A marker found after code (outside quotes) counts as a single
        comment.

        Args:
            comment_syntax: indexable whose first item is the comment marker.

        Returns:
            The counter dictionary for the file.
        """
        marker = comment_syntax[0]
        marker_len = len(marker)
        # Local counters are updated here and handed back to the caller when done
        counts = self._new_counts()
        run_length = 0  # consecutive comment-only lines seen so far

        with open(self.filename, 'r', encoding='utf-8-sig') as source_file:
            for raw_line in source_file:
                counts["loc"] += 1
                line = raw_line.strip()

                if line.startswith(marker):
                    counts["todos"] += line.count(TODO)
                    run_length += 1
                    continue

                # Code, code-with-comment or an empty line ends the current run
                self._close_comment_run(counts, run_length)
                run_length = 0

                if marker in line:
                    # The marker may be part of a string,
                    # e.g: print("//string") //comment
                    marker_at = self._find_unquoted(line, marker)
                    if marker_at != -1:
                        counts["single_comments"] += 1
                        counts["todos"] += line[marker_at + marker_len:].count(TODO)

        # In case the file ended with a comment run
        self._close_comment_run(counts, run_length)

        counts["tot_comments"] = \
            counts["block_comment_lines"] + counts["single_comments"]
        return counts

    # TODO: Implement block only syntax like HTML: <!-- ... -->
    def comment_block(self, comment_syntax):
        """Placeholder for languages with block-only comment syntax.

        Not implemented yet: always returns a zeroed counter dictionary and
        does not read the file.
        """
        return self._new_counts()

    def comment_block_single(self, comment_syntax):
        """Analyze a file whose language has single-line and block comments.

        Args:
            comment_syntax: indexable where item 0 is the single-line marker
                and item 1 is the (opening, closing) pair of block delimiters.

        Returns:
            The counter dictionary for the file.
        """
        single_marker = comment_syntax[0]
        block_open = comment_syntax[1][0]
        block_close = comment_syntax[1][1]
        single_len = len(single_marker)
        open_len = len(block_open)
        # Local counters are updated here and handed back to the caller when done
        counts = self._new_counts()
        in_block = False  # currently inside an unterminated block comment
        block_lines = 0   # lines seen in the block that is currently open

        # utf-8-sig strips a leading BOM so startswith() works on the first line
        with open(self.filename, 'r', encoding='utf-8-sig') as source_file:
            for raw_line in source_file:
                counts["loc"] += 1
                line = raw_line.strip()

                if in_block:
                    # Inside a block every line belongs to it, whatever it contains
                    block_lines += 1
                    counts["todos"] += line.count(TODO)
                    if block_close in line:
                        counts["block_comments"] += 1
                        counts["block_comment_lines"] += block_lines
                        block_lines = 0
                        in_block = False
                elif line.startswith(single_marker):
                    counts["todos"] += line.count(TODO)
                    counts["single_comments"] += 1
                elif line.startswith(block_open):
                    counts["todos"] += line.count(TODO)
                    # Look for the closer after the opener so that overlapping
                    # or identical delimiters are not mistaken for a closer
                    in_block = self._start_block(counts, line[open_len:], block_close)
                    block_lines = 1 if in_block else 0
                else:
                    # Code followed by a comment: whichever marker appears
                    # first outside a string decides the kind of comment
                    single_at = -1
                    if single_marker in line:
                        single_at = self._find_unquoted(line, single_marker)
                    block_at = -1
                    if block_open in line:
                        block_at = self._find_unquoted(line, block_open)

                    if single_at != -1 and (block_at == -1 or single_at <= block_at):
                        counts["single_comments"] += 1
                        counts["todos"] += line[single_at + single_len:].count(TODO)
                    elif block_at != -1:
                        remainder = line[block_at + open_len:]
                        counts["todos"] += remainder.count(TODO)
                        in_block = self._start_block(counts, remainder, block_close)
                        block_lines = 1 if in_block else 0

        counts["tot_comments"] = \
            counts["block_comment_lines"] + counts["single_comments"]
        return counts

    # Main method for comment analysis
    def analyze_code(self):
        """Analyze the file with the method matching its comment syntax.

        The syntax condition looked up for the extension selects the analyzer:
            True = BLOCK_SINGLE, False = SINGLE_ONLY, BLOCK_ONLY

        Returns:
            None after a successful analysis or when the extension is not
            supported (an error is printed), 0 when the syntax condition is
            not supported.
        """
        unsupported = "ERROR: File extension not supported, see extensions.py"
        if self.file_extension is False:
            print(unsupported)
            return None
        try:
            style_name, syntax_condition = EXTENSIONS_STYLE[self.file_extension]
        except KeyError:
            # Valid-looking filename whose extension has no configured style
            print(unsupported)
            return None

        # comment_syntax is the comment styling (containing the symbols)
        # syntax_condition defines if language uses block comments, multi syntax,
        # only block, etc..
        comment_syntax = STYLE_SYNTAX[style_name]

        # Equality (not identity) is kept on purpose: the concrete values used
        # in extensions.py are not visible from this module.
        if syntax_condition == True:
            counter_dict = self.comment_block_single(comment_syntax)
        elif syntax_condition == False:
            counter_dict = self.comment_single(comment_syntax)
        elif syntax_condition == BLOCK_ONLY:
            counter_dict = self.comment_block(comment_syntax)
        else:
            print("ERROR: Syntax Condition not supported")
            return 0

        self.counter_dict = counter_dict
        self.__check_in_status = True
        return None

    def analysis_output(self, option: str):
        """Output code analysis results.

        Options: 'terminal' prints one line per counter, 'JSON' returns the
        counter dictionary (a Python dict). Any other option does nothing.

        TODO: Implement multiple output formatting as required
        """
        counter_dict = self.counter_dict  # Prevent multiple calls to the instance dict
        if option == 'terminal':
            # This printing method works with Python 2.6+
            for key, value in counter_dict.items():
                print("Total # of %s: %d" % (OUTPUT_FORMAT[key], value))
        if option == 'JSON':
            return counter_dict
        # Other options can be implemented as required
        return None