Skip to content

Commit 735566e

Browse files
committed
added better error handling
1 parent 1d0789d commit 735566e

3 files changed

Lines changed: 26 additions & 20 deletions

File tree

graphsense/graphsense.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ def faiss_rocksdb_dump(self):
250250
pickle.dump(pca, file)
251251
txt_embeddings_for_faiss = reduced_txt_embeddings.astype(np.float16)
252252
else:
253+
print(f"Warning: PCA model can't be trained as vocabulary is less than {vectors_for_faiss.shape[1]}")
254+
print(f"Warning: OOV code lines will not be supported during inference")
253255
txt_embeddings_for_faiss = txt_embeddings.astype(np.float16)
254256

255257
print(txt_embeddings_for_faiss.shape)
@@ -390,7 +392,7 @@ def load_artifacts(self):
390392
with open('output/artifacts/pca_model.pkl', 'rb') as file:
391393
self.loaded_pca = pickle.load(file)
392394
except FileNotFoundError:
393-
print("The PCA model file was not found. Ensure it is available for OOV handling")
395+
print("The PCA model file was not found. Ensure it is available for OOV handling\nIf vocabulary is too small, PCA model does not get created")
394396

395397

396398
# Function to track execution time and peak RAM usage
@@ -403,7 +405,7 @@ def infer(self, line, top_k=10):
403405
top_k (int): top k suggestions to return (default: 10)
404406
"""
405407

406-
if self.index == None or self.idx_to_line == None or self.line_to_idx == None or self.txt_embed_index == None or self.txt_embed_model == None or self.loaded_pca == None:
408+
if self.index == None or self.idx_to_line == None or self.line_to_idx == None or self.txt_embed_index == None or self.txt_embed_model == None:
407409
print("Please load artifacts first using: load_artifacts()")
408410
sys.exit(1)
409411

@@ -416,23 +418,27 @@ def infer(self, line, top_k=10):
416418
print(f"Index: {query_index}")
417419
else:
418420
print("Line not found")
419-
# handle OOV
420-
oov_vector = self.txt_embed_model.encode(line)
421-
# Reshape to (1, dim), Faiss expects a 2D array for a single query
422-
oov_vector = np.expand_dims(oov_vector, axis=0)
421+
try:
422+
# handle OOV
423+
oov_vector = self.txt_embed_model.encode(line)
424+
# Reshape to (1, dim), Faiss expects a 2D array for a single query
425+
oov_vector = np.expand_dims(oov_vector, axis=0)
423426

424-
oov_vector = self.loaded_pca.transform(oov_vector) # reduce dimensions to 128
425-
426-
oov_vector = oov_vector.astype(np.float16)
427-
428-
# Perform FAISS search
429-
distances, indices = self.txt_embed_index.search(oov_vector, 1)
430-
# Retrieve syntactically matching line
431-
matched_line = self.idx_to_line.get(struct.pack("i", indices[0][0])).decode()
432-
print("oov matched to: ", matched_line)
433-
query_index = indices[0][0]
434-
query_index = int(query_index)
435-
print(f"Matched Index: {query_index}")
427+
oov_vector = self.loaded_pca.transform(oov_vector) # reduce dimensions to 128
428+
429+
oov_vector = oov_vector.astype(np.float16)
430+
431+
# Perform FAISS search
432+
distances, indices = self.txt_embed_index.search(oov_vector, 1)
433+
# Retrieve syntactically matching line
434+
matched_line = self.idx_to_line.get(struct.pack("i", indices[0][0])).decode()
435+
print("oov matched to: ", matched_line)
436+
query_index = indices[0][0]
437+
query_index = int(query_index)
438+
print(f"Matched Index: {query_index}")
439+
except Exception:
440+
print("Error: ensure PCA model is in output/artifacts. If model was not created due to low vocabulary size, increase unique code lines in dataset and train again.")
441+
return []
436442

437443
# Load vector dynamically using index to minimize memory usage
438444
query_vector = np.array([self.index.reconstruct(query_index)], dtype=np.float16) # Dynamically load vector using FAISS

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="graphsense",
8-
version="0.0.2",
8+
version="0.0.3",
99
description="GraphSense is a framework that can be used to easily train and use code suggestion models with minimal data preprocessing and resource consumption",
1010
packages=find_packages(),
1111
long_description=long_description,

setup_instruction.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ for publishing:
99
pip install twine
1010

1111
to install locally for testing:
12-
pip install dist/graphsense-0.0.2-py3-none-any.whl
12+
pip install dist/graphsense-0.0.3-py3-none-any.whl
1313

1414
finally run:
1515
twine upload dist/*

0 commit comments

Comments
 (0)