Skip to content

Commit 0e54b6c

Browse files
Merge pull request #3 from Muawiya-contact/SE-PATH
SE-PATH
2 parents d68170b + 757f111 commit 0e54b6c

7 files changed

Lines changed: 83 additions & 55 deletions

File tree

3.1 KB
Binary file not shown.
6 KB
Binary file not shown.
1.79 KB
Binary file not shown.

SearchEngine/documents/doc2.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
Data structures are a fundamental concept in computer science. They are used to organize and store data efficiently. Examples include arrays, linked lists, stacks, and queues. Understanding these structures is key to writing effective algorithms.
1+
Data structures are a fundamental concept in computer science.
2+
They are used to organize and store data efficiently.
3+
Examples include arrays, linked lists, stacks, and queues.
4+
Understanding these structures is key to writing effective algorithms.

SearchEngine/index.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,53 @@
22
import string
33

44
class InvertedIndex:
5-
"""Inverted Index for storing word → doc → frequency."""
5+
"""A simple inverted index: word -> document -> frequency."""
66

77
def __init__(self):
8+
# Dictionary to store index
9+
# Example: { "word": {"doc1": 2, "doc2": 1} }
810
self.index = {}
911

1012
def add_doc(self, doc, text):
11-
text = text.lower().translate(str.maketrans('', '', string.punctuation))
12-
words = [w.strip() for w in text.split() if w.strip()]
13-
for w in words:
14-
if w not in self.index:
15-
self.index[w] = {}
16-
self.index[w][doc] = self.index[w].get(doc, 0) + 1
13+
"""Add a document to the index."""
14+
# Convert text to lowercase and remove punctuation
15+
clean_text = text.lower().translate(str.maketrans('', '', string.punctuation))
16+
17+
# Split text into words
18+
words = clean_text.split()
19+
20+
for word in words:
21+
if word not in self.index:
22+
self.index[word] = {}
23+
# Count how many times a word appears in a document
24+
self.index[word][doc] = self.index[word].get(doc, 0) + 1
25+
26+
def _clean_query(self, query):
27+
"""Helper function to clean search queries."""
28+
query = query.lower().translate(str.maketrans('', '', string.punctuation))
29+
return query.split()
1730

1831
def search(self, query):
32+
"""Search for documents that contain all words in the query."""
1933
q_words = self._clean_query(query)
2034
if not q_words:
2135
return []
2236

37+
# Start with documents containing the first word
2338
if q_words[0] not in self.index:
2439
return []
2540
results = set(self.index[q_words[0]].keys())
2641

27-
for w in q_words[1:]:
28-
if w not in self.index:
42+
# Keep only docs that contain all other words
43+
for word in q_words[1:]:
44+
if word not in self.index:
2945
return []
30-
results &= set(self.index[w].keys())
46+
results = results & set(self.index[word].keys())
3147

48+
# Rank results by score (sum of word frequencies)
3249
ranked = []
3350
for doc in results:
34-
score = sum(self.index[w].get(doc, 0) for w in q_words)
51+
score = sum(self.index[word].get(doc, 0) for word in q_words)
3552
ranked.append({"doc": doc, "score": score})
3653

3754
return sorted(ranked, key=lambda x: x["score"], reverse=True)
38-
39-
def _clean_query(self, query):
40-
query = query.lower().translate(str.maketrans('', '', string.punctuation))
41-
return [w.strip() for w in query.split() if w.strip()]

SearchEngine/search.py

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,87 +3,99 @@
33
from stack import Stack
44
from index import InvertedIndex
55

6+
# Path to the folder that has all text files
7+
PATH = "./documents" # Relative path: resolved against the current working directory
8+
69
class SearchSim:
7-
"""Search engine simulation using Inverted Index + Stack (for history)."""
10+
"""A simple search engine simulation."""
811

9-
def __init__(self, path="./"):
10-
self.index = InvertedIndex()
11-
self.history = Stack()
12-
self.path = path
13-
self.results = []
14-
self._load()
12+
def __init__(self, path=PATH):
13+
self.index = InvertedIndex() # for word search
14+
self.history = Stack() # to store search history
15+
self.path = path # folder path
16+
self.results = [] # last search results
17+
self._load() # load documents into index
1518

1619
def _load(self):
17-
print("Building index...")
20+
"""Load all text files and build the index."""
21+
print("Loading index...")
1822
try:
1923
files = [f for f in os.listdir(self.path) if f.endswith(".txt")]
2024
if not files:
21-
print("No .txt files in documents/")
25+
print("No .txt files found in documents folder.")
2226
for f in files:
23-
doc = os.path.splitext(f)[0]
27+
doc = os.path.splitext(f)[0] # filename without .txt
2428
with open(os.path.join(self.path, f), "r", encoding="utf-8") as file:
2529
self.index.add_doc(doc, file.read())
26-
print(f"Index built with {len(files)} docs.")
30+
print(f"Index built with {len(files)} documents.")
2731
except FileNotFoundError:
28-
print("documents/ folder not found.")
32+
print("Documents folder not found!")
2933
exit()
3034

3135
def run(self):
36+
"""Main loop for user interaction."""
3237
while True:
33-
user_input = input("\nEnter search query, 'back', 'show', or 'quit': ").strip()
34-
if user_input.lower() == "quit":
38+
user_input = input("\nEnter search query, 'back', 'show', or 'quit': ").strip().lower()
39+
40+
if user_input == "quit":
41+
print("Goodbye!")
3542
break
36-
elif user_input.lower() == "back":
43+
elif user_input == "back":
3744
self._back()
38-
elif user_input.lower() == "show":
45+
elif user_input == "show":
3946
self.history.show()
40-
else:
47+
elif user_input: # if user typed something
4148
self._search(user_input)
4249

43-
def _search(self, q):
44-
self.history.push(q)
45-
print(f"\nSearching: '{q}'")
46-
self.results = self.index.search(q)
50+
def _search(self, query):
51+
"""Handle search queries."""
52+
self.history.push(query)
53+
print(f"\nSearching for: '{query}'")
4754

55+
self.results = self.index.search(query)
4856
if not self.results:
49-
print("No matches.")
57+
print("No matches found.")
5058
return
5159

52-
print(f"Found {len(self.results)} docs:")
53-
for i, r in enumerate(self.results, 1):
60+
print(f"Found {len(self.results)} document(s):")
61+
for i, r in enumerate(self.results, start=1):
5462
print(f"{i}. {r['doc']}.txt | Score: {r['score']}")
5563

5664
self._open_doc()
5765

5866
def _open_doc(self):
67+
"""Open and show the contents of a selected document."""
5968
while True:
60-
sel = input("\nEnter doc number to open, or 'next': ").strip().lower()
61-
if sel == "next":
69+
choice = input("\nEnter document number to open, or 'next': ").strip().lower()
70+
if choice == "next":
6271
break
6372
try:
64-
i = int(sel)
65-
if 1 <= i <= len(self.results):
66-
doc = self.results[i - 1]["doc"]
73+
num = int(choice)
74+
if 1 <= num <= len(self.results):
75+
doc = self.results[num - 1]["doc"]
6776
with open(os.path.join(self.path, f"{doc}.txt"), "r", encoding="utf-8") as f:
6877
print(f"\n--- {doc}.txt ---\n{f.read()}\n------------------")
6978
else:
7079
print("Invalid number.")
7180
except ValueError:
72-
print("Enter a number or 'next'.")
81+
print("Please enter a valid number or 'next'.")
7382
except FileNotFoundError:
7483
print("File not found.")
7584

7685
def _back(self):
86+
"""Go back to the previous search query."""
7787
if len(self.history.items) <= 1:
78-
print("No previous search.")
88+
print("No previous search available.")
7989
return
80-
self.history.pop()
81-
prev = self.history.peek()
82-
print(f"\nBack to: '{prev}'")
83-
self.results = self.index.search(prev)
90+
91+
self.history.pop() # remove current search
92+
prev_query = self.history.peek() # previous query, now on top of the stack
93+
print(f"\nBack to: '{prev_query}'")
94+
95+
self.results = self.index.search(prev_query)
8496
if not self.results:
85-
print("No matches.")
97+
print("No matches found.")
8698
else:
87-
for i, r in enumerate(self.results, 1):
99+
for i, r in enumerate(self.results, start=1):
88100
print(f"{i}. {r['doc']}.txt | Score: {r['score']}")
89101
self._open_doc()

SearchEngine/stack.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# stack.py
1+
# stack.py
22

33
class Stack:
44
"""Custom Stack implementation using list (LIFO)."""
@@ -24,4 +24,4 @@ def peek(self):
2424
return None if self.empty() else self.items[-1]
2525

2626
def show(self):
27-
print("Current Stack (top bottom):", list(reversed(self.items)) if self.items else "Empty")
27+
print("Current Stack (top TO bottom):", list(reversed(self.items)) if self.items else "Empty")

0 commit comments

Comments
 (0)