-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCoOccurrence.py
More file actions
93 lines (80 loc) · 2.67 KB
/
CoOccurrence.py
File metadata and controls
93 lines (80 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Vertex represents a single word in the co-occurrence graph.
# Each vertex stores the word itself, the set of other words it
# is connected to (its edges), and the number of times the word
# occurs (its frequency).
class Vertex:
    """A single word (node) of the co-occurrence graph.

    Attributes:
        word: The word this vertex stands for ('' until one is set).
        edges: Set of the values passed to addEdge(); adding the same
            value twice keeps a single entry.
        frequency: Number of times incFrequency() has been called.
    """

    def __init__(self, word=''):
        """Create a vertex with no edges and a frequency of zero.

        Args:
            word: Optional word for the vertex. Defaults to '' so that
                no-argument construction followed by setWord() still
                works.
        """
        self.word = word
        self.edges = set()
        self.frequency = 0

    def __repr__(self):
        """Return a debugging representation showing all three fields."""
        return 'Vertex(word=%r, frequency=%r, edges=%r)' % (
            self.word, self.frequency, self.edges)

    def setWord(self, w):
        """Set the word this vertex represents."""
        self.word = w

    def addEdge(self, e):
        """Connect this vertex to `e` (no effect if already connected)."""
        self.edges.add(e)

    def incFrequency(self):
        """Increment the occurrence count of this vertex by one."""
        self.frequency += 1
# CoGraph represents the co-occurrence graph. The graph instance
# variable stores a dictionary representation of the graph: the
# keys are the words (vertices) and the values are the Vertex
# instances for those words.
# The keywords instance variable refers to the candidate keyword
# dictionary passed to createGraph (the same object, not a copy),
# extracted from the text files.
# The scores instance variable is a dictionary whose keys are the
# candidate keywords and whose values are the graph scores of
# those keyword candidates.
class CoGraph:
    """Word co-occurrence graph built from candidate keyword phrases.

    Attributes:
        graph: Dict mapping each word to the Vertex created for it.
        scores: Dict mapping each candidate keyword to its score;
            filled in by calcScore().
        keywords: The keyword collection most recently passed to
            createGraph() (stored by reference, not copied).
    """

    def __init__(self):
        """Create an empty graph with no keywords and no scores."""
        self.graph = {}
        self.scores = {}
        self.keywords = {}

    def createGraph(self, keyword_map):
        """Add every word of every candidate keyword to the graph.

        Each key is split on whitespace. Every occurrence of a word
        raises the frequency of its vertex by one, and the vertex gains
        an edge to each different word of the same key. Vertices already
        in the graph are updated rather than replaced.

        Args:
            keyword_map: Iterable of candidate keyword strings; when a
                dict is given only its keys are read here.
        """
        self.keywords = keyword_map
        for key in keyword_map:
            words = key.split()
            for word in words:
                vertex = self.graph.get(word)
                if vertex is None:
                    # First time this word is seen: register a new vertex.
                    vertex = Vertex()
                    vertex.setWord(word)
                    self.graph[word] = vertex
                vertex.incFrequency()
                for other in words:
                    # A word is never connected to itself.
                    if other != word:
                        vertex.addEdge(other)

    def calcScore(self):
        """Compute the score of each keyword stored by createGraph().

        The score of a keyword is the sum, over its whitespace-separated
        words, of (number of edges of the word) / (frequency of the
        word). Results are written to self.scores, keyed by keyword.

        Raises:
            KeyError: If a keyword contains a word missing from
                self.graph.
        """
        for key in self.keywords:
            key_score = 0.0
            for word in key.split():
                vertex = self.graph[word]
                # float() keeps this a true division on Python 2 as well.
                key_score += float(len(vertex.edges)) / float(vertex.frequency)
            self.scores[key] = key_score
# Sample candidate-keyword map. CoGraph.createGraph and
# CoGraph.calcScore only iterate over the keys; every value here is 1.
keyword_map = {
    'Present': 1,
    'Scalable Distributed Information Management System (SDIMS)': 1,
    'aggregates information': 1,
    'large-scale networked systems': 1,
    'large-scale distributed applications': 1,
    'information': 1,
    'summary': 1,
    'global': 1,
}

# Example usage (disabled):
# g = CoGraph()
# g.createGraph(keyword_map)
# g.calcScore()
# for key, value in g.scores.items():
#     print(key + ': ' + str(value))