-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathtextrank2.py
57 lines (42 loc) · 1.73 KB
/
textrank2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Code listing #20
""" Module textrank - Rank text files by how often a specific word occurs in them. """
# Note: This is textrank.py rewritten to use rankbase, so called textrank2.py
import operator
from rankbase import RankBase
class TextRank(object):
    """ Accept text files as inputs and rank them in
    terms of how much a word occurs in them.

    NOTE(review): a later class in this module is also named
    ``TextRank`` and rebinds the name once it is defined - confirm
    whether this standalone version is still meant to be reachable.
    """

    def __init__(self, word, *filenames):
        """ Remember the word to look for and the files to rank.

        word      -- the word to count; it is stripped and lower-cased
                     so that matching is case-insensitive.
        filenames -- paths of the text files to be ranked.
        """
        self.word = word.strip().lower()
        self.filenames = filenames

    def rank(self):
        """ Rank the files. A list of (filename, #occur) tuples is
        returned in decreasing order of occurrences. Files with the
        same count keep the order in which they were passed in.

        Whatever error open() raises for an unreadable path is
        propagated to the caller.
        """
        occurs = []
        for fpath in self.filenames:
            # Close the handle deterministically instead of relying on
            # garbage collection.
            with open(fpath) as infile:
                data = infile.read()
            # Build a real list: map() returns a lazy iterator on
            # Python 3, which has no count() method.
            words = [token.lower().strip() for token in data.split()]
            occurs.append((fpath, words.count(self.word)))
        # Return in sorted order, highest count first
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
class TextRank(RankBase):
    """ Accept text files as inputs and rank them in
    terms of how much a word occurs in them.

    The counting and sorting are delegated to RankBase; this class
    only reads the files and maps the results back to file names.
    """

    def __init__(self, word, *filenames):
        """ Remember the word to look for and the files to rank.

        word      -- the word to count; stripped and lower-cased.
        filenames -- paths of the text files to be ranked.

        NOTE(review): RankBase.__init__ is not invoked here - confirm
        the base class needs no state beyond ``self.word``.
        """
        self.word = word.strip().lower()
        self.filenames = filenames

    def rank(self):
        """ Rank the files. The (filename, #occur) pairs are handed to
        self.sort() and its result is returned (presumably in
        decreasing order of occurrences - verify against RankBase).
        """
        # Read every file up front, closing each handle as soon as its
        # contents are in memory, and pass a concrete list onwards.
        texts = []
        for fpath in self.filenames:
            with open(fpath) as infile:
                texts.append(infile.read())

        # Assumes RankBase.rank() returns a mapping whose keys are the
        # positional indices of the texts - TODO confirm against RankBase.
        occurs = super(TextRank, self).rank(*texts)
        # Convert to filename list
        ranked = [(self.filenames[idx], count) for idx, count in occurs.items()]
        return self.sort(ranked)
if __name__ == "__main__":
    import sys

    # Rank the files named on the command line by how often the word
    # 'common' occurs in them. A single-argument print(...) call gives the
    # same output on Python 2 and Python 3, whereas the print statement is
    # a syntax error on Python 3.
    print(TextRank('common', *sys.argv[1:]).rank())