-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathtextrank.py
30 lines (22 loc) · 908 Bytes
/
textrank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Code listing #17
""" Module textrank - Rank text files in order of degree of a specific word frequency. """
import operator
class TextRank(object):
""" Accept text files as inputs and rank them in
terms of how much a word occurs in them """
def __init__(self, word, *filenames):
self.word = word.strip().lower()
self.filenames = filenames
def rank(self):
""" Rank the files. A tuple is returned with
(filename, #occur) in decreasing order of
occurences """
occurs = []
for fpath in self.filenames:
data = open(fpath).read()
words = map(lambda x: x.lower().strip(), data.split())
# Filter empty words
count = words.count(self.word)
occurs.append((fpath, count))
# Return in sorted order
return sorted(occurs, key=operator.itemgetter(1), reverse=True)