-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathurlrank.py
31 lines (23 loc) · 890 Bytes
/
urlrank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Code listing #18
""" Module urlrank - Rank URLs in order of degree of a specific word frequency """
import operator
import requests
class UrlRank(object):
    """Rank URLs by how often a given word occurs in their content.

    The search word is normalised (stripped and lower-cased) once, at
    construction time; page content is lower-cased before counting, so
    matching is case-insensitive.
    """

    def __init__(self, word, *urls):
        """Store the search word and the URLs to rank.

        Args:
            word: Word to look for. Surrounding whitespace is removed and
                the word is lower-cased.
            *urls: Any number of URLs whose content will be searched.
        """
        self.word = word.strip().lower()
        self.urls = urls

    def _fetch(self, url):
        """Return the body of *url* as decoded text."""
        # Use the decoded text instead of the raw bytes: on Python 3 a
        # bytes token never compares equal to the str search word.
        return requests.get(url).text

    def rank(self):
        """Rank the URLs by the number of occurrences of the word.

        Returns:
            A list of ``(url, count)`` tuples in decreasing order of
            count. URLs with equal counts keep their input order.

        Raises:
            Any exception raised by ``requests.get`` propagates to the
            caller; nothing is caught here.
        """
        occurs = []
        for url in self.urls:
            # split() with no argument splits on runs of whitespace and
            # never yields empty tokens, so no extra filtering is needed.
            # A real list is required here: map() returns an iterator on
            # Python 3, and iterators have no count() method.
            words = self._fetch(url).lower().split()
            occurs.append((url, words.count(self.word)))

        # Highest count first; sorted() is stable, so ties keep input order.
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)