-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathurlrank2.py
59 lines (43 loc) · 1.67 KB
/
urlrank2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Code listing #21
""" Module urlrank - Rank URLs in order of degree of a specific word frequency """
# Note: This is urlrank.py rewritten on top of rankbase.RankBase, hence the name urlrank2.py
import requests
import operator
from rankbase import RankBase
class UrlRank(object):
    """Accept URLs as inputs and rank them by how often a word occurs in them.

    The word is normalised (stripped and lower-cased) once at construction
    time; matching against page tokens is case-insensitive.

    NOTE: this module defines a second ``UrlRank`` (derived from RankBase)
    further down, which rebinds the name. After the module has finished
    importing, ``UrlRank`` refers to that later class, not to this one.
    """

    def __init__(self, word, *urls):
        """Store the normalised search word and the URLs to rank.

        Args:
            word: The word to count; surrounding whitespace and case are ignored.
            *urls: Zero or more URLs whose content will be fetched by rank().
        """
        self.word = word.strip().lower()
        self.urls = urls

    def _count_word(self, data):
        """Return how many whitespace-separated tokens of *data* equal self.word.

        Args:
            data: Page content, either text or raw bytes.
        """
        # On Python 3 the HTTP payload is bytes while self.word is str, so
        # the two would never compare equal. On Python 2, bytes *is* str and
        # this branch is skipped, leaving the payload untouched.
        if isinstance(data, bytes) and not isinstance(data, str):
            # 'replace' keeps counting usable on pages that are not valid UTF-8.
            data = data.decode('utf-8', 'replace')
        # split() with no arguments never yields empty or padded tokens, so
        # only case needs normalising here. Counting through a generator
        # avoids calling .count() on map(), which is an iterator on Python 3.
        return sum(1 for token in data.split() if token.lower() == self.word)

    def rank(self):
        """Rank the URLs by occurrences of the word.

        Each URL is fetched with requests.get() and its content is split on
        whitespace to count matches.

        Returns:
            A list of (url, count) tuples in decreasing order of count. URLs
            with equal counts keep the order in which they were supplied.
        """
        occurs = [(url, self._count_word(requests.get(url).content))
                  for url in self.urls]
        # Return in sorted order
        return sorted(occurs, key=operator.itemgetter(1), reverse=True)
class UrlRank(RankBase):
    """URL-oriented ranker layered on RankBase.

    Takes a word plus any number of URLs, downloads each URL and hands the
    downloaded content to the parent class for counting.
    """

    def __init__(self, word, *urls):
        """Remember the normalised word and the URLs to be ranked."""
        normalised = word.strip().lower()
        self.word = normalised
        self.urls = urls

    def rank(self):
        """Fetch every URL and rank the URLs by the parent's counts.

        Builds (url, count) pairs from the mapping returned by the parent's
        rank() and returns whatever self.sort() produces for those pairs.
        self.sort is not defined in this class (presumably inherited from
        RankBase - confirm); the intended result is the pairs in decreasing
        order of occurrences.
        """
        # One payload per URL, in the same order as self.urls.
        # NOTE(review): .content is raw bytes on Python 3 - confirm that
        # RankBase.rank copes with bytes input.
        payloads = [requests.get(link).content for link in self.urls]
        counts = super(UrlRank, self).rank(*payloads)
        # The keys of the parent's result are used as indices into
        # self.urls, translating each entry back to the URL it came from.
        pairs = []
        for position, hits in counts.items():
            pairs.append((self.urls[position], hits))
        return self.sort(pairs)
if __name__ == "__main__":
    import sys

    # Every command-line argument after the script name is treated as a URL;
    # the URLs are ranked for the word 'python' and the result is printed.
    cli_urls = sys.argv[1:]
    ranker = UrlRank('python', *cli_urls)
    print(ranker.rank())