-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathsort_counter_mp.py
67 lines (43 loc) · 1.49 KB
/
sort_counter_mp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Code Listing #10
"""
Sort the numbers stored in a set of disk files by counting occurrences, using multiple processes
"""
# sort_counter_mp.py
import sys
import time
import collections
from multiprocessing import Pool
# Inclusive upper bound of the integers written out by sort_files(),
# which scans the values 1..MAXINT.
MAXINT = 100000
def sorter(filenames):
    """Count how often each line occurs across a batch of files.

    Args:
        filenames: Iterable of paths to text files, read line by line.

    Returns:
        A ``collections.defaultdict(int)`` mapping each line, always
        terminated by a newline, to the number of times it was read.

    Raises:
        OSError: If one of the files cannot be opened.
    """
    counter = collections.defaultdict(int)
    for filename in filenames:
        # The context manager closes each file as soon as it has been read
        # instead of leaving the handle open until garbage collection.
        with open(filename) as fp:
            for line in fp:
                # A final line lacking its trailing newline must be counted
                # under the same key as its newline-terminated duplicates.
                if not line.endswith('\n'):
                    line += '\n'
                counter[line] += 1
    return counter
def batch_files(pool_size, limit):
    """Split the input file names into one batch per worker process.

    The files are named ``numbers/numbers_<j>.txt`` for every ``j`` in
    ``range(limit)``.

    Args:
        pool_size: Number of batches to create.
        limit: Total number of input files.

    Returns:
        A list of ``pool_size`` lists of file names, in ascending file
        index order. Batch sizes differ by at most one.

    Raises:
        ZeroDivisionError: If ``pool_size`` is 0.
    """
    base_size, extra = divmod(limit, pool_size)
    filenames = []
    start = 0
    for i in range(pool_size):
        # The first `extra` batches take one additional file, so files left
        # over when limit is not a multiple of pool_size are not dropped.
        stop = start + base_size + (1 if i < extra else 0)
        filenames.append(
            ['numbers/numbers_%d.txt' % j for j in range(start, stop)]
        )
        start = stop
    return filenames
def sort_files(pool_size, filenames):
    """Count lines per batch in parallel, then write the numbers in order.

    Each batch in `filenames` is handed to sorter() through a Pool of
    `pool_size` processes. The integers 1..MAXINT are then written to
    'sorted_nums.txt' in ascending order, each one repeated as many times
    as its line was counted across all batches.
    """
    with Pool(pool_size) as workers:
        counters = workers.map(sorter, filenames)

    with open('sorted_nums.txt', 'w') as out:
        for number in range(1, MAXINT + 1):
            # Counter keys are raw lines, so the lookup key keeps the newline.
            line = str(number) + '\n'
            total = 0
            for tally in counters:
                total += tally.get(line, 0)
            if total > 0:
                out.write(line * total)

    print('Sorted')
if __name__ == "__main__":
    # The one required argument is the number of input files to sort;
    # fail with a usage message rather than an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s <number_of_files>' % sys.argv[0])
    limit = int(sys.argv[1])
    # Number of worker processes; batch_files() builds one batch per worker.
    pool_size = 4
    filenames = batch_files(pool_size, limit)
    sort_files(pool_size, filenames)