Skip to content

Added new code #179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from Oct 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions sorts/countingsort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Python program for counting sort

# This is the main function that sorts the characters of the given
# string arr in alphabetical order
def countSort(arr):
    """Sort the characters of ``arr`` using counting sort.

    Args:
        arr: A string (or other sequence of single characters) whose
            characters all have code points in the range 0-255.

    Returns:
        A new list containing the characters of ``arr`` in ascending
        code-point order. Equal characters keep their relative order.

    Raises:
        IndexError: If a character has a code point above 255.
    """
    alphabet_size = 256

    # count[c] = number of occurrences of the character with code point c.
    count = [0] * alphabet_size
    for ch in arr:
        count[ord(ch)] += 1

    # Turn the counts into prefix sums so that count[c] is one past the
    # last output slot reserved for character c. The loop starts at 1:
    # index 0 has no predecessor, and count[0 - 1] would silently wrap
    # around to count[255].
    for code in range(1, alphabet_size):
        count[code] += count[code - 1]

    # The output is sized after the input rather than the alphabet, so
    # inputs longer than 256 characters are handled.
    output = [""] * len(arr)

    # Walk the input backwards so that equal characters are placed in
    # their original relative order (stable sort).
    for ch in reversed(arr):
        code = ord(ch)
        count[code] -= 1
        output[count[code]] = ch

    return output

# Demo: sort a sample string and print the characters joined back together.
arr = "thisisthestring"
ans = countSort(arr)
print("Sorted string array is %s" % "".join(ans))
161 changes: 161 additions & 0 deletions sorts/external-sort.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python

#
# Sort large text files in a minimum amount of memory
#
import os
import sys
import argparse

class FileSplitter(object):
    """Split a text file into individually sorted block files.

    Block files are named after ``BLOCK_FILENAME_FORMAT``; the name has no
    directory component, so they are created relative to the current
    working directory.
    """

    BLOCK_FILENAME_FORMAT = 'block_{0}.dat'

    def __init__(self, filename):
        """Remember the file to split; no I/O happens here.

        Args:
            filename: Path of the text file to split.
        """
        self.filename = filename
        # Names of the block files written so far, in creation order.
        self.block_filenames = []

    def write_block(self, data, block_number):
        """Write ``data`` to the block file numbered ``block_number``.

        The resulting filename is appended to ``self.block_filenames``.
        """
        filename = self.BLOCK_FILENAME_FORMAT.format(block_number)
        # The context manager closes the file even if the write fails.
        with open(filename, 'w') as block_file:
            block_file.write(data)
        self.block_filenames.append(filename)

    def get_block_filenames(self):
        """Return the list of block filenames written so far."""
        return self.block_filenames

    def split(self, block_size, sort_key=None):
        """Read the source file in chunks, sort each chunk and store it.

        Args:
            block_size: Size hint passed to ``readlines`` for each chunk.
            sort_key: Optional key function used to sort the lines of a
                chunk; ``None`` sorts the lines themselves.
        """
        block_number = 0

        with open(self.filename, 'r') as source:
            while True:
                lines = source.readlines(block_size)
                if not lines:
                    break

                # Only the very last line of the file can lack a newline.
                # Terminate it before sorting, otherwise it may move to
                # the middle of the block and fuse with the next line.
                if not lines[-1].endswith('\n'):
                    lines[-1] += '\n'

                lines.sort(key=sort_key)
                self.write_block(''.join(lines), block_number)
                block_number += 1

    def cleanup(self):
        """Delete every block file written by this splitter."""
        # An explicit loop is required: map() is lazy on Python 3, so
        # mapping os.remove over the names would delete nothing there.
        for filename in self.block_filenames:
            os.remove(filename)
        # Forget the removed files so a second cleanup() is a no-op.
        del self.block_filenames[:]


class NWayMerge(object):
    """Merge strategy that picks the smallest of the candidate values."""

    def select(self, choices):
        """Return the index (or key) of the smallest value in ``choices``.

        Args:
            choices: Either a mapping of index -> value (its keys need not
                be contiguous) or a plain sequence of values.

        Returns:
            The key/index of the smallest value, or -1 if ``choices`` is
            empty. On ties the lowest key/index wins.
        """
        if hasattr(choices, 'keys'):
            # Mapping keys may be sparse, so iterate over the real keys
            # instead of assuming 0..len-1. Sorting makes ties
            # deterministic.
            indices = sorted(choices.keys())
        else:
            indices = range(len(choices))

        min_index = -1
        min_str = None

        for i in indices:
            candidate = choices[i]
            if min_str is None or candidate < min_str:
                min_index = i
                # Track the running minimum; without this every
                # candidate would win and the last index would be
                # returned.
                min_str = candidate

        return min_index


class FilesArray(object):
    """Keep one look-ahead line per input file.

    ``files`` is indexed with the integers ``0 .. len(files) - 1`` and
    every entry must provide ``readline()``.
    """

    def __init__(self, files):
        """Start with every look-ahead slot unset and no file exhausted."""
        self.files = files
        self.num_buffers = len(files)
        # Indices of files whose readline() has returned ''.
        self.empty = set()
        # index -> pending line; None means "nothing buffered yet".
        self.buffers = dict.fromkeys(range(self.num_buffers))

    def get_dict(self):
        """Return ``{index: buffered value}`` for files not yet exhausted."""
        pending = {}
        for index in range(self.num_buffers):
            if index in self.empty:
                continue
            pending[index] = self.buffers[index]
        return pending

    def refresh(self):
        """Fill every unset slot from its file.

        Returns:
            False once every file has been exhausted, True otherwise.
        """
        for index in range(self.num_buffers):
            slot_is_unset = self.buffers[index] is None
            if not slot_is_unset or index in self.empty:
                continue

            line = self.files[index].readline()
            self.buffers[index] = line
            # readline() returning '' marks this file as exhausted.
            if line == '':
                self.empty.add(index)

        return len(self.empty) != self.num_buffers

    def unshift(self, index):
        """Take the buffered value for ``index`` and mark its slot unset."""
        value, self.buffers[index] = self.buffers[index], None
        return value


class FileMerger(object):
    """Merge several line-oriented files into one output file.

    The choice of which pending line to emit next is delegated to
    ``merge_strategy.select``.
    """

    def __init__(self, merge_strategy):
        """Store the strategy object whose ``select`` drives the merge."""
        self.merge_strategy = merge_strategy

    @staticmethod
    def _normalize_buffering(buffer_size):
        """Return a ``buffering`` value that ``open()`` accepts.

        ``open()`` requires an integer and, on Python 3, rejects 0 for
        text files. Non-positive sizes are mapped to -1, which selects
        the default buffering policy.
        """
        size = int(buffer_size)
        return size if size > 0 else -1

    def merge(self, filenames, outfilename, buffer_size):
        """Merge ``filenames`` into ``outfilename``.

        Args:
            filenames: Paths of the input files to merge.
            outfilename: Path of the file to write.
            buffer_size: Requested buffer size for every file opened.
        """
        buffering = self._normalize_buffering(buffer_size)

        # The output file is closed (and flushed) by the with-block.
        with open(outfilename, 'w', buffering) as outfile:
            files = self.get_file_handles(filenames, buffer_size)
            try:
                buffers = FilesArray(files)

                while buffers.refresh():
                    min_index = self.merge_strategy.select(buffers.get_dict())
                    outfile.write(buffers.unshift(min_index))
            finally:
                # Close the inputs so callers can delete them afterwards.
                for handle in files.values():
                    handle.close()

    def get_file_handles(self, filenames, buffer_size):
        """Open every file for reading.

        Returns:
            A dict mapping the position of each name in ``filenames`` to
            its open file object. The caller must close the handles.
        """
        buffering = self._normalize_buffering(buffer_size)
        files = {}

        try:
            for i, name in enumerate(filenames):
                files[i] = open(name, 'r', buffering)
        except Exception:
            # Do not leak the handles opened before the failure.
            for handle in files.values():
                handle.close()
            raise

        return files



class ExternalSort(object):
    """Sort a text file by splitting it into sorted blocks and merging.

    NOTE(review): ``sort_key`` is passed to the split step only; the merge
    strategy is built as ``NWayMerge()`` without it. Confirm whether a
    custom key ordering is preserved by the merge.
    """

    def __init__(self, block_size):
        """Store the block size used for splitting and buffer sizing."""
        self.block_size = block_size

    def sort(self, filename, sort_key=None):
        """Sort ``filename`` and write the result to ``filename + '.out'``.

        Args:
            filename: Path of the text file to sort.
            sort_key: Optional key function forwarded to the split step.
        """
        num_blocks = self.get_number_blocks(filename, self.block_size)
        splitter = FileSplitter(filename)

        try:
            splitter.split(self.block_size, sort_key)

            merger = FileMerger(NWayMerge())
            # Floor division: open() needs an integer buffer size and
            # "/" produces a float on Python 3.
            buffer_size = self.block_size // (num_blocks + 1)
            if buffer_size < 1:
                # A size of 0 is rejected for text files on Python 3;
                # -1 selects open()'s default buffering instead.
                buffer_size = -1
            merger.merge(splitter.get_block_filenames(),
                         filename + '.out',
                         buffer_size)
        finally:
            # Remove the block files even if splitting or merging failed.
            splitter.cleanup()

    def get_number_blocks(self, filename, block_size):
        """Return the file size divided by ``block_size`` (floored) plus 1."""
        return (os.stat(filename).st_size // block_size) + 1


def parse_memory(string):
    """Convert a size such as ``'512'``, ``'64k'``, ``'100M'`` or ``'2G'``
    to an integer.

    A trailing ``k``, ``m`` or ``g`` (any case) multiplies the number in
    front of it by 1024, 1024**2 or 1024**3 respectively. Without such a
    suffix the whole string is converted with ``int``.
    """
    multipliers = {
        'k': 1024,
        'm': 1024 * 1024,
        'g': 1024 * 1024 * 1024,
    }

    factor = multipliers.get(string[-1].lower())
    if factor is None:
        return int(string)

    return int(string[:-1]) * factor



def main():
    """Parse the command line and sort the requested file.

    ``-m/--mem`` is converted with ``parse_memory`` and used as the block
    size of the ``ExternalSort`` instance that sorts ``<filename>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-m', '--mem',
        default='100M',
        help='amount of memory to use for sorting',
    )
    parser.add_argument(
        'filename',
        nargs=1,
        metavar='<filename>',
        help='name of file to sort',
    )
    options = parser.parse_args()

    memory_limit = parse_memory(options.mem)
    # nargs=1 makes argparse store the positional as a one-element list.
    ExternalSort(memory_limit).sort(options.filename[0])


if __name__ == '__main__':
    main()