Add files via upload

TarrySingh · web-flow · commit ca8fad4d979a · 2018-08-03T06:19:21.000Z
diff --git a/pytorch/New_Tuts/sequential_tasks.py b/pytorch/New_Tuts/sequential_tasks.py
@@ -0,0 +1,196 @@
+import numpy as np
+from tensorflow.python.keras.utils import Sequence, to_categorical
+from tensorflow.python.keras.preprocessing.sequence import pad_sequences
+
+
+class EchoData(Sequence):
+
+    def __init__(self, series_length=40000, batch_size=32,
+                 echo_step=3, truncated_length=10, seed=None):
+
+        self.series_length = series_length
+        self.truncated_length = truncated_length
+        self.n_batches = series_length//truncated_length
+
+        self.echo_step = echo_step
+        self.batch_size = batch_size
+        if seed is not None:
+            np.random.seed(seed)
+        self.raw_x = None
+        self.raw_y = None
+        self.x_batches = []
+        self.y_batches = []
+        self.generate_new_series()
+        self.prepare_batches()
+
+    def __getitem__(self, index):
+        if index == 0:
+            self.generate_new_series()
+            self.prepare_batches()
+        return self.x_batches[index], self.y_batches[index]
+
+    def __len__(self):
+        return self.n_batches
+
+    def generate_new_series(self):
+        x = np.random.choice(
+            2,
+            size=(self.batch_size, self.series_length),
+            p=[0.5, 0.5])
+        y = np.roll(x, self.echo_step, axis=1)
+        y[:, 0:self.echo_step] = 0
+        self.raw_x = x
+        self.raw_y = y
+
+    def prepare_batches(self):
+        x = np.expand_dims(self.raw_x, axis=-1)
+        y = np.expand_dims(self.raw_y, axis=-1)
+        self.x_batches = np.split(x, self.n_batches, axis=1)
+        self.y_batches = np.split(y, self.n_batches, axis=1)
+
+
+class TemporalOrderExp6aSequence(Sequence):
+    """
+    From Hochreiter&Schmidhuber(1997):
+
+        The goal is to classify sequences. Elements and targets are represented locally
+        (input vectors with only one non-zero bit). The sequence starts with an E, ends
+        with a B (the "trigger symbol") and otherwise consists of randomly chosen symbols
+        from the set {a, b, c, d} except for two elements at positions t1 and t2 that are
+        either X or Y . The sequence length is randomly chosen between 100 and 110, t1 is
+        randomly chosen between 10 and 20, and t2 is randomly chosen between 50 and 60.
+        There are 4 sequence classes Q, R, S, U which depend on the temporal order of X and Y.
+        The rules are:
+            X, X -> Q,
+            X, Y -> R,
+            Y , X -> S,
+            Y , Y -> U.
+
+    """
+
+    def __init__(self, length_range=(100, 111), t1_range=(10, 21), t2_range=(50, 61),
+                 batch_size=32, seed=None):
+
+        self.classes = ['Q', 'R', 'S', 'U']
+        self.n_classes = len(self.classes)
+
+        self.relevant_symbols = ['X', 'Y']
+        self.distraction_symbols = ['a', 'b', 'c', 'd']
+        self.start_symbol = 'B'
+        self.end_symbol = 'E'
+
+        self.length_range = length_range
+        self.t1_range = t1_range
+        self.t2_range = t2_range
+        self.batch_size = batch_size
+
+        if seed is not None:
+            np.random.seed(seed)
+
+        all_symbols = self.relevant_symbols + self.distraction_symbols + \
+                      [self.start_symbol] + [self.end_symbol]
+        self.n_symbols = len(all_symbols)
+        self.s_to_idx = {s: n for n, s in enumerate(all_symbols)}
+        self.idx_to_s = {n: s for n, s in enumerate(all_symbols)}
+
+        self.c_to_idx = {c: n for n, c in enumerate(self.classes)}
+        self.idx_to_c = {n: c for n, c in enumerate(self.classes)}
+
+    def generate_pair(self):
+        length = np.random.randint(self.length_range[0], self.length_range[1])
+        t1 = np.random.randint(self.t1_range[0], self.t1_range[1])
+        t2 = np.random.randint(self.t2_range[0], self.t2_range[1])
+
+        x = np.random.choice(self.distraction_symbols, length)
+        x[0] = self.start_symbol
+        x[-1] = self.end_symbol
+
+        y = np.random.choice(self.classes)
+        if y == 'Q':
+            x[t1], x[t2] = self.relevant_symbols[0], self.relevant_symbols[0]
+        elif y == 'R':
+            x[t1], x[t2] = self.relevant_symbols[0], self.relevant_symbols[1]
+        elif y == 'S':
+            x[t1], x[t2] = self.relevant_symbols[1], self.relevant_symbols[0]
+        else:
+            x[t1], x[t2] = self.relevant_symbols[1], self.relevant_symbols[1]
+
+        return ''.join(x), y
+
+    # encoding/decoding single instance version
+
+    def encode_x(self, x):
+        idx_x = [self.s_to_idx[s] for s in x]
+        return to_categorical(idx_x, num_classes=self.n_symbols)
+
+    def encode_y(self, y):
+        idx_y = self.c_to_idx[y]
+        return to_categorical(idx_y, num_classes=self.n_classes)
+
+    def decode_x(self, x):
+        x = x[np.sum(x, axis=1) > 0]    # remove padding
+        return ''.join([self.idx_to_s[pos] for pos in np.argmax(x, axis=1)])
+
+    def decode_y(self, y):
+        return self.idx_to_c[np.argmax(y)]
+
+    # encoding/decoding batch versions
+
+    def encode_x_batch(self, x_batch):
+        return pad_sequences([self.encode_x(x) for x in x_batch],
+                             maxlen=self.length_range[1])
+
+    def encode_y_batch(self, y_batch):
+        return np.array([self.encode_y(y) for y in y_batch])
+
+    def decode_x_batch(self, x_batch):
+        return [self.decode_x(x) for x in x_batch]
+
+    def decode_y_batch(self, y_batch):
+        return [self.idx_to_c[pos] for pos in np.argmax(y_batch, axis=1)]
+
+    def __len__(self):
+        """ Let's assume 1000 sequences as the size of data. """
+        return int(1000. / self.batch_size)
+
+    def __getitem__(self, index):
+        batch_x, batch_y = [], []
+        for _ in range(self.batch_size):
+            x, y = self.generate_pair()
+            batch_x.append(x)
+            batch_y.append(y)
+        return self.encode_x_batch(batch_x), self.encode_y_batch(batch_y)
+
+    class DifficultyLevel:
+        """ On HARD, settings are identical to the original settings from the '97 paper."""
+        EASY, NORMAL, MODERATE, HARD, NIGHTMARE = range(5)
+
+    @staticmethod
+    def get_predefined_generator(difficulty_level, batch_size=32, seed=8382):
+        EASY = TemporalOrderExp6aSequence.DifficultyLevel.EASY
+        NORMAL = TemporalOrderExp6aSequence.DifficultyLevel.NORMAL
+        MODERATE = TemporalOrderExp6aSequence.DifficultyLevel.MODERATE
+        HARD = TemporalOrderExp6aSequence.DifficultyLevel.HARD
+
+        if difficulty_level == EASY:
+            length_range = (7, 9)
+            t1_range = (1, 3)
+            t2_range = (4, 6)
+        elif difficulty_level == NORMAL:
+            length_range = (30, 41)
+            t1_range = (2, 6)
+            t2_range = (20, 28)
+        elif difficulty_level == MODERATE:
+            length_range = (60, 81)
+            t1_range = (10, 21)
+            t2_range = (45, 55)
+        elif difficulty_level == HARD:
+            length_range = (100, 111)
+            t1_range = (10, 21)
+            t2_range = (50, 61)
+        else:
+            length_range = (300, 501)
+            t1_range = (10, 81)
+            t2_range = (250, 291)
+        return TemporalOrderExp6aSequence(length_range, t1_range, t2_range,
+                                          batch_size, seed)