diff --git a/.gitignore b/.gitignore index 0d074fbf4e..a366cb115a 100755 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,7 @@ Session.vim .vscode __pycache__ .pytest* -venv -my_train \ No newline at end of file +venv* +.venv* +my_train +.DS_Store \ No newline at end of file diff --git a/setup.py b/setup.py index d783dccddd..cca03e7e3e 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def parse_requirements(lines: List[str]): setup( name="TensorFlowASR", - version="1.0.3", + version="1.0.4", author="Huy Le Nguyen", author_email="nlhuy.cs.16@gmail.com", description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2", diff --git a/tensorflow_asr/models/transducer/contextnet.py b/tensorflow_asr/models/transducer/contextnet.py index 4469c43880..aefb91a404 100644 --- a/tensorflow_asr/models/transducer/contextnet.py +++ b/tensorflow_asr/models/transducer/contextnet.py @@ -146,7 +146,7 @@ def recognize_tflite( """ features = self.speech_featurizer.tf_extract(signal) encoded = self.encoder_inference(features, tf.shape(features)[0]) - hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, prediction_states) + hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, prediction_states, tflite=True) transcript = self.text_featurizer.indices2upoints(hypothesis.prediction) return transcript, hypothesis.index, hypothesis.states @@ -158,7 +158,7 @@ def recognize_tflite_with_timestamp( ): features = self.speech_featurizer.tf_extract(signal) encoded = self.encoder_inference(features, tf.shape(features)[0]) - hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, states) + hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, states, tflite=True) indices = self.text_featurizer.normalize_indices(hypothesis.prediction) upoints = tf.gather_nd(self.text_featurizer.upoints, tf.expand_dims(indices, axis=-1)) # [None, max_subword_length] diff --git a/tensorflow_asr/models/transducer/rnn_transducer.py b/tensorflow_asr/models/transducer/rnn_transducer.py index d43d0d5440..4eaebffba6 100644 --- a/tensorflow_asr/models/transducer/rnn_transducer.py +++ b/tensorflow_asr/models/transducer/rnn_transducer.py @@ -332,7 +332,7 @@ def recognize_tflite( """ features = self.speech_featurizer.tf_extract(signal) encoded, new_encoder_states = self.encoder_inference(features, encoder_states) - hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, prediction_states) + hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, prediction_states, tflite=True) transcript = self.text_featurizer.indices2upoints(hypothesis.prediction) return transcript, hypothesis.index, new_encoder_states, hypothesis.states @@ -345,7 +345,7 @@ def recognize_tflite_with_timestamp( ): features = self.speech_featurizer.tf_extract(signal) encoded, new_encoder_states = self.encoder_inference(features, encoder_states) - hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, prediction_states) + hypothesis = self._perform_greedy(encoded, tf.shape(encoded)[0], predicted, prediction_states, tflite=True) indices = self.text_featurizer.normalize_indices(hypothesis.prediction) upoints = tf.gather_nd(self.text_featurizer.upoints, tf.expand_dims(indices, axis=-1)) # [None, max_subword_length]