-
Notifications
You must be signed in to change notification settings - Fork 49
/
Copy pathfeature-pipeline.cc
124 lines (111 loc) · 4.04 KB
/
feature-pipeline.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Copyright (c) 2017 Personal (Binbin Zhang)
// Created on 2017-08-02
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "feature-pipeline.h"
// Builds a pipeline from `config`.
//
// Keeps a copy of the configuration, caches the left/right context sizes and
// the raw per-frame dimension (config.num_bins), constructs the filterbank
// extractor, and finally loads the CMVN statistics named by config.cmvn_file.
FeaturePipeline::FeaturePipeline(const FeaturePipelineConfig& config):
config_(config),
left_context_(config.left_context),
right_context_(config.right_context),
// One raw frame holds config.num_bins values.
raw_feat_dim_(config.num_bins),
fbank_(config.num_bins, config.sample_rate,
config.frame_length, config.frame_shift),
// No frames have been produced yet and the stream is still open.
num_frames_(0),
done_(false) {
// ReadCmvn() CHECKs the statistics against raw_feat_dim_, which is already
// set by the initializer list above.
ReadCmvn(config.cmvn_file);
}
// Loads the CMVN statistics from `cmvn_file` into cmvn_ and CHECKs that the
// number of columns read equals the raw feature dimension.
void FeaturePipeline::ReadCmvn(const std::string& cmvn_file) {
cmvn_.Read(cmvn_file);
// AcceptRawWav() indexes cmvn_ by feature column, so the widths must agree.
CHECK(cmvn_.NumCols() == raw_feat_dim_);
}
// Consumes a chunk of raw samples and appends the resulting normalized frames
// to feature_buf_.
//
// Samples left over from the previous call (ctx_wav_) are prepended to `wav`
// before feature extraction. Every extracted value in column j is normalized
// as (x - cmvn_(0, j)) * cmvn_(1, j). The first time any frame is produced,
// the first frame is replicated left_context_ times at the front of the
// buffer. Samples that were not consumed (everything past
// frame_shift * num_frames) are kept in ctx_wav_ for the next call.
//
// If the accumulated samples are too few to produce a frame, they are only
// stashed in ctx_wav_; the buffer and the frame counter are left untouched.
void FeaturePipeline::AcceptRawWav(const std::vector<float>& wav) {
  std::vector<float> waves;
  waves.reserve(ctx_wav_.size() + wav.size());
  waves.insert(waves.end(), ctx_wav_.begin(), ctx_wav_.end());
  waves.insert(waves.end(), wav.begin(), wav.end());

  std::vector<float> feat;
  const int num_frames = fbank_.Compute(waves, &feat);
  CHECK(raw_feat_dim_ == cmvn_.NumCols());

  // Nothing extracted: keep every sample for the next call. Returning here
  // also keeps the left-context padding below from reading an empty `feat`.
  if (num_frames <= 0) {
    ctx_wav_.swap(waves);
    return;
  }

  // One bounds check for the whole chunk instead of one per element.
  const int num_values = num_frames * raw_feat_dim_;
  CHECK(num_values <= static_cast<int>(feat.size()));

  // do cmvn
  for (int i = 0; i < num_frames; i++) {
    float* frame = feat.data() + i * raw_feat_dim_;
    for (int j = 0; j < raw_feat_dim_; j++) {
      frame[j] = (frame[j] - cmvn_(0, j)) * cmvn_(1, j);
    }
  }

  // Pad the left context by repeating the very first frame. `feat` holds at
  // least one full frame here, so the source range is valid.
  if (feature_buf_.empty() && left_context_ > 0) {
    for (int i = 0; i < left_context_; i++) {
      feature_buf_.insert(feature_buf_.end(), feat.begin(),
                          feat.begin() + raw_feat_dim_);
    }
  }
  feature_buf_.insert(feature_buf_.end(), feat.begin(), feat.end());
  num_frames_ += num_frames;

  // Keep the unconsumed tail of the waveform as context for the next chunk.
  const int consumed = config_.frame_shift * num_frames;
  CHECK(consumed <= static_cast<int>(waves.size()));
  ctx_wav_.assign(waves.begin() + consumed, waves.end());
}
// Returns how many frames can currently be read with full context.
//
// Once SetDone() has been called, every produced frame is ready, because
// SetDone() pads the right context by repeating the last frame. This holds
// even when fewer than right_context_ frames were produced in total. While
// the stream is still open, the last right_context_ frames are withheld
// because their right context has not arrived yet.
int FeaturePipeline::NumFramesReady() const {
  if (done_) return num_frames_;
  if (num_frames_ < right_context_) return 0;
  return num_frames_ - right_context_;
}
// Marks the end of the input stream; must be called at most once.
//
// If at least one frame has been produced, the final frame in feature_buf_ is
// appended right_context_ more times so that the trailing frames have a full
// right context.
void FeaturePipeline::SetDone() {
  CHECK(!done_);
  done_ = true;
  if (num_frames_ != 0) {
    // Copy the last frame out first: inserting a range of feature_buf_ into
    // itself would read from iterators the insertion may invalidate.
    const std::vector<float> tail(feature_buf_.end() - raw_feat_dim_,
                                  feature_buf_.end());
    for (int remaining = right_context_; remaining > 0; --remaining) {
      feature_buf_.insert(feature_buf_.end(), tail.begin(), tail.end());
    }
  }
}
// Copies the spliced features of frames [t, NumFramesReady()) into `*feat`.
//
// Each output frame is the concatenation of left_context_ + 1 + right_context_
// consecutive raw frames from feature_buf_, starting at buffer frame i for
// output frame i (the buffer is front-padded by left_context_ frames in
// AcceptRawWav()).
//
// @param t     index of the first frame to read; must satisfy
//              0 <= t < num_frames_.
// @param feat  output vector, resized to (frames returned) * (spliced dim).
// @return      number of frames written. Returns 0 and leaves `*feat`
//              untouched when frame `t` is not ready yet.
int FeaturePipeline::ReadFeature(int t, std::vector<float>* feat) {
  CHECK(feat != NULL);
  CHECK(t >= 0);
  CHECK(t < num_frames_);
  const int num_frames_ready = NumFramesReady();
  // Also covers num_frames_ready <= 0. Without this guard a `t` at or beyond
  // the ready count would give a non-positive frame count and an enormous
  // resize() below.
  if (t >= num_frames_ready) return 0;

  const int total_frame = num_frames_ready - t;
  const int feat_dim = (left_context_ + 1 + right_context_) * raw_feat_dim_;
  feat->resize(static_cast<size_t>(total_frame) * feat_dim);
  for (int i = t; i < num_frames_ready; i++) {
    // Consecutive output frames overlap in the source buffer: each window
    // starts one raw frame after the previous one.
    memcpy(feat->data() + static_cast<size_t>(i - t) * feat_dim,
           feature_buf_.data() + static_cast<size_t>(i) * raw_feat_dim_,
           sizeof(float) * feat_dim);
  }
  return total_frame;
}
// Copies the spliced feature of frame `t` into `data`.
//
// The spliced frame is left_context_ + 1 + right_context_ consecutive raw
// frames taken from feature_buf_ starting at buffer frame `t`.
//
// @param t     frame index; must satisfy 0 <= t < num_frames_ and, when any
//              frame is ready, t < NumFramesReady().
// @param data  destination with room for
//              (left_context_ + 1 + right_context_) * raw_feat_dim_ floats.
// @return      1 when a frame was written, 0 when no frame is ready yet.
int FeaturePipeline::ReadOneFrame(int t, float *data) {
  CHECK(data != NULL);
  CHECK(t >= 0);
  CHECK(t < num_frames_);
  const int num_frames_ready = NumFramesReady();
  if (num_frames_ready <= 0) return 0;
  // Strictly less-than: frame `num_frames_ready` itself still lacks its right
  // context, so its window would extend past the end of feature_buf_.
  CHECK(t < num_frames_ready);
  const int feat_dim = (left_context_ + 1 + right_context_) * raw_feat_dim_;
  memcpy(data, feature_buf_.data() + static_cast<size_t>(t) * raw_feat_dim_,
         sizeof(float) * feat_dim);
  return 1;
}
// Reads every ready frame, starting from the first one, into `*feat`.
// Forwards to ReadFeature() and returns its result.
int FeaturePipeline::ReadAllFeature(std::vector<float> *feat) {
  const int first_frame = 0;
  return ReadFeature(first_frame, feat);
}
// Returns the number of frames obtained from `size` units of input, given the
// configured frame_length and frame_shift:
// 1 + (size - frame_length) / frame_shift.
//
// Returns 0 when `size` is smaller than one frame. Without that guard the
// truncating integer division would yield 1 (deficit smaller than
// frame_shift) or a negative count.
int FeaturePipeline::NumFrames(int size) const {
  if (size < config_.frame_length) return 0;
  return 1 + (size - config_.frame_length) / config_.frame_shift;
}