forked from espressif/arduino-esp32
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudio_provider.cpp
198 lines (177 loc) · 6.89 KB
/
audio_provider.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "audio_provider.h"
#include <cstdlib>
#include <cstring>
// FreeRTOS.h must be included before some of the following dependencies.
// Solves b/150260343.
// clang-format off
#include "freertos/FreeRTOS.h"
// clang-format on
#include "driver/i2s.h"
#include "esp_log.h"
#include "esp_spi_flash.h"
#include "esp_system.h"
#include "esp_timer.h"
#include "freertos/task.h"
#include "ringbuf.h"
#include "micro_model_settings.h"
using namespace std;
/* Non-zero on targets for which the I2S capture code in this file is compiled
 * out: ESP32-C2 always, and ESP32-C3 when built against ESP-IDF older than
 * 4.4.0.
 * The whole expansion is parenthesized so it behaves as a single operand when
 * combined with other operators (e.g. `!NO_I2S_SUPPORT`). */
#define NO_I2S_SUPPORT                                 \
  (CONFIG_IDF_TARGET_ESP32C2 ||                        \
   (CONFIG_IDF_TARGET_ESP32C3 &&                       \
    (ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(4, 4, 0))))
/* Tag passed to the ESP_LOGx macros for every message logged from this file. */
static const char* TAG = "TF_LITE_AUDIO_PROVIDER";
/* Ring buffer holding the incoming audio data: written by CaptureSamples()
 * and read by GetAudioSamples(). Created in InitAudioRecording(). */
ringbuf_t* g_audio_capture_buffer;
/* Running total, in milliseconds, of audio written into the ring buffer.
 * Advanced by CaptureSamples() and returned by LatestAudioTimestamp(); a
 * non-zero value is what InitAudioRecording() waits for before returning. */
volatile int32_t g_latest_audio_timestamp = 0;
/* Number of samples carried over from one GetAudioSamples() call to the next:
 * the part of a feature slice that is not covered by one stride. Each call
 * combines this old data (kept in the history buffer) with freshly captured
 * data from g_audio_capture_buffer. The original note gives the example
 * { history_samples_to_keep = 10 * 16 }, i.e. 10 ms at 16 samples per ms. */
constexpr int32_t history_samples_to_keep =
((kFeatureSliceDurationMs - kFeatureSliceStrideMs) *
(kAudioSampleFrequency / 1000));
/* Number of new samples fetched from the ring buffer on every
 * GetAudioSamples() call: one stride worth of audio. The original note gives
 * the example { new_samples_to_get = 20 * 16 }. */
constexpr int32_t new_samples_to_get =
(kFeatureSliceStrideMs * (kAudioSampleFrequency / 1000));
namespace {
/* Buffer handed back to the caller of GetAudioSamples(): history samples
 * first, followed by the newly read samples. */
int16_t g_audio_output_buffer[kMaxAudioSampleSize];
/* Set once InitAudioRecording() has succeeded, so it is only run on the first
 * GetAudioSamples() call. */
bool g_is_audio_initialized = false;
/* Tail of the previous output window, reused as the head of the next one. */
int16_t g_history_buffer[history_samples_to_keep];
} // namespace
/* Size passed to rb_init() for the capture ring buffer.
 * NOTE(review): presumably a byte count -- confirm against ringbuf.h. */
const int32_t kAudioCaptureBufferSize = 80000;
/* Bytes requested from the I2S driver per i2s_read() call. With the 16-bit,
 * 16 kHz mono configuration set up in i2s_init() this is 1600 samples, i.e.
 * 100 ms of audio. */
const int32_t i2s_bytes_to_read = 3200;
#if NO_I2S_SUPPORT
// nothing to be done here
#else
static void i2s_init(void) {
// Start listening for audio: MONO @ 16KHz
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_TX),
.sample_rate = 16000,
.bits_per_sample = (i2s_bits_per_sample_t)16,
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S,
.intr_alloc_flags = 0,
.dma_buf_count = 3,
.dma_buf_len = 300,
.use_apll = false,
.tx_desc_auto_clear = false,
.fixed_mclk = -1,
};
i2s_pin_config_t pin_config = {
.bck_io_num = 26, // IIS_SCLK
.ws_io_num = 32, // IIS_LCLK
.data_out_num = -1, // IIS_DSIN
.data_in_num = 33, // IIS_DOUT
};
esp_err_t ret = 0;
ret = i2s_driver_install((i2s_port_t)1, &i2s_config, 0, NULL);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Error in i2s_driver_install");
}
ret = i2s_set_pin((i2s_port_t)1, &pin_config);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Error in i2s_set_pin");
}
ret = i2s_zero_dma_buffer((i2s_port_t)1);
if (ret != ESP_OK) {
ESP_LOGE(TAG, "Error in initializing dma buffer with 0");
}
}
#endif
static void CaptureSamples(void* arg) {
#if NO_I2S_SUPPORT
ESP_LOGE(TAG, "i2s support not available on C3 chip for IDF < 4.4.0");
return;
#else
size_t bytes_read = i2s_bytes_to_read;
uint8_t i2s_read_buffer[i2s_bytes_to_read] = {};
i2s_init();
while (1) {
/* read 100ms data at once from i2s */
i2s_read((i2s_port_t)1, (void*)i2s_read_buffer, i2s_bytes_to_read,
&bytes_read, pdMS_TO_TICKS(100));
if (bytes_read <= 0) {
ESP_LOGE(TAG, "Error in I2S read : %d", bytes_read);
} else {
if (bytes_read < i2s_bytes_to_read) {
ESP_LOGW(TAG, "Partial I2S read");
}
/* write bytes read by i2s into ring buffer */
int bytes_written = rb_write(g_audio_capture_buffer,
(uint8_t*)i2s_read_buffer, bytes_read, pdMS_TO_TICKS(100));
/* update the timestamp (in ms) to let the model know that new data has
* arrived */
g_latest_audio_timestamp = g_latest_audio_timestamp +
((1000 * (bytes_written / 2)) / kAudioSampleFrequency);
if (bytes_written <= 0) {
ESP_LOGE(TAG, "Could Not Write in Ring Buffer: %d ", bytes_written);
} else if (bytes_written < bytes_read) {
ESP_LOGW(TAG, "Partial Write");
}
}
}
#endif
vTaskDelete(NULL);
}
/* Creates the capture ring buffer, starts the CaptureSamples task and blocks
 * until the first audio has been written into the ring buffer.
 *
 * @return kTfLiteOk once g_latest_audio_timestamp becomes non-zero;
 *         kTfLiteError if the ring buffer or the capture task could not be
 *         created.
 *
 * Note: the wait for the first samples has no timeout. If the capture task
 * never writes any data (for example when it exits immediately because
 * NO_I2S_SUPPORT is set) this function does not return. */
TfLiteStatus InitAudioRecording() {
  /* Reuse a buffer left over from an earlier attempt that failed after the
   * buffer had been created, instead of allocating a second one. */
  if (!g_audio_capture_buffer) {
    g_audio_capture_buffer = rb_init("tf_ringbuffer", kAudioCaptureBufferSize);
  }
  if (!g_audio_capture_buffer) {
    ESP_LOGE(TAG, "Error creating ring buffer");
    return kTfLiteError;
  }
  /* create CaptureSamples Task which will get the i2s_data from mic and fill it
   * in the ring buffer */
  if (xTaskCreate(CaptureSamples, "CaptureSamples", 1024 * 32, NULL, 10,
                  NULL) != pdPASS) {
    /* Without the task nothing would ever advance the timestamp and the wait
     * below would hang silently. */
    ESP_LOGE(TAG, "Error creating CaptureSamples task");
    return kTfLiteError;
  }
  while (!g_latest_audio_timestamp) {
    vTaskDelay(1);  // one tick delay to avoid watchdog
  }
  ESP_LOGI(TAG, "Audio Recording started");
  return kTfLiteOk;
}
/* Hands the caller the next window of audio samples.
 *
 * On the first call the recording pipeline is started via
 * InitAudioRecording(). Each call then builds the window in
 * g_audio_output_buffer as
 *   [ history_samples_to_keep old samples | new_samples_to_get new samples ]
 * where the new samples are read from the ring buffer (waiting up to 100 ms),
 * and finally saves the tail of the window as history for the next call.
 *
 * @param start_ms            Unused by this implementation.
 * @param duration_ms         Unused by this implementation.
 * @param audio_samples_size  Out: always set to kMaxAudioSampleSize.
 * @param audio_samples       Out: set to the internal output buffer; the data
 *                            is overwritten by the next call.
 * @return The status from InitAudioRecording() if initialization fails,
 *         otherwise kTfLiteOk. A failed or short ring-buffer read is only
 *         logged; the unread part of the window then keeps whatever the
 *         previous call left there. */
TfLiteStatus GetAudioSamples(int start_ms, int duration_ms,
                             int* audio_samples_size, int16_t** audio_samples) {
  /* Both the history copy and the ring-buffer read below write into
   * g_audio_output_buffer; refuse to build if the settings would make them
   * run past its end. */
  static_assert(
      history_samples_to_keep + new_samples_to_get <= kMaxAudioSampleSize,
      "history + new samples must fit in g_audio_output_buffer");
  (void)start_ms;
  (void)duration_ms;

  constexpr size_t history_bytes = history_samples_to_keep * sizeof(int16_t);
  /* Kept as int so it compares against rb_read()'s int result without a
   * signed/unsigned conversion. */
  constexpr int new_bytes = (int)(new_samples_to_get * sizeof(int16_t));

  if (!g_is_audio_initialized) {
    TfLiteStatus init_status = InitAudioRecording();
    if (init_status != kTfLiteOk) {
      return init_status;
    }
    g_is_audio_initialized = true;
  }

  /* start the window with the samples kept from the previous call */
  memcpy((void*)(g_audio_output_buffer), (void*)(g_history_buffer),
         history_bytes);

  /* append the new samples from the ring buffer right after the history */
  int bytes_read =
      rb_read(g_audio_capture_buffer,
              ((uint8_t*)(g_audio_output_buffer + history_samples_to_keep)),
              new_bytes, pdMS_TO_TICKS(100));
  if (bytes_read < 0) {
    ESP_LOGE(TAG, " Model Could not read data from Ring Buffer");
  } else if (bytes_read < new_bytes) {
    ESP_LOGD(TAG, "RB FILLED RIGHT NOW IS %d",
             rb_filled(g_audio_capture_buffer));
    ESP_LOGD(TAG, " Partial Read of Data by Model ");
    ESP_LOGV(TAG, " Could only read %d bytes when required %d bytes ",
             bytes_read, new_bytes);
  }

  /* keep the last history_samples_to_keep samples of this window for the
   * next call */
  memcpy((void*)(g_history_buffer),
         (void*)(g_audio_output_buffer + new_samples_to_get), history_bytes);

  *audio_samples_size = kMaxAudioSampleSize;
  *audio_samples = g_audio_output_buffer;
  return kTfLiteOk;
}
/* Returns the current value of g_latest_audio_timestamp: the number of
 * milliseconds of audio the capture task has written into the ring buffer so
 * far (0 until the first write). */
int32_t LatestAudioTimestamp() {
  const int32_t latest_ms = g_latest_audio_timestamp;
  return latest_ms;
}