blob: a4ad6b299b844df7b92ad279122a290d7e61e9a4 [file] [log] [blame]
/*
* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
* * Neither the name of The Linux Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Changes from Qualcomm Innovation Center are provided under the following license:
* Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause-Clear
*/
#define LOG_TAG "PAL: HotwordInterface"
#include "HotwordInterface.h"
HotwordInterface::HotwordInterface(std::shared_ptr<VUIStreamConfig> sm_cfg) {
sm_cfg_ = sm_cfg;
hist_duration_ = 0;
preroll_duration_ = 0;
custom_event_ = nullptr;
custom_event_size_ = 0;
}
HotwordInterface::~HotwordInterface() {
if (custom_event_)
free(custom_event_);
}
int32_t HotwordInterface::ParseSoundModel(std::shared_ptr<VUIStreamConfig> sm_cfg,
struct pal_st_sound_model *sound_model,
st_module_type_t &first_stage_type,
std::vector<sm_pair_t> &model_list) {
int32_t status = 0;
struct pal_st_phrase_sound_model *phrase_sm = nullptr;
struct pal_st_sound_model *common_sm = nullptr;
uint8_t *ptr = nullptr;
uint8_t *sm_data = nullptr;
int32_t sm_size = 0;
PAL_DBG(LOG_TAG, "Enter");
if (sound_model->type == PAL_SOUND_MODEL_TYPE_KEYPHRASE) {
// handle for phrase sound model
phrase_sm = (struct pal_st_phrase_sound_model *)sound_model;
first_stage_type = sm_cfg->GetVUIModuleType();
sm_size = phrase_sm->common.data_size;
sm_data = (uint8_t *)calloc(1, sm_size);
if (!sm_data) {
PAL_ERR(LOG_TAG, "Failed to allocate memory for sm_data");
status = -ENOMEM;
goto error_exit;
}
ptr = (uint8_t*)phrase_sm + phrase_sm->common.data_offset;
ar_mem_cpy(sm_data, sm_size, ptr, sm_size);
model_list.push_back(std::make_pair(ST_SM_ID_SVA_F_STAGE_GMM,
std::make_pair(sm_data, sm_size)));
} else if (sound_model->type == PAL_SOUND_MODEL_TYPE_GENERIC) {
// handle for generic sound model
first_stage_type = sm_cfg->GetVUIModuleType();
common_sm = sound_model;
sm_size = common_sm->data_size;
sm_data = (uint8_t *)calloc(1, sm_size);
if (!sm_data) {
PAL_ERR(LOG_TAG, "Failed to allocate memory for sm_data");
status = -ENOMEM;
goto error_exit;
}
ptr = (uint8_t*)common_sm + common_sm->data_offset;
ar_mem_cpy(sm_data, sm_size, ptr, sm_size);
model_list.push_back(std::make_pair(ST_SM_ID_SVA_F_STAGE_GMM,
std::make_pair(sm_data, sm_size)));
}
PAL_DBG(LOG_TAG, "Exit, status %d", status);
return status;
error_exit:
// clean up memory added to model_list in failure case
for (auto iter = model_list.begin(); iter != model_list.end(); iter++) {
if ((*iter).second.first)
free((*iter).second.first);
}
model_list.clear();
PAL_DBG(LOG_TAG, "Exit, status %d", status);
return status;
}
int32_t HotwordInterface::ParseRecognitionConfig(Stream *s,
struct pal_st_recognition_config *config) {
int32_t status = 0;
struct sound_model_info *sm_info = nullptr;
uint32_t hist_buffer_duration = 0;
uint32_t pre_roll_duration = 0;
if (sm_info_map_.find(s) != sm_info_map_.end() && sm_info_map_[s]) {
sm_info = sm_info_map_[s];
sm_info->rec_config = config;
} else {
PAL_ERR(LOG_TAG, "Stream not registered to interface");
return -EINVAL;
}
PAL_DBG(LOG_TAG, "Enter");
if (!config) {
PAL_ERR(LOG_TAG, "Invalid config");
status = -EINVAL;
goto exit;
}
// get history buffer duration from sound trigger platform xml
hist_buffer_duration = sm_cfg_->GetKwDuration();
pre_roll_duration = 0;
sm_info_map_[s]->hist_buffer_duration = hist_buffer_duration;
sm_info_map_[s]->pre_roll_duration = pre_roll_duration;
exit:
PAL_DBG(LOG_TAG, "Exit, status %d", status);
return status;
}
void HotwordInterface::GetBufferingConfigs(Stream *s,
uint32_t *hist_duration,
uint32_t *preroll_duration) {
if (sm_info_map_.find(s) != sm_info_map_.end() && sm_info_map_[s]) {
*hist_duration = sm_info_map_[s]->hist_buffer_duration;
*preroll_duration = sm_info_map_[s]->pre_roll_duration;
} else {
PAL_ERR(LOG_TAG, "Stream not registered to interface");
}
}
int32_t HotwordInterface::ParseDetectionPayload(void *event, uint32_t size) {
int32_t status = 0;
if (!event || size == 0) {
PAL_ERR(LOG_TAG, "Invalid detection payload");
return -EINVAL;
}
custom_event_ = (uint8_t *)realloc(custom_event_, size);
if (!custom_event_) {
PAL_ERR(LOG_TAG, "Failed to allocate memory for detection payload");
return -ENOMEM;
}
ar_mem_cpy(custom_event_, size, event, size);
custom_event_size_ = size;
return status;
}
Stream* HotwordInterface::GetDetectedStream() {
PAL_DBG(LOG_TAG, "Enter");
if (sm_info_map_.empty()) {
PAL_ERR(LOG_TAG, "Unexpected, No streams attached to engine!");
return nullptr;
} else if (sm_info_map_.size() == 1) {
return sm_info_map_.begin()->first;
} else {
PAL_ERR(LOG_TAG, "Only single hotword usecase is supported");
return nullptr;
}
}
int32_t HotwordInterface::GenerateCallbackEvent(Stream *s,
struct pal_st_recognition_event **event,
uint32_t *size, bool detection) {
struct sound_model_info *sm_info = nullptr;
struct pal_st_phrase_recognition_event *phrase_event = nullptr;
struct pal_st_generic_recognition_event *generic_event = nullptr;
struct pal_stream_attributes strAttr;
size_t event_size = 0;
uint8_t *opaque_data = nullptr;
int32_t status = 0;
if (sm_info_map_.find(s) != sm_info_map_.end() && sm_info_map_[s]) {
sm_info = sm_info_map_[s];
} else {
PAL_ERR(LOG_TAG, "Stream not registered to interface");
return -EINVAL;
}
status = s->getStreamAttributes(&strAttr);
PAL_DBG(LOG_TAG, "Enter");
*event = nullptr;
if (sm_info->type == PAL_SOUND_MODEL_TYPE_KEYPHRASE) {
event_size = sizeof(struct pal_st_phrase_recognition_event) +
custom_event_size_;
// event allocated will be used/deallocated in stream side
phrase_event = (struct pal_st_phrase_recognition_event *)
calloc(1, event_size);
if (!phrase_event) {
PAL_ERR(LOG_TAG, "Failed to alloc memory for recognition event");
status = -ENOMEM;
goto exit;
}
phrase_event->num_phrases = sm_info->rec_config->num_phrases;
memcpy(phrase_event->phrase_extras, sm_info->rec_config->phrases,
phrase_event->num_phrases *
sizeof(struct pal_st_phrase_recognition_extra));
*event = &(phrase_event->common);
(*event)->status = detection ? PAL_RECOGNITION_STATUS_SUCCESS :
PAL_RECOGNITION_STATUS_FAILURE;
(*event)->type = sm_info->type;
(*event)->st_handle = (pal_st_handle_t *)this;
(*event)->capture_available = sm_info->rec_config->capture_requested;
// TODO: generate capture session
(*event)->capture_session = 0;
(*event)->capture_delay_ms = 0;
(*event)->capture_preamble_ms = 0;
(*event)->trigger_in_data = true;
(*event)->data_size = custom_event_size_;
(*event)->data_offset = sizeof(struct pal_st_phrase_recognition_event);
(*event)->media_config.sample_rate =
strAttr.in_media_config.sample_rate;
(*event)->media_config.bit_width =
strAttr.in_media_config.bit_width;
(*event)->media_config.ch_info.channels =
strAttr.in_media_config.ch_info.channels;
(*event)->media_config.aud_fmt_id = PAL_AUDIO_FMT_PCM_S16_LE;
// Filling Opaque data
opaque_data = (uint8_t *)phrase_event +
phrase_event->common.data_offset;
ar_mem_cpy(opaque_data, custom_event_size_, custom_event_, custom_event_size_);
} else if (sm_info->type == PAL_SOUND_MODEL_TYPE_GENERIC) {
event_size = sizeof(struct pal_st_generic_recognition_event) +
custom_event_size_;
// event allocated will be used/deallocated in stream side
generic_event = (struct pal_st_generic_recognition_event *)
calloc(1, event_size);
if (!generic_event) {
PAL_ERR(LOG_TAG, "Failed to alloc memory for recognition event");
status = -ENOMEM;
goto exit;
}
*event = &(generic_event->common);
(*event)->status = PAL_RECOGNITION_STATUS_SUCCESS;
(*event)->type = sm_info->type;
(*event)->st_handle = (pal_st_handle_t *)this;
(*event)->capture_available = sm_info->rec_config->capture_requested;
// TODO: generate capture session
(*event)->capture_session = 0;
(*event)->capture_delay_ms = 0;
(*event)->capture_preamble_ms = 0;
(*event)->trigger_in_data = true;
(*event)->data_size = custom_event_size_;
(*event)->data_offset = sizeof(struct pal_st_generic_recognition_event);
(*event)->media_config.sample_rate =
strAttr.in_media_config.sample_rate;
(*event)->media_config.bit_width =
strAttr.in_media_config.bit_width;
(*event)->media_config.ch_info.channels =
strAttr.in_media_config.ch_info.channels;
(*event)->media_config.aud_fmt_id = PAL_AUDIO_FMT_PCM_S16_LE;
// Filling Opaque data
opaque_data = (uint8_t *)generic_event +
generic_event->common.data_offset;
ar_mem_cpy(opaque_data, custom_event_size_, custom_event_, custom_event_size_);
}
*size = event_size;
exit:
PAL_DBG(LOG_TAG, "Exit");
return status;
}