diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c index 472fa85f87..5efaa7d1fc 100644 --- a/libavcodec/amrnbdec.c +++ b/libavcodec/amrnbdec.c @@ -181,21 +181,21 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx) for (int ch = 0; ch < avctx->channels; ch++) { AMRContext *p = &s->ch[ch]; - // p->excitation always points to the same position in p->excitation_buf - p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1]; + // p->excitation always points to the same position in p->excitation_buf + p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1]; - for (i = 0; i < LP_FILTER_ORDER; i++) { - p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15); - p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15); - } + for (i = 0; i < LP_FILTER_ORDER; i++) { + p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15); + p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15); + } - for (i = 0; i < 4; i++) - p->prediction_error[i] = MIN_ENERGY; + for (i = 0; i < 4; i++) + p->prediction_error[i] = MIN_ENERGY; - ff_acelp_filter_init(&p->acelpf_ctx); - ff_acelp_vectors_init(&p->acelpv_ctx); - ff_celp_filter_init(&p->celpf_ctx); - ff_celp_math_init(&p->celpm_ctx); + ff_acelp_filter_init(&p->acelpf_ctx); + ff_acelp_vectors_init(&p->acelpv_ctx); + ff_celp_filter_init(&p->celpf_ctx); + ff_celp_math_init(&p->celpm_ctx); } return 0; @@ -980,112 +980,112 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, int channel_size; int i, subframe; - p->cur_frame_mode = unpack_bitstream(p, buf, buf_size); + p->cur_frame_mode = unpack_bitstream(p, buf, buf_size); channel_size = frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC - if (p->cur_frame_mode == NO_DATA) { - av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n"); - return AVERROR_INVALIDDATA; - } - if (p->cur_frame_mode == MODE_DTX) { - avpriv_report_missing_feature(avctx, "dtx mode"); - av_log(avctx, AV_LOG_INFO, "Note: libopencore_amrnb supports dtx\n"); - return AVERROR_PATCHWELCOME; - } - - if (p->cur_frame_mode == MODE_12k2) { - lsf2lsp_5(p); - } else - lsf2lsp_3(p); - - for (i = 0; i < 4; i++) - ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5); - - for (subframe = 0; subframe < 4; subframe++) { - const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe]; - - decode_pitch_vector(p, amr_subframe, subframe); - - decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses, - p->cur_frame_mode, subframe); - - // The fixed gain (section 6.1.3) depends on the fixed vector - // (section 6.1.2), but the fixed vector calculation uses - // pitch sharpening based on the on the pitch gain (section 6.1.3). - // So the correct order is: pitch gain, pitch sharpening, fixed gain. - decode_gains(p, amr_subframe, p->cur_frame_mode, subframe, - &fixed_gain_factor); - - pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse); - - if (fixed_sparse.pitch_lag == 0) { - av_log(avctx, AV_LOG_ERROR, "The file is corrupted, pitch_lag = 0 is not allowed\n"); + if (p->cur_frame_mode == NO_DATA) { + av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n"); return AVERROR_INVALIDDATA; } - ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0, - AMR_SUBFRAME_SIZE); + if (p->cur_frame_mode == MODE_DTX) { + avpriv_report_missing_feature(avctx, "dtx mode"); + av_log(avctx, AV_LOG_INFO, "Note: libopencore_amrnb supports dtx\n"); + return AVERROR_PATCHWELCOME; + } - p->fixed_gain[4] = - ff_amr_set_fixed_gain(fixed_gain_factor, - p->celpm_ctx.dot_productf(p->fixed_vector, - p->fixed_vector, - AMR_SUBFRAME_SIZE) / - AMR_SUBFRAME_SIZE, - p->prediction_error, - energy_mean[p->cur_frame_mode], energy_pred_fac); + if (p->cur_frame_mode == MODE_12k2) { + lsf2lsp_5(p); + } else + lsf2lsp_3(p); - // The excitation feedback is calculated without any processing such - // as fixed gain smoothing. This isn't mentioned in the specification. - for (i = 0; i < AMR_SUBFRAME_SIZE; i++) - p->excitation[i] *= p->pitch_gain[4]; - ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4], - AMR_SUBFRAME_SIZE); + for (i = 0; i < 4; i++) + ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5); - // In the ref decoder, excitation is stored with no fractional bits. - // This step prevents buzz in silent periods. The ref encoder can - // emit long sequences with pitch factor greater than one. This - // creates unwanted feedback if the excitation vector is nonzero. - // (e.g. test sequence T19_795.COD in 3GPP TS 26.074) - for (i = 0; i < AMR_SUBFRAME_SIZE; i++) - p->excitation[i] = truncf(p->excitation[i]); + for (subframe = 0; subframe < 4; subframe++) { + const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe]; - // Smooth fixed gain. - // The specification is ambiguous, but in the reference source, the - // smoothed value is NOT fed back into later fixed gain smoothing. - synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe], - p->lsf_avg, p->cur_frame_mode); + decode_pitch_vector(p, amr_subframe, subframe); - synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector, - synth_fixed_gain, spare_vector); + decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses, + p->cur_frame_mode, subframe); - if (synthesis(p, p->lpc[subframe], synth_fixed_gain, - synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0)) - // overflow detected -> rerun synthesis scaling pitch vector down - // by a factor of 4, skipping pitch vector contribution emphasis - // and adaptive gain control - synthesis(p, p->lpc[subframe], synth_fixed_gain, - synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1); + // The fixed gain (section 6.1.3) depends on the fixed vector + // (section 6.1.2), but the fixed vector calculation uses + // pitch sharpening based on the on the pitch gain (section 6.1.3). + // So the correct order is: pitch gain, pitch sharpening, fixed gain. + decode_gains(p, amr_subframe, p->cur_frame_mode, subframe, + &fixed_gain_factor); - postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE); + pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse); - // update buffers and history - ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE); - update_state(p); - } + if (fixed_sparse.pitch_lag == 0) { + av_log(avctx, AV_LOG_ERROR, "The file is corrupted, pitch_lag = 0 is not allowed\n"); + return AVERROR_INVALIDDATA; + } + ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0, + AMR_SUBFRAME_SIZE); - p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out, - buf_out, highpass_zeros, - highpass_poles, - highpass_gain * AMR_SAMPLE_SCALE, - p->high_pass_mem, AMR_BLOCK_SIZE); + p->fixed_gain[4] = + ff_amr_set_fixed_gain(fixed_gain_factor, + p->celpm_ctx.dot_productf(p->fixed_vector, + p->fixed_vector, + AMR_SUBFRAME_SIZE) / + AMR_SUBFRAME_SIZE, + p->prediction_error, + energy_mean[p->cur_frame_mode], energy_pred_fac); - /* Update averaged lsf vector (used for fixed gain smoothing). - * - * Note that lsf_avg should not incorporate the current frame's LSFs - * for fixed_gain_smooth. - * The specification has an incorrect formula: the reference decoder uses - * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ - p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], - 0.84, 0.16, LP_FILTER_ORDER); + // The excitation feedback is calculated without any processing such + // as fixed gain smoothing. This isn't mentioned in the specification. + for (i = 0; i < AMR_SUBFRAME_SIZE; i++) + p->excitation[i] *= p->pitch_gain[4]; + ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4], + AMR_SUBFRAME_SIZE); + + // In the ref decoder, excitation is stored with no fractional bits. + // This step prevents buzz in silent periods. The ref encoder can + // emit long sequences with pitch factor greater than one. This + // creates unwanted feedback if the excitation vector is nonzero. + // (e.g. test sequence T19_795.COD in 3GPP TS 26.074) + for (i = 0; i < AMR_SUBFRAME_SIZE; i++) + p->excitation[i] = truncf(p->excitation[i]); + + // Smooth fixed gain. + // The specification is ambiguous, but in the reference source, the + // smoothed value is NOT fed back into later fixed gain smoothing. + synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe], + p->lsf_avg, p->cur_frame_mode); + + synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector, + synth_fixed_gain, spare_vector); + + if (synthesis(p, p->lpc[subframe], synth_fixed_gain, + synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0)) + // overflow detected -> rerun synthesis scaling pitch vector down + // by a factor of 4, skipping pitch vector contribution emphasis + // and adaptive gain control + synthesis(p, p->lpc[subframe], synth_fixed_gain, + synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1); + + postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE); + + // update buffers and history + ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE); + update_state(p); + } + + p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out, + buf_out, highpass_zeros, + highpass_poles, + highpass_gain * AMR_SAMPLE_SCALE, + p->high_pass_mem, AMR_BLOCK_SIZE); + + /* Update averaged lsf vector (used for fixed gain smoothing). + * + * Note that lsf_avg should not incorporate the current frame's LSFs + * for fixed_gain_smooth. + * The specification has an incorrect formula: the reference decoder uses + * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ + p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], + 0.84, 0.16, LP_FILTER_ORDER); buf += channel_size; buf_size -= channel_size; } diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c index 998dd82791..3ff4077f7f 100644 --- a/libavcodec/amrwbdec.c +++ b/libavcodec/amrwbdec.c @@ -116,23 +116,23 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx) avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; for (int ch = 0; ch < avctx->channels; ch++) { - AMRWBContext *ctx = &s->ch[ch]; + AMRWBContext *ctx = &s->ch[ch]; - av_lfg_init(&ctx->prng, 1); + av_lfg_init(&ctx->prng, 1); - ctx->excitation = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1]; - ctx->first_frame = 1; + ctx->excitation = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1]; + ctx->first_frame = 1; - for (i = 0; i < LP_ORDER; i++) - ctx->isf_past_final[i] = isf_init[i] * (1.0f / (1 << 15)); + for (i = 0; i < LP_ORDER; i++) + ctx->isf_past_final[i] = isf_init[i] * (1.0f / (1 << 15)); - for (i = 0; i < 4; i++) - ctx->prediction_error[i] = MIN_ENERGY; + for (i = 0; i < 4; i++) + ctx->prediction_error[i] = MIN_ENERGY; - ff_acelp_filter_init(&ctx->acelpf_ctx); - ff_acelp_vectors_init(&ctx->acelpv_ctx); - ff_celp_filter_init(&ctx->celpf_ctx); - ff_celp_math_init(&ctx->celpm_ctx); + ff_acelp_filter_init(&ctx->acelpf_ctx); + ff_acelp_vectors_init(&ctx->acelpv_ctx); + ff_celp_filter_init(&ctx->celpf_ctx); + ff_celp_math_init(&ctx->celpm_ctx); } return 0; @@ -1116,172 +1116,172 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, return ret; for (int ch = 0; ch < avctx->channels; ch++) { - AMRWBContext *ctx = &s->ch[ch]; - AMRWBFrame *cf = &ctx->frame; - int expected_fr_size, header_size; - float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing - float fixed_gain_factor; // fixed gain correction factor (gamma) - float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use - float synth_fixed_gain; // the fixed gain that synthesis should use - float voice_fac, stab_fac; // parameters used for gain smoothing - float synth_exc[AMRWB_SFR_SIZE]; // post-processed excitation for synthesis - float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band - float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis - float hb_gain; - float *buf_out = (float *)frame->extended_data[ch]; + AMRWBContext *ctx = &s->ch[ch]; + AMRWBFrame *cf = &ctx->frame; + int expected_fr_size, header_size; + float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing + float fixed_gain_factor; // fixed gain correction factor (gamma) + float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use + float synth_fixed_gain; // the fixed gain that synthesis should use + float voice_fac, stab_fac; // parameters used for gain smoothing + float synth_exc[AMRWB_SFR_SIZE]; // post-processed excitation for synthesis + float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band + float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis + float hb_gain; + float *buf_out = (float *)frame->extended_data[ch]; - header_size = decode_mime_header(ctx, buf); - expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1; + header_size = decode_mime_header(ctx, buf); + expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1; - if (!ctx->fr_quality) - av_log(avctx, AV_LOG_ERROR, "Encountered a bad or corrupted frame\n"); + if (!ctx->fr_quality) + av_log(avctx, AV_LOG_ERROR, "Encountered a bad or corrupted frame\n"); - if (ctx->fr_cur_mode == NO_DATA || !ctx->fr_quality) { - /* The specification suggests a "random signal" and - "a muting technique" to "gradually decrease the output level". */ - av_samples_set_silence(&frame->extended_data[ch], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT); - buf += expected_fr_size; - buf_size -= expected_fr_size; - continue; - } - if (ctx->fr_cur_mode > MODE_SID) { - av_log(avctx, AV_LOG_ERROR, - "Invalid mode %d\n", ctx->fr_cur_mode); - return AVERROR_INVALIDDATA; - } - - if (buf_size < expected_fr_size) { - av_log(avctx, AV_LOG_ERROR, - "Frame too small (%d bytes). Truncated file?\n", buf_size); - *got_frame_ptr = 0; - return AVERROR_INVALIDDATA; - } - - if (ctx->fr_cur_mode == MODE_SID) { /* Comfort noise frame */ - avpriv_request_sample(avctx, "SID mode"); - return AVERROR_PATCHWELCOME; - } - - ff_amr_bit_reorder((uint16_t *) &ctx->frame, sizeof(AMRWBFrame), - buf + header_size, amr_bit_orderings_by_mode[ctx->fr_cur_mode]); - - /* Decode the quantized ISF vector */ - if (ctx->fr_cur_mode == MODE_6k60) { - decode_isf_indices_36b(cf->isp_id, ctx->isf_cur); - } else { - decode_isf_indices_46b(cf->isp_id, ctx->isf_cur); - } - - isf_add_mean_and_past(ctx->isf_cur, ctx->isf_q_past); - ff_set_min_dist_lsf(ctx->isf_cur, MIN_ISF_SPACING, LP_ORDER - 1); - - stab_fac = stability_factor(ctx->isf_cur, ctx->isf_past_final); - - ctx->isf_cur[LP_ORDER - 1] *= 2.0; - ff_acelp_lsf2lspd(ctx->isp[3], ctx->isf_cur, LP_ORDER); - - /* Generate a ISP vector for each subframe */ - if (ctx->first_frame) { - ctx->first_frame = 0; - memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(double)); - } - interpolate_isp(ctx->isp, ctx->isp_sub4_past); - - for (sub = 0; sub < 4; sub++) - ff_amrwb_lsp2lpc(ctx->isp[sub], ctx->lp_coef[sub], LP_ORDER); - - for (sub = 0; sub < 4; sub++) { - const AMRWBSubFrame *cur_subframe = &cf->subframe[sub]; - float *sub_buf = buf_out + sub * AMRWB_SFR_SIZE_16k; - - /* Decode adaptive codebook (pitch vector) */ - decode_pitch_vector(ctx, cur_subframe, sub); - /* Decode innovative codebook (fixed vector) */ - decode_fixed_vector(ctx->fixed_vector, cur_subframe->pul_ih, - cur_subframe->pul_il, ctx->fr_cur_mode); - - pitch_sharpening(ctx, ctx->fixed_vector); - - decode_gains(cur_subframe->vq_gain, ctx->fr_cur_mode, - &fixed_gain_factor, &ctx->pitch_gain[0]); - - ctx->fixed_gain[0] = - ff_amr_set_fixed_gain(fixed_gain_factor, - ctx->celpm_ctx.dot_productf(ctx->fixed_vector, - ctx->fixed_vector, - AMRWB_SFR_SIZE) / - AMRWB_SFR_SIZE, - ctx->prediction_error, - ENERGY_MEAN, energy_pred_fac); - - /* Calculate voice factor and store tilt for next subframe */ - voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0], - ctx->fixed_vector, ctx->fixed_gain[0], - &ctx->celpm_ctx); - ctx->tilt_coef = voice_fac * 0.25 + 0.25; - - /* Construct current excitation */ - for (i = 0; i < AMRWB_SFR_SIZE; i++) { - ctx->excitation[i] *= ctx->pitch_gain[0]; - ctx->excitation[i] += ctx->fixed_gain[0] * ctx->fixed_vector[i]; - ctx->excitation[i] = truncf(ctx->excitation[i]); + if (ctx->fr_cur_mode == NO_DATA || !ctx->fr_quality) { + /* The specification suggests a "random signal" and + "a muting technique" to "gradually decrease the output level". */ + av_samples_set_silence(&frame->extended_data[ch], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT); + buf += expected_fr_size; + buf_size -= expected_fr_size; + continue; + } + if (ctx->fr_cur_mode > MODE_SID) { + av_log(avctx, AV_LOG_ERROR, + "Invalid mode %d\n", ctx->fr_cur_mode); + return AVERROR_INVALIDDATA; } - /* Post-processing of excitation elements */ - synth_fixed_gain = noise_enhancer(ctx->fixed_gain[0], &ctx->prev_tr_gain, - voice_fac, stab_fac); + if (buf_size < expected_fr_size) { + av_log(avctx, AV_LOG_ERROR, + "Frame too small (%d bytes). Truncated file?\n", buf_size); + *got_frame_ptr = 0; + return AVERROR_INVALIDDATA; + } - synth_fixed_vector = anti_sparseness(ctx, ctx->fixed_vector, - spare_vector); + if (ctx->fr_cur_mode == MODE_SID) { /* Comfort noise frame */ + avpriv_request_sample(avctx, "SID mode"); + return AVERROR_PATCHWELCOME; + } - pitch_enhancer(synth_fixed_vector, voice_fac); + ff_amr_bit_reorder((uint16_t *) &ctx->frame, sizeof(AMRWBFrame), + buf + header_size, amr_bit_orderings_by_mode[ctx->fr_cur_mode]); - synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain, - synth_fixed_vector, &ctx->samples_az[LP_ORDER]); + /* Decode the quantized ISF vector */ + if (ctx->fr_cur_mode == MODE_6k60) { + decode_isf_indices_36b(cf->isp_id, ctx->isf_cur); + } else { + decode_isf_indices_46b(cf->isp_id, ctx->isf_cur); + } - /* Synthesis speech post-processing */ - de_emphasis(&ctx->samples_up[UPS_MEM_SIZE], - &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem); + isf_add_mean_and_past(ctx->isf_cur, ctx->isf_q_past); + ff_set_min_dist_lsf(ctx->isf_cur, MIN_ISF_SPACING, LP_ORDER - 1); - ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], - &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles, - hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE); + stab_fac = stability_factor(ctx->isf_cur, ctx->isf_past_final); - upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE], - AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx); + ctx->isf_cur[LP_ORDER - 1] *= 2.0; + ff_acelp_lsf2lspd(ctx->isp[3], ctx->isf_cur, LP_ORDER); - /* High frequency band (6.4 - 7.0 kHz) generation part */ - ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples, - &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles, - hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE); + /* Generate a ISP vector for each subframe */ + if (ctx->first_frame) { + ctx->first_frame = 0; + memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(double)); + } + interpolate_isp(ctx->isp, ctx->isp_sub4_past); - hb_gain = find_hb_gain(ctx, hb_samples, - cur_subframe->hb_gain, cf->vad); + for (sub = 0; sub < 4; sub++) + ff_amrwb_lsp2lpc(ctx->isp[sub], ctx->lp_coef[sub], LP_ORDER); - scaled_hb_excitation(ctx, hb_exc, synth_exc, hb_gain); + for (sub = 0; sub < 4; sub++) { + const AMRWBSubFrame *cur_subframe = &cf->subframe[sub]; + float *sub_buf = buf_out + sub * AMRWB_SFR_SIZE_16k; - hb_synthesis(ctx, sub, &ctx->samples_hb[LP_ORDER_16k], - hb_exc, ctx->isf_cur, ctx->isf_past_final); + /* Decode adaptive codebook (pitch vector) */ + decode_pitch_vector(ctx, cur_subframe, sub); + /* Decode innovative codebook (fixed vector) */ + decode_fixed_vector(ctx->fixed_vector, cur_subframe->pul_ih, + cur_subframe->pul_il, ctx->fr_cur_mode); - /* High-band post-processing filters */ - hb_fir_filter(hb_samples, bpf_6_7_coef, ctx->bpf_6_7_mem, - &ctx->samples_hb[LP_ORDER_16k]); + pitch_sharpening(ctx, ctx->fixed_vector); - if (ctx->fr_cur_mode == MODE_23k85) - hb_fir_filter(hb_samples, lpf_7_coef, ctx->lpf_7_mem, - hb_samples); + decode_gains(cur_subframe->vq_gain, ctx->fr_cur_mode, + &fixed_gain_factor, &ctx->pitch_gain[0]); - /* Add the low and high frequency bands */ - for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) - sub_buf[i] = (sub_buf[i] + hb_samples[i]) * (1.0f / (1 << 15)); + ctx->fixed_gain[0] = + ff_amr_set_fixed_gain(fixed_gain_factor, + ctx->celpm_ctx.dot_productf(ctx->fixed_vector, + ctx->fixed_vector, + AMRWB_SFR_SIZE) / + AMRWB_SFR_SIZE, + ctx->prediction_error, + ENERGY_MEAN, energy_pred_fac); - /* Update buffers and history */ - update_sub_state(ctx); - } + /* Calculate voice factor and store tilt for next subframe */ + voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0], + ctx->fixed_vector, ctx->fixed_gain[0], + &ctx->celpm_ctx); + ctx->tilt_coef = voice_fac * 0.25 + 0.25; - /* update state for next frame */ - memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0])); - memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float)); + /* Construct current excitation */ + for (i = 0; i < AMRWB_SFR_SIZE; i++) { + ctx->excitation[i] *= ctx->pitch_gain[0]; + ctx->excitation[i] += ctx->fixed_gain[0] * ctx->fixed_vector[i]; + ctx->excitation[i] = truncf(ctx->excitation[i]); + } + + /* Post-processing of excitation elements */ + synth_fixed_gain = noise_enhancer(ctx->fixed_gain[0], &ctx->prev_tr_gain, + voice_fac, stab_fac); + + synth_fixed_vector = anti_sparseness(ctx, ctx->fixed_vector, + spare_vector); + + pitch_enhancer(synth_fixed_vector, voice_fac); + + synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain, + synth_fixed_vector, &ctx->samples_az[LP_ORDER]); + + /* Synthesis speech post-processing */ + de_emphasis(&ctx->samples_up[UPS_MEM_SIZE], + &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem); + + ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], + &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles, + hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE); + + upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE], + AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx); + + /* High frequency band (6.4 - 7.0 kHz) generation part */ + ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples, + &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles, + hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE); + + hb_gain = find_hb_gain(ctx, hb_samples, + cur_subframe->hb_gain, cf->vad); + + scaled_hb_excitation(ctx, hb_exc, synth_exc, hb_gain); + + hb_synthesis(ctx, sub, &ctx->samples_hb[LP_ORDER_16k], + hb_exc, ctx->isf_cur, ctx->isf_past_final); + + /* High-band post-processing filters */ + hb_fir_filter(hb_samples, bpf_6_7_coef, ctx->bpf_6_7_mem, + &ctx->samples_hb[LP_ORDER_16k]); + + if (ctx->fr_cur_mode == MODE_23k85) + hb_fir_filter(hb_samples, lpf_7_coef, ctx->lpf_7_mem, + hb_samples); + + /* Add the low and high frequency bands */ + for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) + sub_buf[i] = (sub_buf[i] + hb_samples[i]) * (1.0f / (1 << 15)); + + /* Update buffers and history */ + update_sub_state(ctx); + } + + /* update state for next frame */ + memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0])); + memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float)); buf += expected_fr_size; buf_size -= expected_fr_size;