aacenc: reorder coding tools

This commit reorders the coding tools such that they're doing what
the decoder does in reverse order. The very first thing the decoder
does is to decode M/S stereo if that's signalled, then prediction,
IS, and finally TNS and PNS in another function.
adjust_frame_information()'s application of IS and M/S was taken
out into two separate functions since prediction doesn't expect
to get the raw coefficients but rather the coefficients at that
part of the encoding process.

The results show a much better PSNR when any combination of
Intensity Stereo, Mid/Side stereo and Prediction is used, which
is a sign of an increased encoder efficiency as well as the fact
that the decoder gets what it expects.

Otherwise, with only IS, PNS or prediction there are neither
regressions nor improvements except in the case of IS, which
now by itself (or with PNS) is less prone to artifacts. Enabling
M/S (using stereo_mode) as well will also reduce stereo artifacts
induced by IS, so in the very near future M/S may be enabled
by default.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
This commit is contained in:
Rostislav Pehlivanov 2015-09-02 06:26:45 +01:00
parent a4055d3e5d
commit 20dc527139

View File

@ -196,37 +196,6 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
{
int i, w, w2, g, ch;
int maxsfb, cmaxsfb;
IndividualChannelStream *ics;
if (cpe->common_window) {
ics = &cpe->ch[0].ics;
for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
for (w2 = 0; w2 < ics->group_len[w]; w2++) {
int start = (w+w2) * 128;
for (g = 0; g < ics->num_swb; g++) {
//apply Intensity stereo coeffs transformation
if (cpe->is_mask[w*16 + g]) {
int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
float scale = cpe->ch[0].is_ener[w*16+g];
for (i = 0; i < ics->swb_sizes[g]; i++) {
cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i]) * scale;
cpe->ch[1].coeffs[start+i] = 0.0f;
}
} else if (cpe->ms_mask[w*16 + g] &&
cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
for (i = 0; i < ics->swb_sizes[g]; i++) {
float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
float R = L - cpe->ch[1].coeffs[start+i];
cpe->ch[0].coeffs[start+i] = L;
cpe->ch[1].coeffs[start+i] = R;
}
}
start += ics->swb_sizes[g];
}
}
}
}
for (ch = 0; ch < chans; ch++) {
IndividualChannelStream *ics = &cpe->ch[ch].ics;
@ -273,6 +242,59 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
}
}
/**
 * Fold the intensity-stereo bands of a channel pair into channel 0.
 *
 * For each band flagged in cpe->is_mask, every coefficient of channel 0 is
 * replaced by (ch0 + sign * ch1) * gain, where sign is derived from channel
 * 1's band type as -1 + 2 * (band_type - 14) and gain is channel 0's is_ener
 * entry for that band. The matching channel 1 coefficients are set to zero.
 * Bands that are not flagged are left untouched.
 *
 * Does nothing unless the pair uses a common window.
 */
static void apply_intensity_stereo(ChannelElement *cpe)
{
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    int w, w2, g, i;

    if (!cpe->common_window)
        return;

    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
        for (w2 = 0; w2 < ics->group_len[w]; w2++) {
            /* Coefficient offset of the current window; advanced band by band. */
            int start = (w + w2) * 128;

            for (g = 0; g < ics->num_swb; g++) {
                /* Per-band tables are indexed by the group's first window. */
                const int band  = w * 16 + g;
                const int width = ics->swb_sizes[g];

                if (cpe->is_mask[band]) {
                    const int   sign = -1 + 2 * (cpe->ch[1].band_type[band] - 14);
                    const float gain = cpe->ch[0].is_ener[band];

                    for (i = 0; i < width; i++) {
                        float folded = (cpe->ch[0].coeffs[start + i] +
                                        sign * cpe->ch[1].coeffs[start + i]) * gain;
                        cpe->ch[0].coeffs[start + i] = folded;
                        cpe->ch[1].coeffs[start + i] = 0.0f;
                    }
                }

                start += width;
            }
        }
    }
}
/**
 * Convert the M/S-flagged bands of a channel pair from left/right to
 * mid/side representation, in place.
 *
 * For each eligible band, channel 0 receives (ch0 + ch1) / 2 and channel 1
 * receives that value minus the original ch1, i.e. (ch0 - ch1) / 2.
 *
 * A band is eligible only if its ms_mask entry is set, its is_mask entry is
 * clear, and the band type of both channels is below NOISE_BT. Any other
 * band is left untouched, so a set ms_mask flag on an intensity or noise
 * band never alters that band's coefficients.
 *
 * Does nothing unless the pair uses a common window.
 */
static void apply_mid_side_stereo(ChannelElement *cpe)
{
    int w, w2, g, i;
    IndividualChannelStream *ics = &cpe->ch[0].ics;

    if (!cpe->common_window)
        return;

    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
        for (w2 = 0; w2 < ics->group_len[w]; w2++) {
            /* Coefficient offset of the current window; advanced band by band. */
            int start = (w+w2) * 128;
            for (g = 0; g < ics->num_swb; g++) {
                /* Only transform bands that are really M/S coded: skip
                 * intensity bands and bands whose type in either channel is
                 * NOISE_BT or above, regardless of the ms_mask flag. */
                if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g] ||
                    cpe->ch[0].band_type[w*16 + g] >= NOISE_BT ||
                    cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
                    start += ics->swb_sizes[g];
                    continue;
                }
                for (i = 0; i < ics->swb_sizes[g]; i++) {
                    float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
                    float R = L - cpe->ch[1].coeffs[start+i];
                    cpe->ch[0].coeffs[start+i] = L;
                    cpe->ch[1].coeffs[start+i] = R;
                }
                start += ics->swb_sizes[g];
            }
        }
    }
}
/**
* Encode scalefactor band coding type.
*/
@ -280,6 +302,9 @@ static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
int w;
if (s->coder->set_special_band_scalefactors)
s->coder->set_special_band_scalefactors(s, sce);
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
}
@ -464,7 +489,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
float **samples = s->planar_samples, *samples2, *la, *overlap;
ChannelElement *cpe;
SingleChannelElement *sce;
int i, ch, w, g, chans, tag, start_ch, ret;
int i, ch, w, chans, tag, start_ch, ret;
int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
int chan_el_counter[4];
FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
@ -603,7 +628,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
}
}
}
for (ch = 0; ch < chans; ch++) {
for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pns && s->coder->search_for_pns)
@ -616,40 +641,40 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
tns_mode = 1;
}
s->cur_channel = start_ch;
if (s->options.stereo_mode && cpe->common_window) {
if (s->options.stereo_mode > 0) {
IndividualChannelStream *ics = &cpe->ch[0].ics;
for (w = 0; w < ics->num_windows; w += ics->group_len[w])
for (g = 0; g < ics->num_swb; g++)
cpe->ms_mask[w*16+g] = 1;
} else if (s->coder->search_for_ms) {
s->coder->search_for_ms(s, cpe);
}
}
if (s->options.intensity_stereo && s->coder->search_for_is) {
s->coder->search_for_is(s, avctx, cpe);
if (s->options.intensity_stereo) { /* Intensity Stereo */
if (s->coder->search_for_is)
s->coder->search_for_is(s, avctx, cpe);
if (cpe->is_mode) is_mode = 1;
apply_intensity_stereo(cpe);
}
if (s->options.pred) { /* Prediction */
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pred && s->coder->search_for_pred)
s->coder->search_for_pred(s, sce);
if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
}
if (s->coder->adjust_common_prediction)
s->coder->adjust_common_prediction(s, cpe);
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pred && s->coder->apply_main_pred)
s->coder->apply_main_pred(s, sce);
}
s->cur_channel = start_ch;
}
if (s->options.stereo_mode) { /* Mid/Side stereo */
if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
s->coder->search_for_ms(s, cpe);
else if (cpe->common_window)
memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
for (w = 0; w < 128; w++)
cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
apply_mid_side_stereo(cpe);
}
if (s->coder->set_special_band_scalefactors)
for (ch = 0; ch < chans; ch++)
s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
adjust_frame_information(cpe, chans);
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pred && s->coder->search_for_pred)
s->coder->search_for_pred(s, sce);
if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
}
if (s->options.pred && s->coder->adjust_common_prediction)
s->coder->adjust_common_prediction(s, cpe);
for (ch = 0; ch < chans; ch++) {
sce = &cpe->ch[ch];
s->cur_channel = start_ch + ch;
if (s->options.pred && s->coder->apply_main_pred)
s->coder->apply_main_pred(s, sce);
}
s->cur_channel = start_ch;
if (chans == 2) {
put_bits(&s->pb, 1, cpe->common_window);
if (cpe->common_window) {