From 9eb86275757d8c4e967038fd324a9a0afd91e77a Mon Sep 17 00:00:00 2001
From: Tyler Jones <tdjones879@gmail.com>
Date: Sun, 26 Mar 2017 15:30:18 -0600
Subject: [PATCH] [WIP] libavcodec/vorbisenc: Include AAC psy model

Scale factors are copied from the AAC encoder tabulated data. This bypasses
namespace conflicts that would occur when including the necessary files for
the AAC psy model. Including the AAC tabulated header causes improper framing
bits to be output to the header and results in an invalid audio stream.

Signed-off-by: Tyler Jones <tdjones879@gmail.com>
---
 libavcodec/psymodel.c        |   1 +
 libavcodec/vorbis_enc_data.h | 108 +++++++++++++++++++++++++++++++++++++++++++
 libavcodec/vorbisenc.c       |  77 ++++++++++++++++++++++++------
 3 files changed, 173 insertions(+), 13 deletions(-)

diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
index 2b5f111..38831ce 100644
--- a/libavcodec/psymodel.c
+++ b/libavcodec/psymodel.c
@@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
 
     switch (ctx->avctx->codec_id) {
     case AV_CODEC_ID_AAC:
+    case AV_CODEC_ID_VORBIS:
         ctx->model = &ff_aac_psy_model;
         break;
     }
diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h
index a51aaec..d65e7cc 100644
--- a/libavcodec/vorbis_enc_data.h
+++ b/libavcodec/vorbis_enc_data.h
@@ -501,4 +501,112 @@ static const struct {
     { 3, 2, 3, { -1, 12, 13, 14 } },
 };
 
+static const uint8_t swb_size_128_96[] = { /* 12 band sizes summing to 128; row for 96000 and 88200 Hz */
+    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
+};
+
+static const uint8_t swb_size_128_64[] = { /* 12 band sizes summing to 128; row for 64000 Hz (same values as swb_size_128_96) */
+    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
+};
+
+static const uint8_t swb_size_128_48[] = { /* 14 band sizes summing to 128; row for 48000, 44100 and 32000 Hz */
+    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
+};
+
+static const uint8_t swb_size_128_24[] = { /* 15 band sizes summing to 128; row for 24000 and 22050 Hz */
+    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
+};
+
+static const uint8_t swb_size_128_16[] = { /* 15 band sizes summing to 128; row for 16000, 12000 and 11025 Hz */
+    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
+};
+
+static const uint8_t swb_size_128_8[] = { /* 15 band sizes summing to 128; row for 8000 and 7350 Hz */
+    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
+};
+
+static const uint8_t swb_size_1024_96[] = { /* 41 band sizes summing to 1024; row for 96000 and 88200 Hz */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_64[] = { /* 47 band sizes summing to 1024; row for 64000 Hz */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
+    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
+    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
+};
+
+static const uint8_t swb_size_1024_48[] = { /* 49 band sizes summing to 1024; row for 48000 and 44100 Hz */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
+    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+    96
+};
+
+static const uint8_t swb_size_1024_32[] = { /* 51 band sizes summing to 1024; row for 32000 Hz */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
+    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+};
+
+static const uint8_t swb_size_1024_24[] = { /* 47 band sizes summing to 1024; row for 24000 and 22050 Hz */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
+    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_16[] = { /* 43 band sizes summing to 1024; row for 16000, 12000 and 11025 Hz */
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
+    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_8[] = { /* 40 band sizes summing to 1024; row for 8000 and 7350 Hz */
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
+    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
+};
+
+static const uint8_t * const ff_vorbis_swb_size_128[] = { /* row i pairs with mpeg4audio_sample_rates[i]; static because this is a header */
+    swb_size_128_96, swb_size_128_96, swb_size_128_64,
+    swb_size_128_48, swb_size_128_48, swb_size_128_48,
+    swb_size_128_24, swb_size_128_24, swb_size_128_16,
+    swb_size_128_16, swb_size_128_16, swb_size_128_8,
+    swb_size_128_8
+};
+
+static const uint8_t * const ff_vorbis_swb_size_1024[] = { /* row i pairs with mpeg4audio_sample_rates[i]; static because this is a header */
+    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
+    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
+    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
+    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
+    swb_size_1024_8
+};
+
+static const int ff_vorbis_swb_size_128_len  = FF_ARRAY_ELEMS(ff_vorbis_swb_size_128);  /* row count (13); static: defined in a header */
+static const int ff_vorbis_swb_size_1024_len = FF_ARRAY_ELEMS(ff_vorbis_swb_size_1024); /* row count (13); static: defined in a header */
+
+/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
+ * failures; the matching index also selects the row of the swb tables */
+static const int mpeg4audio_sample_rates[16] = {
+    96000, 88200, 64000, 48000, 44100, 32000,
+    24000, 22050, 16000, 12000, 11025, 8000, 7350 /* 13 rates; the other 3 of the 16 slots stay 0 */
+};
+
+enum WindowSequence { /* window type kept per channel in vorbis_chan_stream and handed to the psy model's window() */
+    ONLY_LONG_SEQUENCE,
+    LONG_START_SEQUENCE,
+    EIGHT_SHORT_SEQUENCE,
+    LONG_STOP_SEQUENCE,
+};
+
+static const uint8_t ff_vorbis_num_swb_1024[] = { /* entry count of each ff_vorbis_swb_size_1024[] row; static: defined in a header */
+    41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40
+};
+
+static const uint8_t ff_vorbis_num_swb_128[] = { /* entry count of each ff_vorbis_swb_size_128[] row; static: defined in a header */
+    12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15
+};
+
 #endif /* AVCODEC_VORBIS_ENC_DATA_H */
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index bf21a3b..9f5aed9 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -36,6 +36,7 @@
 
 #include "audio_frame_queue.h"
 #include "libavfilter/bufferqueue.h"
+#include "psymodel.h"
 
 #define BITSTREAM_WRITER_LE
 #include "put_bits.h"
@@ -43,6 +44,17 @@
 #undef NDEBUG
 #include <assert.h>
 
+#define MAX_CHANNELS     2 /* defined ahead of the structs: vorbis_enc_context sizes chan_stream[] with it */
+#define MAX_CODEBOOK_DIM 8
+
+#define MAX_FLOOR_CLASS_DIM  4
+#define NUM_FLOOR_PARTITIONS 8
+#define MAX_FLOOR_VALUES     (MAX_FLOOR_CLASS_DIM*NUM_FLOOR_PARTITIONS+2) /* 4*8+2 = 34 */
+
+#define RESIDUE_SIZE           1600
+#define RESIDUE_PART_SIZE      32
+#define NUM_RESIDUE_PARTITIONS (RESIDUE_SIZE/RESIDUE_PART_SIZE) /* 1600/32 = 50 */
+
 typedef struct vorbis_enc_codebook {
     int nentries;
     uint8_t *lens;
@@ -101,6 +113,10 @@ typedef struct vorbis_enc_mode {
     int mapping;
 } vorbis_enc_mode;
 
+typedef struct vorbis_chan_stream { /* per-channel window history, one entry per channel in vorbis_enc_context */
+    enum WindowSequence window_sequence[2]; /* [0]: latest type returned by the psy model, [1]: the value [0] held before that */
+} vorbis_chan_stream;
+
 typedef struct vorbis_enc_context {
     int channels;
     int sample_rate;
@@ -118,6 +134,9 @@ typedef struct vorbis_enc_context {
     AudioFrameQueue afq;
     struct FFBufQueue bufqueue;
 
+    FFPsyContext psy;
+    struct FFPsyPreprocessContext *psypp;
+
     int ncodebooks;
     vorbis_enc_codebook *codebooks;
 
@@ -136,19 +155,9 @@ typedef struct vorbis_enc_context {
     int64_t next_pts;
 
     AVFloatDSPContext *fdsp;
+    vorbis_chan_stream chan_stream[MAX_CHANNELS];
 } vorbis_enc_context;
 
-#define MAX_CHANNELS     2
-#define MAX_CODEBOOK_DIM 8
-
-#define MAX_FLOOR_CLASS_DIM  4
-#define NUM_FLOOR_PARTITIONS 8
-#define MAX_FLOOR_VALUES     (MAX_FLOOR_CLASS_DIM*NUM_FLOOR_PARTITIONS+2)
-
-#define RESIDUE_SIZE           1600
-#define RESIDUE_PART_SIZE      32
-#define NUM_RESIDUE_PARTITIONS (RESIDUE_SIZE/RESIDUE_PART_SIZE)
-
 static inline int put_codeword(PutBitContext *pb, vorbis_enc_codebook *cb,
                                int entry)
 {
@@ -1085,10 +1094,14 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                const AVFrame *frame, int *got_packet_ptr)
 {
     vorbis_enc_context *venc = avctx->priv_data;
-    int i, ret, need_more;
+    int i, ret, need_more, ch;
     int frame_size = 1 << (venc->log2_blocksize[1] - 1);
+    float *overlap, *look_ahead;
+    float *scratch[2] = {venc->scratch, venc->scratch + 2 * frame_size};
     vorbis_enc_mode *mode;
     vorbis_enc_mapping *mapping;
+    vorbis_chan_stream *chan_stream;
+    FFPsyWindowInfo windows[MAX_CHANNELS];
     PutBitContext pb;
 
     if (frame) {
@@ -1121,6 +1134,20 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
     move_audio(venc, avctx->frame_size);
 
+    if (venc->psypp)
+        ff_psy_preprocess(venc->psypp, scratch, venc->channels);
+
+    for (ch = 0; ch < venc->channels; ch++) {
+        chan_stream = &venc->chan_stream[ch];
+        overlap = venc->scratch + 2*ch*frame_size;
+        look_ahead = overlap + frame_size / 2;
+        windows[ch] = venc->psy.model->window(&venc->psy, overlap, look_ahead, ch,
+                                              chan_stream->window_sequence[0]);
+
+        chan_stream->window_sequence[1] = chan_stream->window_sequence[0];
+        chan_stream->window_sequence[0] = windows[ch].window_type[0];
+    }
+
     if (!apply_window_and_mdct(venc))
         return 0;
 
@@ -1252,6 +1279,10 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
     ff_mdct_end(&venc->mdct[1]);
     ff_af_queue_close(&venc->afq);
     ff_bufqueue_discard_all(&venc->bufqueue);
+    ff_psy_end(&venc->psy);
+
+    if (venc->psypp)
+        ff_psy_preprocess_end(venc->psypp);
 
     av_freep(&avctx->extradata);
 
@@ -1261,7 +1292,9 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
 static av_cold int vorbis_encode_init(AVCodecContext *avctx)
 {
     vorbis_enc_context *venc = avctx->priv_data;
-    int ret;
+    int ret, samplerate_index, lengths[MAX_CHANNELS];
+    const uint8_t *sizes[MAX_CHANNELS];
+    uint8_t grouping[MAX_CHANNELS];
 
     if (avctx->channels != 2) {
         av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n");
@@ -1286,6 +1319,36 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
 
     ff_af_queue_init(avctx, &venc->afq);
 
+    /* Find the row of the swb tables that matches the requested sample rate. */
+    for (samplerate_index = 0; samplerate_index < 16; samplerate_index++)
+        if (avctx->sample_rate == mpeg4audio_sample_rates[samplerate_index])
+            break;
+    if (samplerate_index == 16 ||
+        samplerate_index >= ff_vorbis_swb_size_1024_len ||
+        samplerate_index >= ff_vorbis_swb_size_128_len) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
+        /* Stop here: the lookups below would read past the end of the tables. */
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    /* Slot 0 carries the 1024 tables, slot 1 the 128 tables. */
+    sizes[0]   = ff_vorbis_swb_size_1024[samplerate_index];
+    sizes[1]   = ff_vorbis_swb_size_128[samplerate_index];
+    lengths[0] = ff_vorbis_num_swb_1024[samplerate_index];
+    lengths[1] = ff_vorbis_num_swb_128[samplerate_index];
+
+    /* grouping[] has no initializer, yet one group is handed to ff_psy_init().
+     * NOTE(review): 1 assumes the map stores "channels in the group minus one"
+     * (one stereo pair), as the AAC encoder does -- confirm. */
+    grouping[0] = 1;
+
+    if ((ret = ff_psy_init(&venc->psy, avctx, 2, sizes, lengths,
+                           1, grouping)) < 0)
+        goto error;
+    /* May be NULL; the uses of venc->psypp in this patch check for that. */
+    venc->psypp = ff_psy_preprocess_init(avctx);
+
     return 0;
 error:
     vorbis_encode_close(avctx);
-- 
2.7.4