summaryrefslogtreecommitdiff
path: root/gr-vocoder/lib/codec2/codec2.c
diff options
context:
space:
mode:
Diffstat (limited to 'gr-vocoder/lib/codec2/codec2.c')
-rw-r--r--gr-vocoder/lib/codec2/codec2.c1385
1 files changed, 1282 insertions, 103 deletions
diff --git a/gr-vocoder/lib/codec2/codec2.c b/gr-vocoder/lib/codec2/codec2.c
index 93ea9208c1..bc4a084839 100644
--- a/gr-vocoder/lib/codec2/codec2.c
+++ b/gr-vocoder/lib/codec2/codec2.c
@@ -42,7 +42,32 @@
#include "interp.h"
#include "postfilter.h"
#include "codec2.h"
+#include "lsp.h"
#include "codec2_internal.h"
+#include "machdep.h"
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION HEADERS
+
+\*---------------------------------------------------------------------------*/
+
+void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[]);
+void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model,
+ float ak[]);
+void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est);
+void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+static void ear_protection(float in_out[], int n);
/*---------------------------------------------------------------------------*\
@@ -64,50 +89,68 @@
\*---------------------------------------------------------------------------*/
-void *codec2_create()
+struct CODEC2 * CODEC2_WIN32SUPPORT codec2_create(int mode)
{
- CODEC2 *c2;
- int i,l;
+ struct CODEC2 *c2;
+ int i,l;
- c2 = (CODEC2*)malloc(sizeof(CODEC2));
+ c2 = (struct CODEC2*)malloc(sizeof(struct CODEC2));
if (c2 == NULL)
return NULL;
+ assert(
+ (mode == CODEC2_MODE_3200) ||
+ (mode == CODEC2_MODE_2400) ||
+ (mode == CODEC2_MODE_1600) ||
+ (mode == CODEC2_MODE_1400) ||
+ (mode == CODEC2_MODE_1300) ||
+ (mode == CODEC2_MODE_1200)
+ );
+ c2->mode = mode;
for(i=0; i<M; i++)
c2->Sn[i] = 1.0;
c2->hpf_states[0] = c2->hpf_states[1] = 0.0;
for(i=0; i<2*N; i++)
c2->Sn_[i] = 0;
- make_analysis_window(c2->w,c2->W);
+ c2->fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL);
+ make_analysis_window(c2->fft_fwd_cfg, c2->w,c2->W);
make_synthesis_window(c2->Pn);
+ c2->fft_inv_cfg = kiss_fft_alloc(FFT_DEC, 1, NULL, NULL);
quantise_init();
- c2->prev_Wo = 0.0;
+ c2->prev_Wo_enc = 0.0;
c2->bg_est = 0.0;
c2->ex_phase = 0.0;
- for(l=1; l<MAX_AMP; l++)
- c2->prev_model.A[l] = 0.0;
- c2->prev_model.Wo = TWO_PI/P_MAX;
- c2->prev_model.L = PI/c2->prev_model.Wo;
- c2->prev_model.voiced = 0;
+ for(l=1; l<=MAX_AMP; l++)
+ c2->prev_model_dec.A[l] = 0.0;
+ c2->prev_model_dec.Wo = TWO_PI/P_MAX;
+ c2->prev_model_dec.L = PI/c2->prev_model_dec.Wo;
+ c2->prev_model_dec.voiced = 0;
for(i=0; i<LPC_ORD; i++) {
- c2->prev_lsps[i] = i*PI/(LPC_ORD+1);
+ c2->prev_lsps_dec[i] = i*PI/(LPC_ORD+1);
}
- c2->prev_energy = 1;
+ c2->prev_e_dec = 1;
- c2->nlp = nlp_create();
+ c2->nlp = nlp_create(M);
if (c2->nlp == NULL) {
free (c2);
return NULL;
}
- return (void*)c2;
+ c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA;
+
+ c2->xq_enc[0] = c2->xq_enc[1] = 0.0;
+ c2->xq_dec[0] = c2->xq_dec[1] = 0.0;
+
+ c2->smoothing = 0;
+
+ return c2;
}
/*---------------------------------------------------------------------------*\
- FUNCTION....: codec2_create
+ FUNCTION....: codec2_destroy
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
@@ -115,27 +158,282 @@ void *codec2_create()
\*---------------------------------------------------------------------------*/
-void codec2_destroy(void *codec2_state)
+void CODEC2_WIN32SUPPORT codec2_destroy(struct CODEC2 *c2)
{
- CODEC2 *c2;
-
- assert(codec2_state != NULL);
- c2 = (CODEC2*)codec2_state;
+ assert(c2 != NULL);
nlp_destroy(c2->nlp);
- free(codec2_state);
+ KISS_FFT_FREE(c2->fft_fwd_cfg);
+ KISS_FFT_FREE(c2->fft_inv_cfg);
+ free(c2);
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_bits_per_frame
+ AUTHOR......: David Rowe
+ DATE CREATED: Nov 14 2011
+
+ Returns the number of bits per frame.
+
+\*---------------------------------------------------------------------------*/
+
+int CODEC2_WIN32SUPPORT codec2_bits_per_frame(struct CODEC2 *c2) {
+ if (c2->mode == CODEC2_MODE_3200)
+ return 64;
+ if (c2->mode == CODEC2_MODE_2400)
+ return 48;
+ if (c2->mode == CODEC2_MODE_1600)
+ return 64;
+ if (c2->mode == CODEC2_MODE_1400)
+ return 56;
+ if (c2->mode == CODEC2_MODE_1300)
+ return 52;
+ if (c2->mode == CODEC2_MODE_1200)
+ return 48;
+
+ return 0; /* shouldn't get here */
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_samples_per_frame
+ AUTHOR......: David Rowe
+ DATE CREATED: Nov 14 2011
+
+ Returns the number of speech samples per frame.
+
+\*---------------------------------------------------------------------------*/
+
+int CODEC2_WIN32SUPPORT codec2_samples_per_frame(struct CODEC2 *c2) {
+ if (c2->mode == CODEC2_MODE_3200)
+ return 160;
+ if (c2->mode == CODEC2_MODE_2400)
+ return 160;
+ if (c2->mode == CODEC2_MODE_1600)
+ return 320;
+ if (c2->mode == CODEC2_MODE_1400)
+ return 320;
+ if (c2->mode == CODEC2_MODE_1300)
+ return 320;
+ if (c2->mode == CODEC2_MODE_1200)
+ return 320;
+
+ return 0; /* shouldn't get here */
+}
+
+void CODEC2_WIN32SUPPORT codec2_encode(struct CODEC2 *c2, unsigned char *bits, short speech[])
+{
+ assert(c2 != NULL);
+ assert(
+ (c2->mode == CODEC2_MODE_3200) ||
+ (c2->mode == CODEC2_MODE_2400) ||
+ (c2->mode == CODEC2_MODE_1600) ||
+ (c2->mode == CODEC2_MODE_1400) ||
+ (c2->mode == CODEC2_MODE_1300) ||
+ (c2->mode == CODEC2_MODE_1200)
+ );
+
+ if (c2->mode == CODEC2_MODE_3200)
+ codec2_encode_3200(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_2400)
+ codec2_encode_2400(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_1600)
+ codec2_encode_1600(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_1400)
+ codec2_encode_1400(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_1300)
+ codec2_encode_1300(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_1200)
+ codec2_encode_1200(c2, bits, speech);
+}
+
+void CODEC2_WIN32SUPPORT codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits, float ber_est)
+{
+ assert(c2 != NULL);
+ assert(
+ (c2->mode == CODEC2_MODE_3200) ||
+ (c2->mode == CODEC2_MODE_2400) ||
+ (c2->mode == CODEC2_MODE_1600) ||
+ (c2->mode == CODEC2_MODE_1400) ||
+ (c2->mode == CODEC2_MODE_1300) ||
+ (c2->mode == CODEC2_MODE_1200)
+ );
+
+ if (c2->mode == CODEC2_MODE_3200)
+ codec2_decode_3200(c2, speech, bits);
+ if (c2->mode == CODEC2_MODE_2400)
+ codec2_decode_2400(c2, speech, bits);
+ if (c2->mode == CODEC2_MODE_1600)
+ codec2_decode_1600(c2, speech, bits);
+ if (c2->mode == CODEC2_MODE_1400)
+ codec2_decode_1400(c2, speech, bits);
+ if (c2->mode == CODEC2_MODE_1300)
+ codec2_decode_1300(c2, speech, bits, ber_est);
+ if (c2->mode == CODEC2_MODE_1200)
+ codec2_decode_1200(c2, speech, bits);
}
+
/*---------------------------------------------------------------------------*\
- FUNCTION....: codec2_encode
+ FUNCTION....: codec2_encode_3200
+ AUTHOR......: David Rowe
+ DATE CREATED: 13 Sep 2012
+
+ Encodes 160 speech samples (20ms of speech) into 64 bits.
+
+ The codec2 algorithm actually operates internally on 10ms (80
+ sample) frames, so we run the encoding algorithm twice. On the
+ first frame we just send the voicing bits. On the second frame we
+ send all model parameters. Compared to 2400 we use a larger number
+ of bits for the LSPs and non-VQ pitch and energy.
+
+ The bit allocation is:
+
+ Parameter bits/frame
+ --------------------------------------
+ Harmonic magnitudes (LSPs) 50
+ Pitch (Wo) 7
+ Energy 5
+ Voicing (10ms update) 2
+ TOTAL 64
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode_3200(struct CODEC2 *c2, unsigned char * bits, short speech[])
+{
+ MODEL model;
+ float ak[LPC_ORD+1];
+ float lsps[LPC_ORD];
+ float e;
+ int Wo_index, e_index;
+ int lspd_indexes[LPC_ORD];
+ int i;
+ unsigned int nbit = 0;
+
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
+
+ /* first 10ms analysis frame - we just want voicing */
+
+ analyse_one_frame(c2, &model, speech);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* second 10ms analysis frame */
+
+ analyse_one_frame(c2, &model, &speech[N]);
+ pack(bits, &nbit, model.voiced, 1);
+ Wo_index = encode_Wo(model.Wo);
+ pack(bits, &nbit, Wo_index, WO_BITS);
+
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e_index = encode_energy(e);
+ pack(bits, &nbit, e_index, E_BITS);
+
+ encode_lspds_scalar(lspd_indexes, lsps, LPC_ORD);
+ for(i=0; i<LSPD_SCALAR_INDEXES; i++) {
+ pack(bits, &nbit, lspd_indexes[i], lspd_bits(i));
+ }
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2));
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode_3200
+ AUTHOR......: David Rowe
+ DATE CREATED: 13 Sep 2012
+
+ Decodes a frame of 64 bits into 160 samples (20ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_decode_3200(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+{
+ MODEL model[2];
+ int lspd_indexes[LPC_ORD];
+ float lsps[2][LPC_ORD];
+ int Wo_index, e_index;
+ float e[2];
+ float snr;
+ float ak[2][LPC_ORD+1];
+ int i,j;
+ unsigned int nbit = 0;
+
+ assert(c2 != NULL);
+
+ /* only need to zero these out due to (unused) snr calculation */
+
+ for(i=0; i<2; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ /* this will partially fill the model params for the 2 x 10ms
+ frames */
+
+ model[0].voiced = unpack(bits, &nbit, 1);
+ model[1].voiced = unpack(bits, &nbit, 1);
+
+ Wo_index = unpack(bits, &nbit, WO_BITS);
+ model[1].Wo = decode_Wo(Wo_index);
+ model[1].L = PI/model[1].Wo;
+
+ e_index = unpack(bits, &nbit, E_BITS);
+ e[1] = decode_energy(e_index);
+
+ for(i=0; i<LSPD_SCALAR_INDEXES; i++) {
+ lspd_indexes[i] = unpack(bits, &nbit, lspd_bits(i));
+ }
+ decode_lspds_scalar(&lsps[1][0], lspd_indexes, LPC_ORD);
+
+ /* interpolate ------------------------------------------------*/
+
+ /* Wo and energy are sampled every 20ms, so we interpolate just 1
+ 10ms frame between 20ms samples */
+
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ e[0] = interp_energy(c2->prev_e_dec, e[1]);
+
+ /* LSPs are sampled every 20ms so we interpolate the frame in
+ between, then recover spectral amplitudes */
+
+ interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5);
+ for(i=0; i<2; i++) {
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
+ apply_lpc_correction(&model[i]);
+ }
+
+ /* synthesise ------------------------------------------------*/
+
+ for(i=0; i<2; i++)
+ synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]);
+
+ /* update memories for next frame ----------------------------*/
+
+ c2->prev_model_dec = model[1];
+ c2->prev_e_dec = e[1];
+ for(i=0; i<LPC_ORD; i++)
+ c2->prev_lsps_dec[i] = lsps[1][i];
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_encode_2400
AUTHOR......: David Rowe
DATE CREATED: 21/8/2010
- Encodes 160 speech samples (20ms of speech) into 51 bits.
+ Encodes 160 speech samples (20ms of speech) into 48 bits.
The codec2 algorithm actually operates internally on 10ms (80
sample) frames, so we run the encoding algorithm twice. On the
- first frame we just send the voicing bit. One the second frame we
+ first frame we just send the voicing bit. On the second frame we
send all model parameters.
The bit allocation is:
@@ -143,132 +441,859 @@ void codec2_destroy(void *codec2_state)
Parameter bits/frame
--------------------------------------
Harmonic magnitudes (LSPs) 36
- Low frequency LPC correction 1
- Energy 5
- Wo (fundamental frequnecy) 7
+ Joint VQ of Energy and Wo 8
Voicing (10ms update) 2
- TOTAL 51
+ Spare 2
+ TOTAL 48
\*---------------------------------------------------------------------------*/
-void codec2_encode(void *codec2_state, unsigned char * bits, short speech[])
+void codec2_encode_2400(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
- CODEC2 *c2;
MODEL model;
- int voiced1, voiced2;
+ float ak[LPC_ORD+1];
+ float lsps[LPC_ORD];
+ float e;
+ int WoE_index;
int lsp_indexes[LPC_ORD];
- int energy_index;
- int Wo_index;
int i;
+ int spare = 0;
unsigned int nbit = 0;
- assert(codec2_state != NULL);
- c2 = (CODEC2*)codec2_state;
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
/* first 10ms analysis frame - we just want voicing */
analyse_one_frame(c2, &model, speech);
- voiced1 = model.voiced;
+ pack(bits, &nbit, model.voiced, 1);
/* second 10ms analysis frame */
analyse_one_frame(c2, &model, &speech[N]);
- voiced2 = model.voiced;
+ pack(bits, &nbit, model.voiced, 1);
+
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ WoE_index = encode_WoE(&model, e, c2->xq_enc);
+ pack(bits, &nbit, WoE_index, WO_E_BITS);
+
+ encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
+ pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
+ }
+ pack(bits, &nbit, spare, 2);
+
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2));
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode_2400
+ AUTHOR......: David Rowe
+ DATE CREATED: 21/8/2010
+
+ Decodes frames of 48 bits into 160 samples (20ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_decode_2400(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+{
+ MODEL model[2];
+ int lsp_indexes[LPC_ORD];
+ float lsps[2][LPC_ORD];
+ int WoE_index;
+ float e[2];
+ float snr;
+ float ak[2][LPC_ORD+1];
+ int i,j;
+ unsigned int nbit = 0;
+
+ assert(c2 != NULL);
+
+ /* only need to zero these out due to (unused) snr calculation */
+
+ for(i=0; i<2; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ /* this will partially fill the model params for the 2 x 10ms
+ frames */
+
+ model[0].voiced = unpack(bits, &nbit, 1);
+
+ model[1].voiced = unpack(bits, &nbit, 1);
+ WoE_index = unpack(bits, &nbit, WO_E_BITS);
+ decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index);
+
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
+ lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
+ }
+ decode_lsps_scalar(&lsps[1][0], lsp_indexes, LPC_ORD);
+ check_lsp_order(&lsps[1][0], LPC_ORD);
+ bw_expand_lsps(&lsps[1][0], LPC_ORD, 50.0, 100.0);
+
+ /* interpolate ------------------------------------------------*/
+
+ /* Wo and energy are sampled every 20ms, so we interpolate just 1
+ 10ms frame between 20ms samples */
+
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ e[0] = interp_energy(c2->prev_e_dec, e[1]);
+
+ /* LSPs are sampled every 20ms so we interpolate the frame in
+ between, then recover spectral amplitudes */
+
+ interpolate_lsp_ver2(&lsps[0][0], c2->prev_lsps_dec, &lsps[1][0], 0.5);
+ for(i=0; i<2; i++) {
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
+ apply_lpc_correction(&model[i]);
+ }
+
+ /* synthesise ------------------------------------------------*/
+
+ for(i=0; i<2; i++)
+ synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]);
+
+ /* update memories for next frame ----------------------------*/
+
+ c2->prev_model_dec = model[1];
+ c2->prev_e_dec = e[1];
+ for(i=0; i<LPC_ORD; i++)
+ c2->prev_lsps_dec[i] = lsps[1][i];
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_encode_1600
+ AUTHOR......: David Rowe
+ DATE CREATED: Feb 28 2013
+
+ Encodes 320 speech samples (40ms of speech) into 64 bits.
+
+ The codec2 algorithm actually operates internally on 10ms (80
+ sample) frames, so we run the encoding algorithm 4 times:
+
+ frame 1: voicing bit
+ frame 2: voicing bit, Wo and E
+ frame 3: voicing bit
+ frame 4: voicing bit, Wo and E, scalar LSPs
+
+ The bit allocation is:
+
+ Parameter frame 2 frame 4 Total
+ -------------------------------------------------------
+ Harmonic magnitudes (LSPs) 0 36 36
+ Pitch (Wo) 7 7 14
+ Energy 5 5 10
+ Voicing (10ms update) 2 2 4
+ TOTAL 14 50 64
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode_1600(struct CODEC2 *c2, unsigned char * bits, short speech[])
+{
+ MODEL model;
+ float lsps[LPC_ORD];
+ float ak[LPC_ORD+1];
+ float e;
+ int lsp_indexes[LPC_ORD];
+ int Wo_index, e_index;
+ int i;
+ unsigned int nbit = 0;
+
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
+
+ /* frame 1: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, speech);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 2: - voicing, scalar Wo & E -------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[N]);
+ pack(bits, &nbit, model.voiced, 1);
Wo_index = encode_Wo(model.Wo);
- encode_amplitudes(lsp_indexes,
- &energy_index,
- &model,
- c2->Sn,
- c2->w);
- memset(bits, '\0', ((CODEC2_BITS_PER_FRAME + 7) / 8));
pack(bits, &nbit, Wo_index, WO_BITS);
- for(i=0; i<LPC_ORD; i++) {
+
+ /* need to run this just to get LPC energy */
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e_index = encode_energy(e);
+ pack(bits, &nbit, e_index, E_BITS);
+
+ /* frame 3: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[2*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
+
+ analyse_one_frame(c2, &model, &speech[3*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ Wo_index = encode_Wo(model.Wo);
+ pack(bits, &nbit, Wo_index, WO_BITS);
+
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e_index = encode_energy(e);
+ pack(bits, &nbit, e_index, E_BITS);
+
+ encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
}
- pack(bits, &nbit, energy_index, E_BITS);
- pack(bits, &nbit, voiced1, 1);
- pack(bits, &nbit, voiced2, 1);
- assert(nbit == CODEC2_BITS_PER_FRAME);
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2));
}
+
/*---------------------------------------------------------------------------*\
- FUNCTION....: codec2_decode
+ FUNCTION....: codec2_decode_1600
AUTHOR......: David Rowe
- DATE CREATED: 21/8/2010
+ DATE CREATED: 11 May 2012
- Decodes frames of 51 bits into 160 samples (20ms) of speech.
+ Decodes frames of 64 bits into 320 samples (40ms) of speech.
\*---------------------------------------------------------------------------*/
-void codec2_decode(void *codec2_state, short speech[],
- const unsigned char * bits)
+void codec2_decode_1600(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+{
+ MODEL model[4];
+ int lsp_indexes[LPC_ORD];
+ float lsps[4][LPC_ORD];
+ int Wo_index, e_index;
+ float e[4];
+ float snr;
+ float ak[4][LPC_ORD+1];
+ int i,j;
+ unsigned int nbit = 0;
+ float weight;
+
+ assert(c2 != NULL);
+
+ /* only need to zero these out due to (unused) snr calculation */
+
+ for(i=0; i<4; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ /* this will partially fill the model params for the 4 x 10ms
+ frames */
+
+ model[0].voiced = unpack(bits, &nbit, 1);
+
+ model[1].voiced = unpack(bits, &nbit, 1);
+ Wo_index = unpack(bits, &nbit, WO_BITS);
+ model[1].Wo = decode_Wo(Wo_index);
+ model[1].L = PI/model[1].Wo;
+
+ e_index = unpack(bits, &nbit, E_BITS);
+ e[1] = decode_energy(e_index);
+
+ model[2].voiced = unpack(bits, &nbit, 1);
+
+ model[3].voiced = unpack(bits, &nbit, 1);
+ Wo_index = unpack(bits, &nbit, WO_BITS);
+ model[3].Wo = decode_Wo(Wo_index);
+ model[3].L = PI/model[3].Wo;
+
+ e_index = unpack(bits, &nbit, E_BITS);
+ e[3] = decode_energy(e_index);
+
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
+ lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
+ }
+ decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
+ check_lsp_order(&lsps[3][0], LPC_ORD);
+ bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
+
+ /* interpolate ------------------------------------------------*/
+
+ /* Wo and energy are sampled every 20ms, so we interpolate just 1
+ 10ms frame between 20ms samples */
+
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ e[0] = interp_energy(c2->prev_e_dec, e[1]);
+ interp_Wo(&model[2], &model[1], &model[3]);
+ e[2] = interp_energy(e[1], e[3]);
+
+ /* LSPs are sampled every 40ms so we interpolate the 3 frames in
+ between, then recover spectral amplitudes */
+
+ for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ }
+ for(i=0; i<4; i++) {
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
+ apply_lpc_correction(&model[i]);
+ }
+
+ /* synthesise ------------------------------------------------*/
+
+ for(i=0; i<4; i++)
+ synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]);
+
+ /* update memories for next frame ----------------------------*/
+
+ c2->prev_model_dec = model[3];
+ c2->prev_e_dec = e[3];
+ for(i=0; i<LPC_ORD; i++)
+ c2->prev_lsps_dec[i] = lsps[3][i];
+
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_encode_1400
+ AUTHOR......: David Rowe
+ DATE CREATED: May 11 2012
+
+ Encodes 320 speech samples (40ms of speech) into 56 bits.
+
+ The codec2 algorithm actually operates internally on 10ms (80
+ sample) frames, so we run the encoding algorithm 4 times:
+
+ frame 1: voicing bit
+ frame 2: voicing bit, joint VQ of Wo and E
+ frame 3: voicing bit
+ frame 4: voicing bit, joint VQ of Wo and E, scalar LSPs
+
+ The bit allocation is:
+
+ Parameter frame 2 frame 4 Total
+ -------------------------------------------------------
+ Harmonic magnitudes (LSPs) 0 36 36
+ Energy+Wo 8 8 16
+ Voicing (10ms update) 2 2 4
+ TOTAL 10 46 56
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode_1400(struct CODEC2 *c2, unsigned char * bits, short speech[])
{
- CODEC2 *c2;
MODEL model;
- int voiced1, voiced2;
+ float lsps[LPC_ORD];
+ float ak[LPC_ORD+1];
+ float e;
int lsp_indexes[LPC_ORD];
+ int WoE_index;
+ int i;
+ unsigned int nbit = 0;
+
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
+
+ /* frame 1: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, speech);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 2: - voicing, joint Wo & E -------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* need to run this just to get LPC energy */
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+
+ WoE_index = encode_WoE(&model, e, c2->xq_enc);
+ pack(bits, &nbit, WoE_index, WO_E_BITS);
+
+ /* frame 3: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[2*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/
+
+ analyse_one_frame(c2, &model, &speech[3*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ WoE_index = encode_WoE(&model, e, c2->xq_enc);
+ pack(bits, &nbit, WoE_index, WO_E_BITS);
+
+ encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
+ pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
+ }
+
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2));
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode_1400
+ AUTHOR......: David Rowe
+ DATE CREATED: 11 May 2012
+
+ Decodes frames of 56 bits into 320 samples (40ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_decode_1400(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+{
+ MODEL model[4];
+ int lsp_indexes[LPC_ORD];
+ float lsps[4][LPC_ORD];
+ int WoE_index;
+ float e[4];
+ float snr;
+ float ak[4][LPC_ORD+1];
+ int i,j;
+ unsigned int nbit = 0;
+ float weight;
+
+ assert(c2 != NULL);
+
+ /* only need to zero these out due to (unused) snr calculation */
+
+ for(i=0; i<4; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ /* this will partially fill the model params for the 4 x 10ms
+ frames */
+
+ model[0].voiced = unpack(bits, &nbit, 1);
+
+ model[1].voiced = unpack(bits, &nbit, 1);
+ WoE_index = unpack(bits, &nbit, WO_E_BITS);
+ decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index);
+
+ model[2].voiced = unpack(bits, &nbit, 1);
+
+ model[3].voiced = unpack(bits, &nbit, 1);
+ WoE_index = unpack(bits, &nbit, WO_E_BITS);
+ decode_WoE(&model[3], &e[3], c2->xq_dec, WoE_index);
+
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
+ lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
+ }
+ decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
+ check_lsp_order(&lsps[3][0], LPC_ORD);
+ bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
+
+ /* interpolate ------------------------------------------------*/
+
+ /* Wo and energy are sampled every 20ms, so we interpolate just 1
+ 10ms frame between 20ms samples */
+
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ e[0] = interp_energy(c2->prev_e_dec, e[1]);
+ interp_Wo(&model[2], &model[1], &model[3]);
+ e[2] = interp_energy(e[1], e[3]);
+
+ /* LSPs are sampled every 40ms so we interpolate the 3 frames in
+ between, then recover spectral amplitudes */
+
+ for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ }
+ for(i=0; i<4; i++) {
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
+ apply_lpc_correction(&model[i]);
+ }
+
+ /* synthesise ------------------------------------------------*/
+
+ for(i=0; i<4; i++)
+ synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]);
+
+ /* update memories for next frame ----------------------------*/
+
+ c2->prev_model_dec = model[3];
+ c2->prev_e_dec = e[3];
+ for(i=0; i<LPC_ORD; i++)
+ c2->prev_lsps_dec[i] = lsps[3][i];
+
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_encode_1300
+ AUTHOR......: David Rowe
+ DATE CREATED: March 14 2013
+
+ Encodes 320 speech samples (40ms of speech) into 52 bits.
+
+ The codec2 algorithm actually operates internally on 10ms (80
+ sample) frames, so we run the encoding algorithm 4 times:
+
+ frame 1: voicing bit
+ frame 2: voicing bit
+ frame 3: voicing bit
+ frame 4: voicing bit, Wo and E, scalar LSPs
+
+ The bit allocation is:
+
+ Parameter frame 2 frame 4 Total
+ -------------------------------------------------------
+ Harmonic magnitudes (LSPs) 0 36 36
+ Pitch (Wo) 0 7 7
+ Energy 0 5 5
+ Voicing (10ms update) 2 2 4
+ TOTAL 2 50 52
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode_1300(struct CODEC2 *c2, unsigned char * bits, short speech[])
+{
+ MODEL model;
float lsps[LPC_ORD];
- int energy_index;
- float energy;
- int Wo_index;
float ak[LPC_ORD+1];
- float ak_interp[LPC_ORD+1];
+ float e;
+ int lsp_indexes[LPC_ORD];
+ int Wo_index, e_index;
int i;
unsigned int nbit = 0;
- MODEL model_interp;
+ #ifdef TIMER
+ unsigned int quant_start;
+ #endif
+
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
+
+ /* frame 1: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, speech);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 2: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 3: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[2*N]);
+ pack(bits, &nbit, model.voiced, 1);
- assert(codec2_state != NULL);
- c2 = (CODEC2*)codec2_state;
+ /* frame 4: - voicing, scalar Wo & E, scalar LSPs ------------------*/
- /* unpack bit stream to integer codes */
+ analyse_one_frame(c2, &model, &speech[3*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ Wo_index = encode_Wo(model.Wo);
+ pack(bits, &nbit, Wo_index, WO_BITS);
+
+ #ifdef TIMER
+ quant_start = machdep_timer_sample();
+ #endif
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ e_index = encode_energy(e);
+ pack(bits, &nbit, e_index, E_BITS);
+
+ encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD);
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
+ pack(bits, &nbit, lsp_indexes[i], lsp_bits(i));
+ }
+ #ifdef TIMER
+ machdep_timer_sample_and_log(quant_start, " quant/packing");
+ #endif
+
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2));
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode_1300
+ AUTHOR......: David Rowe
+ DATE CREATED: 11 May 2012
+
+ Decodes frames of 52 bits into 320 samples (40ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_decode_1300(struct CODEC2 *c2, short speech[], const unsigned char * bits, float ber_est)
+{
+ MODEL model[4];
+ int lsp_indexes[LPC_ORD];
+ float lsps[4][LPC_ORD];
+ int Wo_index, e_index;
+ float e[4];
+ float snr;
+ float ak[4][LPC_ORD+1];
+ int i,j;
+ unsigned int nbit = 0;
+ float weight;
+ TIMER_VAR(recover_start);
+
+ assert(c2 != NULL);
+
+ /* only need to zero these out due to (unused) snr calculation */
+
+ for(i=0; i<4; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ /* this will partially fill the model params for the 4 x 10ms
+ frames */
+
+ model[0].voiced = unpack(bits, &nbit, 1);
+ model[1].voiced = unpack(bits, &nbit, 1);
+ model[2].voiced = unpack(bits, &nbit, 1);
+ model[3].voiced = unpack(bits, &nbit, 1);
Wo_index = unpack(bits, &nbit, WO_BITS);
- for(i=0; i<LPC_ORD; i++) {
+ model[3].Wo = decode_Wo(Wo_index);
+ model[3].L = PI/model[3].Wo;
+
+ e_index = unpack(bits, &nbit, E_BITS);
+ e[3] = decode_energy(e_index);
+
+ for(i=0; i<LSP_SCALAR_INDEXES; i++) {
lsp_indexes[i] = unpack(bits, &nbit, lsp_bits(i));
}
- energy_index = unpack(bits, &nbit, E_BITS);
- voiced1 = unpack(bits, &nbit, 1);
- voiced2 = unpack(bits, &nbit, 1);
- assert(nbit == CODEC2_BITS_PER_FRAME);
+ decode_lsps_scalar(&lsps[3][0], lsp_indexes, LPC_ORD);
+ check_lsp_order(&lsps[3][0], LPC_ORD);
+ bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
+
+ if (ber_est > 0.15) {
+ model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced = 0;
+ e[3] = decode_energy(10);
+ bw_expand_lsps(&lsps[3][0], LPC_ORD, 200.0, 200.0);
+ fprintf(stderr, "soft mute\n");
+ }
+
+ /* interpolate ------------------------------------------------*/
+
+ /* Wo, energy, and LSPs are sampled every 40ms so we interpolate
+ the 3 frames in between */
+
+ TIMER_SAMPLE(recover_start);
+ for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
+ e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
+ }
+
+ /* then recover spectral amplitudes */
+
+ for(i=0; i<4; i++) {
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
+ apply_lpc_correction(&model[i]);
+ }
+ TIMER_SAMPLE_AND_LOG2(recover_start, " recover");
+
+ /* synthesise ------------------------------------------------*/
+
+ for(i=0; i<4; i++)
+ synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]);
+
+ /* update memories for next frame ----------------------------*/
+
+ c2->prev_model_dec = model[3];
+ c2->prev_e_dec = e[3];
+ for(i=0; i<LPC_ORD; i++)
+ c2->prev_lsps_dec[i] = lsps[3][i];
+
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_encode_1200
+ AUTHOR......: David Rowe
+ DATE CREATED: Nov 14 2011
+
+ Encodes 320 speech samples (40ms of speech) into 48 bits.
+
+ The codec2 algorithm actually operates internally on 10ms (80
+ sample) frames, so we run the encoding algorithm four times:
+
+ frame 0: voicing bit
+ frame 1: voicing bit, joint VQ of Wo and E
+ frame 2: voicing bit
+ frame 3: voicing bit, joint VQ of Wo and E, VQ LSPs
+
+ The bit allocation is:
+
+ Parameter frame 2 frame 4 Total
+ -------------------------------------------------------
+ Harmonic magnitudes (LSPs) 0 27 27
+ Energy+Wo 8 8 16
+ Voicing (10ms update) 2 2 4
+ Spare 0 1 1
+ TOTAL 10 38 48
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode_1200(struct CODEC2 *c2, unsigned char * bits, short speech[])
+{
+ MODEL model;
+ float lsps[LPC_ORD];
+ float lsps_[LPC_ORD];
+ float ak[LPC_ORD+1];
+ float e;
+ int lsp_indexes[LPC_ORD];
+ int WoE_index;
+ int i;
+ int spare = 0;
+ unsigned int nbit = 0;
+
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
+
+ /* frame 1: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, speech);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 2: - voicing, joint Wo & E -------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* need to run this just to get LPC energy */
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+
+ WoE_index = encode_WoE(&model, e, c2->xq_enc);
+ pack(bits, &nbit, WoE_index, WO_E_BITS);
+
+ /* frame 3: - voicing ---------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &speech[2*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ /* frame 4: - voicing, joint Wo & E, scalar LSPs ------------------*/
+
+ analyse_one_frame(c2, &model, &speech[3*N]);
+ pack(bits, &nbit, model.voiced, 1);
+
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD);
+ WoE_index = encode_WoE(&model, e, c2->xq_enc);
+ pack(bits, &nbit, WoE_index, WO_E_BITS);
+
+ encode_lsps_vq(lsp_indexes, lsps, lsps_, LPC_ORD);
+ for(i=0; i<LSP_PRED_VQ_INDEXES; i++) {
+ pack(bits, &nbit, lsp_indexes[i], lsp_pred_vq_bits(i));
+ }
+ pack(bits, &nbit, spare, 1);
+
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2));
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode_1200
+ AUTHOR......: David Rowe
+ DATE CREATED: 14 Feb 2012
+
+ Decodes frames of 48 bits into 320 samples (40ms) of speech.
- /* decode integer codes to model parameters */
+\*---------------------------------------------------------------------------*/
- model.Wo = decode_Wo(Wo_index);
- model.L = PI/model.Wo;
- memset(&model.A, 0, (model.L+1)*sizeof(model.A[0]));
- decode_amplitudes(&model,
- ak,
- lsp_indexes,
- energy_index,
- lsps,
- &energy);
+void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+{
+ MODEL model[4];
+ int lsp_indexes[LPC_ORD];
+ float lsps[4][LPC_ORD];
+ int WoE_index;
+ float e[4];
+ float snr;
+ float ak[4][LPC_ORD+1];
+ int i,j;
+ unsigned int nbit = 0;
+ float weight;
- model.voiced = voiced2;
- model_interp.voiced = voiced1;
- model_interp.Wo = P_MAX/2;
- memset(&model_interp.A, 0, MAX_AMP*sizeof(model_interp.A[0]));
+ assert(c2 != NULL);
- /* interpolate middle frame's model parameters for adjacent frames */
+ /* only need to zero these out due to (unused) snr calculation */
- interpolate_lsp(&model_interp, &c2->prev_model, &model,
- c2->prev_lsps, c2->prev_energy, lsps, energy, ak_interp);
- apply_lpc_correction(&model_interp);
+ for(i=0; i<4; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ /* this will partially fill the model params for the 4 x 10ms
+ frames */
+
+ model[0].voiced = unpack(bits, &nbit, 1);
+
+ model[1].voiced = unpack(bits, &nbit, 1);
+ WoE_index = unpack(bits, &nbit, WO_E_BITS);
+ decode_WoE(&model[1], &e[1], c2->xq_dec, WoE_index);
+
+ model[2].voiced = unpack(bits, &nbit, 1);
+
+ model[3].voiced = unpack(bits, &nbit, 1);
+ WoE_index = unpack(bits, &nbit, WO_E_BITS);
+ decode_WoE(&model[3], &e[3], c2->xq_dec, WoE_index);
+
+ for(i=0; i<LSP_PRED_VQ_INDEXES; i++) {
+ lsp_indexes[i] = unpack(bits, &nbit, lsp_pred_vq_bits(i));
+ }
+ decode_lsps_vq(lsp_indexes, &lsps[3][0], LPC_ORD);
+ check_lsp_order(&lsps[3][0], LPC_ORD);
+ bw_expand_lsps(&lsps[3][0], LPC_ORD, 50.0, 100.0);
+
+ /* interpolate ------------------------------------------------*/
+
+ /* Wo and energy are sampled every 20ms, so we interpolate just 1
+ 10ms frame between 20ms samples */
+
+ interp_Wo(&model[0], &c2->prev_model_dec, &model[1]);
+ e[0] = interp_energy(c2->prev_e_dec, e[1]);
+ interp_Wo(&model[2], &model[1], &model[3]);
+ e[2] = interp_energy(e[1], e[3]);
+
+ /* LSPs are sampled every 40ms so we interpolate the 3 frames in
+ between, then recover spectral amplitudes */
+
+ for(i=0, weight=0.25; i<3; i++, weight += 0.25) {
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight);
+ }
+ for(i=0; i<4; i++) {
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma);
+ apply_lpc_correction(&model[i]);
+ }
- /* synthesis two 10ms frames */
+ /* synthesise ------------------------------------------------*/
- synthesise_one_frame(c2, speech, &model_interp, ak_interp);
- synthesise_one_frame(c2, &speech[N], &model, ak);
+ for(i=0; i<4; i++)
+ synthesise_one_frame(c2, &speech[N*i], &model[i], &ak[i][0]);
- /* update memories (decode states) for next time */
+ /* update memories for next frame ----------------------------*/
- memcpy(&c2->prev_model, &model, sizeof(MODEL));
- memcpy(c2->prev_lsps, lsps, sizeof(lsps));
- c2->prev_energy = energy;
+ c2->prev_model_dec = model[3];
+ c2->prev_e_dec = e[3];
+ for(i=0; i<LPC_ORD; i++)
+ c2->prev_lsps_dec[i] = lsps[3][i];
}
+
/*---------------------------------------------------------------------------*\
FUNCTION....: synthesise_one_frame()
@@ -279,13 +1304,30 @@ void codec2_decode(void *codec2_state, short speech[],
\*---------------------------------------------------------------------------*/
-void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model, float ak[])
+void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, float ak[])
{
int i;
+ TIMER_VAR(phase_start, pf_start, synth_start);
+
+ #ifdef DUMP
+ dump_quantised_model(model);
+ #endif
+
+ TIMER_SAMPLE(phase_start);
+
+ phase_synth_zero_order(c2->fft_fwd_cfg, model, ak, &c2->ex_phase, LPC_ORD);
+
+ TIMER_SAMPLE_AND_LOG(pf_start,phase_start, " phase_synth");
- phase_synth_zero_order(model, ak, &c2->ex_phase, LPC_ORD);
postfilter(model, &c2->bg_est);
- synthesise(c2->Sn_, model, c2->Pn, 1);
+
+ TIMER_SAMPLE_AND_LOG(synth_start, pf_start, " postfilter");
+
+ synthesise(c2->fft_inv_cfg, c2->Sn_, model, c2->Pn, 1);
+
+ TIMER_SAMPLE_AND_LOG2(synth_start, " synth");
+
+ ear_protection(c2->Sn_, N);
for(i=0; i<N; i++) {
if (c2->Sn_[i] > 32767.0)
@@ -309,13 +1351,14 @@ void synthesise_one_frame(CODEC2 *c2, short speech[], MODEL *model, float ak[])
\*---------------------------------------------------------------------------*/
-void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[])
+void analyse_one_frame(struct CODEC2 *c2, MODEL *model, short speech[])
{
COMP Sw[FFT_ENC];
COMP Sw_[FFT_ENC];
COMP Ew[FFT_ENC];
float pitch;
int i;
+ TIMER_VAR(dft_start, nlp_start, model_start, two_stage, estamps);
/* Read input speech */
@@ -324,19 +1367,155 @@ void analyse_one_frame(CODEC2 *c2, MODEL *model, short speech[])
for(i=0; i<N; i++)
c2->Sn[i+M-N] = speech[i];
- dft_speech(Sw, c2->Sn, c2->w);
+ TIMER_SAMPLE(dft_start);
+ dft_speech(c2->fft_fwd_cfg, Sw, c2->Sn, c2->w);
+ TIMER_SAMPLE_AND_LOG(nlp_start, dft_start, " dft_speech");
/* Estimate pitch */
- nlp(c2->nlp,c2->Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&c2->prev_Wo);
+ nlp(c2->nlp,c2->Sn,N,P_MIN,P_MAX,&pitch,Sw, c2->W, &c2->prev_Wo_enc);
+ TIMER_SAMPLE_AND_LOG(model_start, nlp_start, " nlp");
+
model->Wo = TWO_PI/pitch;
model->L = PI/model->Wo;
/* estimate model parameters */
two_stage_pitch_refinement(model, Sw);
- estimate_amplitudes(model, Sw, c2->W);
- est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo);
+ TIMER_SAMPLE_AND_LOG(two_stage, model_start, " two_stage");
+ estimate_amplitudes(model, Sw, c2->W, 0);
+ TIMER_SAMPLE_AND_LOG(estamps, two_stage, " est_amps");
+ est_voicing_mbe(model, Sw, c2->W, Sw_, Ew, c2->prev_Wo_enc);
+ c2->prev_Wo_enc = model->Wo;
+ TIMER_SAMPLE_AND_LOG2(estamps, " est_voicing");
+ #ifdef DUMP
+ dump_model(model);
+ #endif
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: ear_protection()
+ AUTHOR......: David Rowe
+ DATE CREATED: Nov 7 2012
+
+ Limits output level to protect ears when there are bit errors or the input
+ is overdriven. This doesn't correct or mask bit errors, just reduces the
+ worst of their damage.
+
+\*---------------------------------------------------------------------------*/
+
+static void ear_protection(float in_out[], int n) {
+ float max_sample, over, gain;
+ int i;
+
+ /* find maximum sample in frame */
+
+ max_sample = 0.0;
+ for(i=0; i<n; i++)
+ if (in_out[i] > max_sample)
+ max_sample = in_out[i];
+
+ /* determine how far above set point */
+
+ over = max_sample/30000.0;
+
+ /* If we are x dB over set point we reduce level by 2x dB, this
+ attenuates major excursions in amplitude (likely to be caused
+ by bit errors) more than smaller ones */
+
+ if (over > 1.0) {
+ gain = 1.0/(over*over);
+ //fprintf(stderr, "gain: %f\n", gain);
+ for(i=0; i<n; i++)
+ in_out[i] *= gain;
+ }
+}
+
+void CODEC2_WIN32SUPPORT codec2_set_lpc_post_filter(struct CODEC2 *c2, int enable, int bass_boost, float beta, float gamma)
+{
+ assert((beta >= 0.0) && (beta <= 1.0));
+ assert((gamma >= 0.0) && (gamma <= 1.0));
+ c2->lpc_pf = enable;
+ c2->bass_boost = bass_boost;
+ c2->beta = beta;
+ c2->gamma = gamma;
+}
+
+/*
+ Allows optional stealing of one of the voicing bits for use as a
+ spare bit, only 1300 & 1400 & 1600 bit/s supported for now.
+ Experimental method of sending voice/data frames for FreeDV.
+*/
+
+int CODEC2_WIN32SUPPORT codec2_get_spare_bit_index(struct CODEC2 *c2)
+{
+ assert(c2 != NULL);
+
+ switch(c2->mode) {
+ case CODEC2_MODE_1300:
+ return 2; // bit 2 (3rd bit) is v2 (third voicing bit)
+ break;
+ case CODEC2_MODE_1400:
+ return 10; // bit 10 (11th bit) is v2 (third voicing bit)
+ break;
+ case CODEC2_MODE_1600:
+ return 15; // bit 15 (16th bit) is v2 (third voicing bit)
+ break;
+ }
+
+ return -1;
+}
+
+/*
+ Reconstructs the spare voicing bit. Note works on unpacked bits
+ for convenience.
+*/
+
+int CODEC2_WIN32SUPPORT codec2_rebuild_spare_bit(struct CODEC2 *c2, int unpacked_bits[])
+{
+ int v1,v3;
+
+ assert(c2 != NULL);
+
+ v1 = unpacked_bits[1];
+
+ switch(c2->mode) {
+ case CODEC2_MODE_1300:
+
+ v3 = unpacked_bits[1+1+1];
+
+ /* if either adjacent frame is voiced, make this one voiced */
+
+ unpacked_bits[2] = (v1 || v3);
+
+ return 0;
+
+ break;
+
+ case CODEC2_MODE_1400:
+
+ v3 = unpacked_bits[1+1+8+1];
+
+ /* if either adjacent frame is voiced, make this one voiced */
+
+ unpacked_bits[10] = (v1 || v3);
+
+ return 0;
+
+ break;
+
+ case CODEC2_MODE_1600:
+ v3 = unpacked_bits[1+1+8+5+1];
+
+ /* if either adjacent frame is voiced, make this one voiced */
+
+ unpacked_bits[15] = (v1 || v3);
+
+ return 0;
+
+ break;
+ }
- c2->prev_Wo = model->Wo;
+ return -1;
}