summaryrefslogtreecommitdiff
path: root/gr-vocoder/lib/codec2/quantise.c
diff options
context:
space:
mode:
Diffstat (limited to 'gr-vocoder/lib/codec2/quantise.c')
-rw-r--r--gr-vocoder/lib/codec2/quantise.c1739
1 files changed, 1429 insertions, 310 deletions
diff --git a/gr-vocoder/lib/codec2/quantise.c b/gr-vocoder/lib/codec2/quantise.c
index 25f26066ed..6423dc83df 100644
--- a/gr-vocoder/lib/codec2/quantise.c
+++ b/gr-vocoder/lib/codec2/quantise.c
@@ -36,7 +36,9 @@
#include "quantise.h"
#include "lpc.h"
#include "lsp.h"
-#include "fft.h"
+#include "kiss_fft.h"
+#undef TIMER
+#include "machdep.h"
#define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */
@@ -59,38 +61,20 @@ int lsp_bits(int i) {
return lsp_cb[i].log2m;
}
-#if VECTOR_QUANTISATION
-/*---------------------------------------------------------------------------*\
-
- quantise_uniform
-
- Simulates uniform quantising of a float.
-
-\*---------------------------------------------------------------------------*/
-
-void quantise_uniform(float *val, float min, float max, int bits)
-{
- int levels = 1 << (bits-1);
- float norm;
- int index;
-
- /* hard limit to quantiser range */
-
- printf("min: %f max: %f val: %f ", min, max, val[0]);
- if (val[0] < min) val[0] = min;
- if (val[0] > max) val[0] = max;
-
- norm = (*val - min)/(max-min);
- printf("%f norm: %f ", val[0], norm);
- index = fabs(levels*norm + 0.5);
-
- *val = min + index*(max-min)/levels;
-
- printf("index %d val_: %f\n", index, val[0]);
+int lspd_bits(int i) {
+ return lsp_cbd[i].log2m;
}
+#ifdef __EXPERIMENTAL__
+int lspdt_bits(int i) {
+ return lsp_cbdt[i].log2m;
+}
#endif
+int lsp_pred_vq_bits(int i) {
+ return lsp_cbjvm[i].log2m;
+}
+
/*---------------------------------------------------------------------------*\
quantise_init
@@ -127,13 +111,16 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se)
float beste; /* best error so far */
long j;
int i;
+ float diff;
besti = 0;
beste = 1E32;
for(j=0; j<m; j++) {
e = 0.0;
- for(i=0; i<k; i++)
- e += pow((cb[j*k+i]-vec[i])*w[i],2.0);
+ for(i=0; i<k; i++) {
+ diff = cb[j*k+i]-vec[i];
+ e += powf(diff*w[i],2.0);
+ }
if (e < beste) {
beste = e;
besti = j;
@@ -147,16 +134,16 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se)
/*---------------------------------------------------------------------------*\
- lspd_quantise
+ encode_lspds_scalar()
- Scalar lsp difference quantiser.
+ Scalar/VQ LSP difference quantiser.
\*---------------------------------------------------------------------------*/
-void lspd_quantise(
- float lsp[],
- float lsp_[],
- int order
+void encode_lspds_scalar(
+ int indexes[],
+ float lsp[],
+ int order
)
{
int i,k,m;
@@ -164,10 +151,15 @@ void lspd_quantise(
float lsp__hz[LPC_MAX];
float dlsp[LPC_MAX];
float dlsp_[LPC_MAX];
- float wt[1];
+ float wt[LPC_MAX];
const float *cb;
- float se = 0.0;
- int indexes[LPC_MAX];
+ float se;
+
+ assert(order == LPC_ORD);
+
+ for(i=0; i<order; i++) {
+ wt[i] = 1.0;
+ }
/* convert from radians to Hz so we can use human readable
frequencies */
@@ -175,14 +167,13 @@ void lspd_quantise(
for(i=0; i<order; i++)
lsp_hz[i] = (4000.0/PI)*lsp[i];
- dlsp[0] = lsp_hz[0];
- for(i=1; i<order; i++)
- dlsp[i] = lsp_hz[i] - lsp_hz[i-1];
-
- /* simple uniform scalar quantisers */
+ //printf("\n");
wt[0] = 1.0;
for(i=0; i<order; i++) {
+
+ /* find difference from previous qunatised lsp */
+
if (i)
dlsp[i] = lsp_hz[i] - lsp__hz[i-1];
else
@@ -194,129 +185,565 @@ void lspd_quantise(
indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se);
dlsp_[i] = cb[indexes[i]*k];
+
if (i)
lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
else
lsp__hz[0] = dlsp_[0];
+
+ //printf("%d lsp %3.2f dlsp %3.2f dlsp_ %3.2f lsp_ %3.2f\n", i, lsp_hz[i], dlsp[i], dlsp_[i], lsp__hz[i]);
}
- for(; i<order; i++)
- lsp__hz[i] = lsp__hz[i-1] + dlsp[i];
- /* convert back to radians */
+}
+
+void decode_lspds_scalar(
+ float lsp_[],
+ int indexes[],
+ int order
+)
+{
+ int i,k;
+ float lsp__hz[LPC_MAX];
+ float dlsp_[LPC_MAX];
+ const float *cb;
+
+ assert(order == LPC_ORD);
+
+ for(i=0; i<order; i++) {
+
+ k = lsp_cbd[i].k;
+ cb = lsp_cbd[i].cb;
+ dlsp_[i] = cb[indexes[i]*k];
+
+ if (i)
+ lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
+ else
+ lsp__hz[0] = dlsp_[0];
- for(i=0; i<order; i++)
lsp_[i] = (PI/4000.0)*lsp__hz[i];
+
+ //printf("%d dlsp_ %3.2f lsp_ %3.2f\n", i, dlsp_[i], lsp__hz[i]);
+ }
+
}
+#ifdef __EXPERIMENTAL__
/*---------------------------------------------------------------------------*\
- lspd_vq_quantise
+ lspvq_quantise
- Vector lsp difference quantiser.
+ Vector LSP quantiser.
\*---------------------------------------------------------------------------*/
-void lspdvq_quantise(
+void lspvq_quantise(
float lsp[],
float lsp_[],
int order
)
{
int i,k,m,ncb, nlsp;
- float dlsp[LPC_MAX];
- float dlsp_[LPC_MAX];
- float wt[LPC_ORD];
+ float wt[LPC_ORD], lsp_hz[LPC_ORD];
const float *cb;
- float se = 0.0;
+ float se;
int index;
- dlsp[0] = lsp[0];
- for(i=1; i<order; i++)
- dlsp[i] = lsp[i] - lsp[i-1];
+ for(i=0; i<LPC_ORD; i++) {
+ wt[i] = 1.0;
+ lsp_hz[i] = 4000.0*lsp[i]/PI;
+ }
- for(i=0; i<order; i++)
- dlsp_[i] = dlsp[i];
+ /* scalar quantise LSPs 1,2,3,4 */
- for(i=0; i<order; i++)
+ /* simple uniform scalar quantisers */
+
+ for(i=0; i<4; i++) {
+ k = lsp_cb[i].k;
+ m = lsp_cb[i].m;
+ cb = lsp_cb[i].cb;
+ index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
+ lsp_[i] = cb[index*k]*PI/4000.0;
+ }
+
+ //#define WGHT
+#ifdef WGHT
+ for(i=4; i<9; i++) {
+ wt[i] = 1.0/(lsp[i]-lsp[i-1]) + 1.0/(lsp[i+1]-lsp[i]);
+ //printf("wt[%d] = %f\n", i, wt[i]);
+ }
+ wt[9] = 1.0/(lsp[i]-lsp[i-1]);
+#endif
+
+ /* VQ LSPs 5,6,7,8,9,10 */
+
+ ncb = 4;
+ nlsp = 4;
+ k = lsp_cbjnd[ncb].k;
+ m = lsp_cbjnd[ncb].m;
+ cb = lsp_cbjnd[ncb].cb;
+ index = quantise(cb, &lsp_hz[nlsp], &wt[nlsp], k, m, &se);
+ for(i=4; i<LPC_ORD; i++) {
+ lsp_[i] = cb[index*k+i-4]*(PI/4000.0);
+ //printf("%4.f (%4.f) ", lsp_hz[i], cb[index*k+i-4]);
+ }
+}
+
+/*---------------------------------------------------------------------------*\
+
+ lspjnd_quantise
+
+ Experimental JND LSP quantiser.
+
+\*---------------------------------------------------------------------------*/
+
+void lspjnd_quantise(float lsps[], float lsps_[], int order)
+{
+ int i,k,m;
+ float wt[LPC_ORD], lsps_hz[LPC_ORD];
+ const float *cb;
+ float se = 0.0;
+ int index;
+
+ for(i=0; i<LPC_ORD; i++) {
wt[i] = 1.0;
+ }
- /* scalar quantise dLSPs 1,2,3,4,5 */
+ /* convert to Hz */
- for(i=0; i<5; i++) {
- if (i)
- dlsp[i] = (lsp[i] - lsp_[i-1])*4000.0/PI;
- else
- dlsp[0] = lsp[0]*4000.0/PI;
+ for(i=0; i<LPC_ORD; i++) {
+ lsps_hz[i] = lsps[i]*(4000.0/PI);
+ lsps_[i] = lsps[i];
+ }
- k = lsp_cbdvq[i].k;
- m = lsp_cbdvq[i].m;
- cb = lsp_cbdvq[i].cb;
- index = quantise(cb, &dlsp[i], wt, k, m, &se);
- dlsp_[i] = cb[index*k]*PI/4000.0;
+ /* simple uniform scalar quantisers */
- if (i)
- lsp_[i] = lsp_[i-1] + dlsp_[i];
- else
- lsp_[0] = dlsp_[0];
- }
- dlsp[i] = lsp[i] - lsp_[i-1];
- dlsp_[i] = dlsp[i];
-
- //printf("lsp[0] %f lsp_[0] %f\n", lsp[0], lsp_[0]);
- //printf("lsp[1] %f lsp_[1] %f\n", lsp[1], lsp_[1]);
-
-#ifdef TT
- /* VQ dLSPs 3,4,5 */
-
- ncb = 2;
- nlsp = 2;
- k = lsp_cbdvq[ncb].k;
- m = lsp_cbdvq[ncb].m;
- cb = lsp_cbdvq[ncb].cb;
- index = quantise(cb, &dlsp[nlsp], wt, k, m, &se);
- dlsp_[nlsp] = cb[index*k];
- dlsp_[nlsp+1] = cb[index*k+1];
- dlsp_[nlsp+2] = cb[index*k+2];
-
- lsp_[0] = dlsp_[0];
- for(i=1; i<5; i++)
- lsp_[i] = lsp_[i-1] + dlsp_[i];
- dlsp[i] = lsp[i] - lsp_[i-1];
- dlsp_[i] = dlsp[i];
+ for(i=0; i<4; i++) {
+ k = lsp_cbjnd[i].k;
+ m = lsp_cbjnd[i].m;
+ cb = lsp_cbjnd[i].cb;
+ index = quantise(cb, &lsps_hz[i], wt, k, m, &se);
+ lsps_[i] = cb[index*k]*(PI/4000.0);
+ }
+
+ /* VQ LSPs 5,6,7,8,9,10 */
+
+ k = lsp_cbjnd[4].k;
+ m = lsp_cbjnd[4].m;
+ cb = lsp_cbjnd[4].cb;
+ index = quantise(cb, &lsps_hz[4], &wt[4], k, m, &se);
+ //printf("k = %d m = %d c[0] %f cb[k] %f\n", k,m,cb[0],cb[k]);
+ //printf("index = %4d: ", index);
+ for(i=4; i<LPC_ORD; i++) {
+ lsps_[i] = cb[index*k+i-4]*(PI/4000.0);
+ //printf("%4.f (%4.f) ", lsps_hz[i], cb[index*k+i-4]);
+ }
+ //printf("\n");
+}
+
+void compute_weights(const float *x, float *w, int ndim);
+
+/*---------------------------------------------------------------------------*\
+
+ lspdt_quantise
+
+ LSP difference in time quantiser. Split VQ, encoding LSPs 1-4 with
+ one VQ, and LSPs 5-10 with a second. Update of previous lsp memory
+ is done outside of this function to handle dT between 10 or 20ms
+ frames.
+
+ mode action
+ ------------------
+
+ LSPDT_ALL VQ LSPs 1-4 and 5-10
+ LSPDT_LOW Just VQ LSPs 1-4, for LSPs 5-10 just copy previous
+ LSPDT_HIGH Just VQ LSPs 5-10, for LSPs 1-4 just copy previous
+
+\*---------------------------------------------------------------------------*/
+
+void lspdt_quantise(float lsps[], float lsps_[], float lsps__prev[], int mode)
+{
+ int i;
+ float wt[LPC_ORD];
+ float lsps_dt[LPC_ORD];
+#ifdef TRY_LSPDT_VQ
+ int k,m;
+ int index;
+ const float *cb;
+ float se = 0.0;
+#endif // TRY_LSPDT_VQ
+
+ //compute_weights(lsps, wt, LPC_ORD);
+ for(i=0; i<LPC_ORD; i++) {
+ wt[i] = 1.0;
+ }
+
+ //compute_weights(lsps, wt, LPC_ORD );
+
+ for(i=0; i<LPC_ORD; i++) {
+ lsps_dt[i] = lsps[i] - lsps__prev[i];
+ lsps_[i] = lsps__prev[i];
+ }
+
+ //#define TRY_LSPDT_VQ
+#ifdef TRY_LSPDT_VQ
+ /* this actually improves speech a bit, but 40ms updates works surprsingly well.... */
+ k = lsp_cbdt[0].k;
+ m = lsp_cbdt[0].m;
+ cb = lsp_cbdt[0].cb;
+ index = quantise(cb, lsps_dt, wt, k, m, &se);
+ for(i=0; i<LPC_ORD; i++) {
+ lsps_[i] += cb[index*k + i];
+ }
#endif
- /* VQ dLSPs 6,7,8,9,10 */
- ncb = 5;
- nlsp = 5;
- k = lsp_cbdvq[ncb].k;
- m = lsp_cbdvq[ncb].m;
- cb = lsp_cbdvq[ncb].cb;
- index = quantise(cb, &dlsp[nlsp], wt, k, m, &se);
- dlsp_[nlsp] = cb[index*k];
- dlsp_[nlsp+1] = cb[index*k+1];
- dlsp_[nlsp+2] = cb[index*k+2];
- dlsp_[nlsp+3] = cb[index*k+3];
- dlsp_[nlsp+4] = cb[index*k+4];
+}
+#endif
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#define MAX_ENTRIES 16384
+
+void compute_weights(const float *x, float *w, int ndim)
+{
+ int i;
+ w[0] = MIN(x[0], x[1]-x[0]);
+ for (i=1;i<ndim-1;i++)
+ w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
+ w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]);
+
+ for (i=0;i<ndim;i++)
+ w[i] = 1./(.01+w[i]);
+ //w[0]*=3;
+ //w[1]*=2;
+}
+
+/* LSP weight calculation ported from m-file function kindly submitted
+ by Anssi, OH3GDD */
+
+void compute_weights_anssi_mode2(const float *x, float *w, int ndim)
+{
+ int i;
+ float d[LPC_ORD];
+
+ assert(ndim == LPC_ORD);
+
+ for(i=0; i<LPC_ORD; i++)
+ d[i] = 1.0;
+
+ d[0] = x[1];
+ for (i=1; i<LPC_ORD-1; i++)
+ d[i] = x[i+1] - x[i-1];
+ d[LPC_ORD-1] = PI - x[8];
+ for (i=0; i<LPC_ORD; i++) {
+ if (x[i]<((400.0/4000.0)*PI))
+ w[i]=5.0/(0.01+d[i]);
+ else if (x[i]<((700.0/4000.0)*PI))
+ w[i]=4.0/(0.01+d[i]);
+ else if (x[i]<((1200.0/4000.0)*PI))
+ w[i]=3.0/(0.01+d[i]);
+ else if (x[i]<((2000.0/4000.0)*PI))
+ w[i]=2.0/(0.01+d[i]);
+ else
+ w[i]=1.0/(0.01+d[i]);
+
+ w[i]=pow(w[i]+0.3, 0.66);
+ }
+}
+
+int find_nearest(const float *codebook, int nb_entries, float *x, int ndim)
+{
+ int i, j;
+ float min_dist = 1e15;
+ int nearest = 0;
+
+ for (i=0;i<nb_entries;i++)
+ {
+ float dist=0;
+ for (j=0;j<ndim;j++)
+ dist += (x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]);
+ if (dist<min_dist)
+ {
+ min_dist = dist;
+ nearest = i;
+ }
+ }
+ return nearest;
+}
+
+int find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim)
+{
+ int i, j;
+ float min_dist = 1e15;
+ int nearest = 0;
+
+ for (i=0;i<nb_entries;i++)
+ {
+ float dist=0;
+ for (j=0;j<ndim;j++)
+ dist += w[j]*(x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]);
+ if (dist<min_dist)
+ {
+ min_dist = dist;
+ nearest = i;
+ }
+ }
+ return nearest;
+}
+
+void lspjvm_quantise(float *x, float *xq, int ndim)
+{
+ int i, n1, n2, n3;
+ float err[LPC_ORD], err2[LPC_ORD], err3[LPC_ORD];
+ float w[LPC_ORD], w2[LPC_ORD], w3[LPC_ORD];
+ const float *codebook1 = lsp_cbjvm[0].cb;
+ const float *codebook2 = lsp_cbjvm[1].cb;
+ const float *codebook3 = lsp_cbjvm[2].cb;
+
+ w[0] = MIN(x[0], x[1]-x[0]);
+ for (i=1;i<ndim-1;i++)
+ w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
+ w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]);
+
+ compute_weights(x, w, ndim);
+
+ n1 = find_nearest(codebook1, lsp_cbjvm[0].m, x, ndim);
+
+ for (i=0;i<ndim;i++)
+ {
+ xq[i] = codebook1[ndim*n1+i];
+ err[i] = x[i] - xq[i];
+ }
+ for (i=0;i<ndim/2;i++)
+ {
+ err2[i] = err[2*i];
+ err3[i] = err[2*i+1];
+ w2[i] = w[2*i];
+ w3[i] = w[2*i+1];
+ }
+ n2 = find_nearest_weighted(codebook2, lsp_cbjvm[1].m, err2, w2, ndim/2);
+ n3 = find_nearest_weighted(codebook3, lsp_cbjvm[2].m, err3, w3, ndim/2);
+
+ for (i=0;i<ndim/2;i++)
+ {
+ xq[2*i] += codebook2[ndim*n2/2+i];
+ xq[2*i+1] += codebook3[ndim*n3/2+i];
+ }
+}
+
+#ifdef __EXPERIMENTAL__
+
+#define MBEST_STAGES 4
+
+struct MBEST_LIST {
+ int index[MBEST_STAGES]; /* index of each stage that lead us to this error */
+ float error;
+};
+
+struct MBEST {
+ int entries; /* number of entries in mbest list */
+ struct MBEST_LIST *list;
+};
+
+
+static struct MBEST *mbest_create(int entries) {
+ int i,j;
+ struct MBEST *mbest;
+
+ assert(entries > 0);
+ mbest = (struct MBEST *)malloc(sizeof(struct MBEST));
+ assert(mbest != NULL);
+
+ mbest->entries = entries;
+ mbest->list = (struct MBEST_LIST *)malloc(entries*sizeof(struct MBEST_LIST));
+ assert(mbest->list != NULL);
+
+ for(i=0; i<mbest->entries; i++) {
+ for(j=0; j<MBEST_STAGES; j++)
+ mbest->list[i].index[j] = 0;
+ mbest->list[i].error = 1E32;
+ }
+
+ return mbest;
+}
+
+
+static void mbest_destroy(struct MBEST *mbest) {
+ assert(mbest != NULL);
+ free(mbest->list);
+ free(mbest);
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ mbest_insert
+
+ Insert the results of a vector to codebook entry comparison. The
+ list is ordered in order or error, so those entries with the
+ smallest error will be first on the list.
+
+\*---------------------------------------------------------------------------*/
+
+static void mbest_insert(struct MBEST *mbest, int index[], float error) {
+ int i, j, found;
+ struct MBEST_LIST *list = mbest->list;
+ int entries = mbest->entries;
+
+ found = 0;
+ for(i=0; i<entries && !found; i++)
+ if (error < list[i].error) {
+ found = 1;
+ for(j=entries-1; j>i; j--)
+ list[j] = list[j-1];
+ for(j=0; j<MBEST_STAGES; j++)
+ list[i].index[j] = index[j];
+ list[i].error = error;
+ }
+}
+
- /* rebuild LSPs for dLSPs */
+static void mbest_print(char title[], struct MBEST *mbest) {
+ int i,j;
- lsp_[0] = dlsp_[0];
- for(i=1; i<order; i++)
- lsp_[i] = lsp_[i-1] + dlsp_[i];
+ printf("%s\n", title);
+ for(i=0; i<mbest->entries; i++) {
+ for(j=0; j<MBEST_STAGES; j++)
+ printf(" %4d ", mbest->list[i].index[j]);
+ printf(" %f\n", mbest->list[i].error);
+ }
}
-void check_lsp_order(float lsp[], int lpc_order)
+
+/*---------------------------------------------------------------------------*\
+
+ mbest_search
+
+ Searches vec[] to a codebbook of vectors, and maintains a list of the mbest
+ closest matches.
+
+\*---------------------------------------------------------------------------*/
+
+static void mbest_search(
+ const float *cb, /* VQ codebook to search */
+ float vec[], /* target vector */
+ float w[], /* weighting vector */
+ int k, /* dimension of vector */
+ int m, /* number on entries in codebook */
+ struct MBEST *mbest, /* list of closest matches */
+ int index[] /* indexes that lead us here */
+)
+{
+ float e;
+ int i,j;
+ float diff;
+
+ for(j=0; j<m; j++) {
+ e = 0.0;
+ for(i=0; i<k; i++) {
+ diff = cb[j*k+i]-vec[i];
+ e += pow(diff*w[i],2.0);
+ }
+ index[0] = j;
+ mbest_insert(mbest, index, e);
+ }
+}
+
+
+/* 3 stage VQ LSP quantiser. Design and guidance kindly submitted by Anssi, OH3GDD */
+
+void lspanssi_quantise(float *x, float *xq, int ndim, int mbest_entries)
+{
+ int i, j, n1, n2, n3, n4;
+ float w[LPC_ORD];
+ const float *codebook1 = lsp_cbvqanssi[0].cb;
+ const float *codebook2 = lsp_cbvqanssi[1].cb;
+ const float *codebook3 = lsp_cbvqanssi[2].cb;
+ const float *codebook4 = lsp_cbvqanssi[3].cb;
+ struct MBEST *mbest_stage1, *mbest_stage2, *mbest_stage3, *mbest_stage4;
+ float target[LPC_ORD];
+ int index[MBEST_STAGES];
+
+ mbest_stage1 = mbest_create(mbest_entries);
+ mbest_stage2 = mbest_create(mbest_entries);
+ mbest_stage3 = mbest_create(mbest_entries);
+ mbest_stage4 = mbest_create(mbest_entries);
+ for(i=0; i<MBEST_STAGES; i++)
+ index[i] = 0;
+
+ compute_weights_anssi_mode2(x, w, ndim);
+
+ #ifdef DUMP
+ dump_weights(w, ndim);
+ #endif
+
+ /* Stage 1 */
+
+ mbest_search(codebook1, x, w, ndim, lsp_cbvqanssi[0].m, mbest_stage1, index);
+ mbest_print("Stage 1:", mbest_stage1);
+
+ /* Stage 2 */
+
+ for (j=0; j<mbest_entries; j++) {
+ index[1] = n1 = mbest_stage1->list[j].index[0];
+ for(i=0; i<ndim; i++)
+ target[i] = x[i] - codebook1[ndim*n1+i];
+ mbest_search(codebook2, target, w, ndim, lsp_cbvqanssi[1].m, mbest_stage2, index);
+ }
+ mbest_print("Stage 2:", mbest_stage2);
+
+ /* Stage 3 */
+
+ for (j=0; j<mbest_entries; j++) {
+ index[2] = n1 = mbest_stage2->list[j].index[1];
+ index[1] = n2 = mbest_stage2->list[j].index[0];
+ for(i=0; i<ndim; i++)
+ target[i] = x[i] - codebook1[ndim*n1+i] - codebook2[ndim*n2+i];
+ mbest_search(codebook3, target, w, ndim, lsp_cbvqanssi[2].m, mbest_stage3, index);
+ }
+ mbest_print("Stage 3:", mbest_stage3);
+
+ /* Stage 4 */
+
+ for (j=0; j<mbest_entries; j++) {
+ index[3] = n1 = mbest_stage3->list[j].index[2];
+ index[2] = n2 = mbest_stage3->list[j].index[1];
+ index[1] = n3 = mbest_stage3->list[j].index[0];
+ for(i=0; i<ndim; i++)
+ target[i] = x[i] - codebook1[ndim*n1+i] - codebook2[ndim*n2+i] - codebook3[ndim*n3+i];
+ mbest_search(codebook4, target, w, ndim, lsp_cbvqanssi[3].m, mbest_stage4, index);
+ }
+ mbest_print("Stage 4:", mbest_stage4);
+
+ n1 = mbest_stage4->list[0].index[3];
+ n2 = mbest_stage4->list[0].index[2];
+ n3 = mbest_stage4->list[0].index[1];
+ n4 = mbest_stage4->list[0].index[0];
+ for (i=0;i<ndim;i++)
+ xq[i] = codebook1[ndim*n1+i] + codebook2[ndim*n2+i] + codebook3[ndim*n3+i] + codebook4[ndim*n4+i];
+
+ mbest_destroy(mbest_stage1);
+ mbest_destroy(mbest_stage2);
+ mbest_destroy(mbest_stage3);
+ mbest_destroy(mbest_stage4);
+}
+#endif
+
+int check_lsp_order(float lsp[], int lpc_order)
{
int i;
float tmp;
+ int swaps = 0;
for(i=1; i<lpc_order; i++)
if (lsp[i] < lsp[i-1]) {
- printf("swap %d\n",i);
+ //fprintf(stderr, "swap %d\n",i);
+ swaps++;
tmp = lsp[i-1];
- lsp[i-1] = lsp[i]-0.05;
- lsp[i] = tmp+0.05;
+ lsp[i-1] = lsp[i]-0.1;
+ lsp[i] = tmp+0.1;
+ i = 1; /* start check again, as swap may have caused out of order */
}
+
+ return swaps;
}
void force_min_lsp_dist(float lsp[], int lpc_order)
@@ -329,131 +756,159 @@ void force_min_lsp_dist(float lsp[], int lpc_order)
}
}
+
/*---------------------------------------------------------------------------*\
- lpc_model_amplitudes
+ lpc_post_filter()
+
+ Applies a post filter to the LPC synthesis filter power spectrum
+ Pw, which supresses the inter-formant energy.
+
+ The algorithm is from p267 (Section 8.6) of "Digital Speech",
+ edited by A.M. Kondoz, 1994 published by Wiley and Sons. Chapter 8
+ of this text is on the MBE vocoder, and this is a freq domain
+ adaptation of post filtering commonly used in CELP.
- Derive a LPC model for amplitude samples then estimate amplitude samples
- from this model with optional LSP quantisation.
+ I used the Octave simulation lpcpf.m to get an understaing of the
+ algorithm.
- Returns the spectral distortion for this frame.
+ Requires two more FFTs which is significantly more MIPs. However
+ it should be possible to implement this more efficiently in the
+ time domain. Just not sure how to handle relative time delays
+ between the synthesis stage and updating these coeffs. A smaller
+ FFT size might also be accetable to save CPU.
+
+ TODO:
+ [ ] sync var names between Octave and C version
+ [ ] doc gain normalisation
+ [ ] I think the first FFT is not rqd as we do the same
+ thing in aks_to_M2().
\*---------------------------------------------------------------------------*/
-float lpc_model_amplitudes(
- float Sn[], /* Input frame of speech samples */
- float w[],
- MODEL *model, /* sinusoidal model parameters */
- int order, /* LPC model order */
- int lsp_quant, /* optional LSP quantisation if non-zero */
- float ak[] /* output aks */
-)
+void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak[],
+ int order, int dump, float beta, float gamma, int bass_boost)
{
- float Wn[M];
- float R[LPC_MAX+1];
- float E;
- int i,j;
- float snr;
- float lsp[LPC_MAX];
- float lsp_hz[LPC_MAX];
- float lsp_[LPC_MAX];
- int roots; /* number of LSP roots found */
- int index;
- float se = 0.0;
- int k,m;
- const float * cb;
- float wt[LPC_MAX];
-
- for(i=0; i<M; i++)
- Wn[i] = Sn[i]*w[i];
- autocorrelate(Wn,R,M,order);
- levinson_durbin(R,ak,order);
-
- E = 0.0;
- for(i=0; i<=order; i++)
- E += ak[i]*R[i];
+ int i;
+ COMP x[FFT_ENC]; /* input to FFTs */
+ COMP Aw[FFT_ENC]; /* LPC analysis filter spectrum */
+ COMP Ww[FFT_ENC]; /* weighting spectrum */
+ float Rw[FFT_ENC]; /* R = WA */
+ float e_before, e_after, gain;
+ float Pfw[FFT_ENC]; /* Post filter mag spectrum */
+ float max_Rw, min_Rw;
+ float coeff;
+ TIMER_VAR(tstart, tfft1, taw, tfft2, tww, tr);
+
+ TIMER_SAMPLE(tstart);
+
+ /* Determine LPC inverse filter spectrum 1/A(exp(jw)) -----------*/
+
+ /* we actually want the synthesis filter A(exp(jw)) but the
+ inverse (analysis) filter is easier to find as it's FIR, we
+ just use the inverse of 1/A to get the synthesis filter
+ A(exp(jw)) */
+
+ for(i=0; i<FFT_ENC; i++) {
+ x[i].real = 0.0;
+ x[i].imag = 0.0;
+ }
- for(i=0; i<order; i++)
- wt[i] = 1.0;
+ for(i=0; i<=order; i++)
+ x[i].real = ak[i];
+ kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Aw);
- if (lsp_quant) {
- roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
- if (roots != order)
- printf("LSP roots not found\n");
+ TIMER_SAMPLE_AND_LOG(tfft1, tstart, " fft1");
- /* convert from radians to Hz to make quantisers more
- human readable */
+ for(i=0; i<FFT_ENC/2; i++) {
+ Aw[i].real = 1.0/(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag);
+ }
- for(i=0; i<order; i++)
- lsp_hz[i] = (4000.0/PI)*lsp[i];
+ TIMER_SAMPLE_AND_LOG(taw, tfft1, " Aw");
- /* simple uniform scalar quantisers */
+ /* Determine weighting filter spectrum W(exp(jw)) ---------------*/
- for(i=0; i<10; i++) {
- k = lsp_cb[i].k;
- m = lsp_cb[i].m;
- cb = lsp_cb[i].cb;
- index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
- lsp_hz[i] = cb[index*k];
+ for(i=0; i<FFT_ENC; i++) {
+ x[i].real = 0.0;
+ x[i].imag = 0.0;
}
- /* experiment: simulating uniform quantisation error
- for(i=0; i<order; i++)
- lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
- */
-
- for(i=0; i<order; i++)
- lsp[i] = (PI/4000.0)*lsp_hz[i];
+ x[0].real = ak[0];
+ coeff = gamma;
+ for(i=1; i<=order; i++) {
+ x[i].real = ak[i] * coeff;
+ coeff *= gamma;
+ }
+ kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Ww);
- /* Bandwidth Expansion (BW). Prevents any two LSPs getting too
- close together after quantisation. We know from experiment
- that LSP quantisation errors < 12.5Hz (25Hz setp size) are
- inaudible so we use that as the minimum LSP separation.
- */
+ TIMER_SAMPLE_AND_LOG(tfft2, taw, " fft2");
- for(i=1; i<5; i++) {
- if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
- lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
+ for(i=0; i<FFT_ENC/2; i++) {
+ Ww[i].real = Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag;
}
- /* as quantiser gaps increased, larger BW expansion was required
- to prevent twinkly noises */
+ TIMER_SAMPLE_AND_LOG(tww, tfft2, " Ww");
+
+ /* Determined combined filter R = WA ---------------------------*/
+
+ max_Rw = 0.0; min_Rw = 1E32;
+ for(i=0; i<FFT_ENC/2; i++) {
+ Rw[i] = sqrtf(Ww[i].real * Aw[i].real);
+ if (Rw[i] > max_Rw)
+ max_Rw = Rw[i];
+ if (Rw[i] < min_Rw)
+ min_Rw = Rw[i];
- for(i=5; i<8; i++) {
- if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
- lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
}
- for(i=8; i<order; i++) {
- if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
- lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
+
+ TIMER_SAMPLE_AND_LOG(tr, tww, " R");
+
+ #ifdef DUMP
+ if (dump)
+ dump_Rw(Rw);
+ #endif
+
+ /* create post filter mag spectrum and apply ------------------*/
+
+ /* measure energy before post filtering */
+
+ e_before = 1E-4;
+ for(i=0; i<FFT_ENC/2; i++)
+ e_before += Pw[i].real;
+
+ /* apply post filter and measure energy */
+
+ #ifdef DUMP
+ if (dump)
+ dump_Pwb(Pw);
+ #endif
+
+ e_after = 1E-4;
+ for(i=0; i<FFT_ENC/2; i++) {
+ Pfw[i] = powf(Rw[i], beta);
+ Pw[i].real *= Pfw[i] * Pfw[i];
+ e_after += Pw[i].real;
}
+ gain = e_before/e_after;
- for(j=0; j<order; j++)
- lsp_[j] = lsp[j];
+ /* apply gain factor to normalise energy */
- lsp_to_lpc(lsp_, ak, order);
-#ifdef DUMP
- dump_lsp(lsp);
-#endif
- }
+ for(i=0; i<FFT_ENC/2; i++) {
+ Pw[i].real *= gain;
+ }
-#ifdef DUMP
- dump_E(E);
-#endif
- #ifdef SIM_QUANT
- /* simulated LPC energy quantisation */
- {
- float e = 10.0*log10(E);
- e += 2.0*(1.0 - 2.0*(float)rand()/RAND_MAX);
- E = pow(10.0,e/10.0);
- }
- #endif
+ if (bass_boost) {
+ /* add 3dB to first 1 kHz to account for LP effect of PF */
- aks_to_M2(ak,order,model,E,&snr, 1); /* {ak} -> {Am} LPC decode */
+ for(i=0; i<FFT_ENC/8; i++) {
+ Pw[i].real *= 1.4*1.4;
+ }
+ }
- return snr;
+ TIMER_SAMPLE_AND_LOG2(tr, " filt");
}
+
/*---------------------------------------------------------------------------*\
aks_to_M2()
@@ -465,61 +920,103 @@ float lpc_model_amplitudes(
\*---------------------------------------------------------------------------*/
void aks_to_M2(
- float ak[], /* LPC's */
- int order,
- MODEL *model, /* sinusoidal model parameters for this frame */
- float E, /* energy term */
- float *snr, /* signal to noise ratio for this frame in dB */
- int dump /* true to dump sample to dump file */
+ kiss_fft_cfg fft_fwd_cfg,
+ float ak[], /* LPC's */
+ int order,
+ MODEL *model, /* sinusoidal model parameters for this frame */
+ float E, /* energy term */
+ float *snr, /* signal to noise ratio for this frame in dB */
+ int dump, /* true to dump sample to dump file */
+ int sim_pf, /* true to simulate a post filter */
+ int pf, /* true to LPC post filter */
+ int bass_boost, /* enable LPC filter 0-1khz 3dB boost */
+ float beta,
+ float gamma /* LPC post filter parameters */
)
{
- COMP Pw[FFT_DEC]; /* power spectrum */
+ COMP pw[FFT_ENC]; /* input to FFT for power spectrum */
+ COMP Pw[FFT_ENC]; /* output power spectrum */
int i,m; /* loop variables */
int am,bm; /* limits of current band */
float r; /* no. rads/bin */
float Em; /* energy in band */
float Am; /* spectral amplitude sample */
float signal, noise;
+ TIMER_VAR(tstart, tfft, tpw, tpf);
+
+ TIMER_SAMPLE(tstart);
- r = TWO_PI/(FFT_DEC);
+ r = TWO_PI/(FFT_ENC);
/* Determine DFT of A(exp(jw)) --------------------------------------------*/
- for(i=0; i<FFT_DEC; i++) {
- Pw[i].real = 0.0;
- Pw[i].imag = 0.0;
+ for(i=0; i<FFT_ENC; i++) {
+ pw[i].real = 0.0;
+ pw[i].imag = 0.0;
}
for(i=0; i<=order; i++)
- Pw[i].real = ak[i];
- fft(&Pw[0].real,FFT_DEC,1);
+ pw[i].real = ak[i];
+ kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)pw, (kiss_fft_cpx *)Pw);
+
+ TIMER_SAMPLE_AND_LOG(tfft, tstart, " fft");
/* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
- for(i=0; i<FFT_DEC/2; i++)
+ for(i=0; i<FFT_ENC/2; i++)
Pw[i].real = E/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
-#ifdef DUMP
+
+ TIMER_SAMPLE_AND_LOG(tpw, tfft, " Pw");
+
+ if (pf)
+ lpc_post_filter(fft_fwd_cfg, model, Pw, ak, order, dump, beta, gamma, bass_boost);
+
+ TIMER_SAMPLE_AND_LOG(tpf, tpw, " LPC post filter");
+
+ #ifdef DUMP
if (dump)
dump_Pw(Pw);
-#endif
+ #endif
- /* Determine magnitudes by linear interpolation of P(w) -------------------*/
+ /* Determine magnitudes from P(w) ----------------------------------------*/
- signal = noise = 0.0;
- for(m=1; m<=model->L; m++) {
- am = floor((m - 0.5)*model->Wo/r + 0.5);
- bm = floor((m + 0.5)*model->Wo/r + 0.5);
- Em = 0.0;
+ /* when used just by decoder {A} might be all zeroes so init signal
+ and noise to prevent log(0) errors */
- for(i=am; i<bm; i++)
- Em += Pw[i].real;
- Am = sqrt(Em);
+ signal = 1E-30; noise = 1E-32;
- signal += pow(model->A[m],2.0);
- noise += pow(model->A[m] - Am,2.0);
- model->A[m] = Am;
+ for(m=1; m<=model->L; m++) {
+ am = (int)((m - 0.5)*model->Wo/r + 0.5);
+ bm = (int)((m + 0.5)*model->Wo/r + 0.5);
+ Em = 0.0;
+
+ for(i=am; i<bm; i++)
+ Em += Pw[i].real;
+ Am = sqrtf(Em);
+
+ signal += model->A[m]*model->A[m];
+ noise += (model->A[m] - Am)*(model->A[m] - Am);
+
+ /* This code significantly improves perf of LPC model, in
+ particular when combined with phase0. The LPC spectrum tends
+ to track just under the peaks of the spectral envelope, and
+ just above nulls. This algorithm does the reverse to
+ compensate - raising the amplitudes of spectral peaks, while
+ attenuating the null. This enhances the formants, and
+ supresses the energy between formants. */
+
+ if (sim_pf) {
+ if (Am > model->A[m])
+ Am *= 0.7;
+ if (Am < model->A[m])
+ Am *= 1.4;
+ }
+
+ model->A[m] = Am;
}
- *snr = 10.0*log10(signal/noise);
+ *snr = 10.0*log10f(signal/noise);
+
+ TIMER_SAMPLE_AND_LOG2(tpf, " rec");
}
/*---------------------------------------------------------------------------*\
@@ -540,7 +1037,7 @@ int encode_Wo(float Wo)
float norm;
norm = (Wo - Wo_min)/(Wo_max - Wo_min);
- index = floor(WO_LEVELS * norm + 0.5);
+ index = floorf(WO_LEVELS * norm + 0.5);
if (index < 0 ) index = 0;
if (index > (WO_LEVELS-1)) index = WO_LEVELS-1;
@@ -572,6 +1069,84 @@ float decode_Wo(int index)
/*---------------------------------------------------------------------------*\
+ FUNCTION....: encode_Wo_dt()
+ AUTHOR......: David Rowe
+ DATE CREATED: 6 Nov 2011
+
+ Encodes Wo difference from last frame.
+
+\*---------------------------------------------------------------------------*/
+
+int encode_Wo_dt(float Wo, float prev_Wo)
+{
+ int index, mask, max_index, min_index;
+ float Wo_min = TWO_PI/P_MAX;
+ float Wo_max = TWO_PI/P_MIN;
+ float norm;
+
+ norm = (Wo - prev_Wo)/(Wo_max - Wo_min);
+ index = floor(WO_LEVELS * norm + 0.5);
+ //printf("ENC index: %d ", index);
+
+ /* hard limit */
+
+ max_index = (1 << (WO_DT_BITS-1)) - 1;
+ min_index = - (max_index+1);
+ if (index > max_index) index = max_index;
+ if (index < min_index) index = min_index;
+ //printf("max_index: %d min_index: %d hard index: %d ",
+ // max_index, min_index, index);
+
+ /* mask so that only LSB WO_DT_BITS remain, bit WO_DT_BITS is the sign bit */
+
+ mask = ((1 << WO_DT_BITS) - 1);
+ index &= mask;
+ //printf("mask: 0x%x index: 0x%x\n", mask, index);
+
+ return index;
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: decode_Wo_dt()
+ AUTHOR......: David Rowe
+ DATE CREATED: 6 Nov 2011
+
+ Decodes Wo using WO_DT_BITS difference from last frame.
+
+\*---------------------------------------------------------------------------*/
+
+float decode_Wo_dt(int index, float prev_Wo)
+{
+ float Wo_min = TWO_PI/P_MAX;
+ float Wo_max = TWO_PI/P_MIN;
+ float step;
+ float Wo;
+ int mask;
+
+ /* sign extend index */
+
+ //printf("DEC index: %d ");
+ if (index & (1 << (WO_DT_BITS-1))) {
+ mask = ~((1 << WO_DT_BITS) - 1);
+ index |= mask;
+ }
+ //printf("DEC mask: 0x%x index: %d \n", mask, index);
+
+ step = (Wo_max - Wo_min)/WO_LEVELS;
+ Wo = prev_Wo + step*(index);
+
+ /* bit errors can make us go out of range leading to all sorts of
+ probs like seg faults */
+
+ if (Wo > Wo_max) Wo = Wo_max;
+ if (Wo < Wo_min) Wo = Wo_min;
+
+ return Wo;
+}
+
+/*---------------------------------------------------------------------------*\
+
FUNCTION....: speech_to_uq_lsps()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
@@ -592,10 +1167,22 @@ float speech_to_uq_lsps(float lsp[],
int i, roots;
float Wn[M];
float R[LPC_MAX+1];
- float E;
+ float e, E;
- for(i=0; i<M; i++)
+ e = 0.0;
+ for(i=0; i<M; i++) {
Wn[i] = Sn[i]*w[i];
+ e += Wn[i]*Wn[i];
+ }
+
+ /* trap 0 energy case as LPC analysis will fail */
+
+ if (e == 0.0) {
+ for(i=0; i<order; i++)
+ lsp[i] = (PI/order)*(float)i;
+ return 0.0;
+ }
+
autocorrelate(Wn, R, M, order);
levinson_durbin(R, ak, order);
@@ -603,15 +1190,17 @@ float speech_to_uq_lsps(float lsp[],
for(i=0; i<=order; i++)
E += ak[i]*R[i];
+ /* 15 Hz BW expansion as I can't hear the difference and it may help
+ help occasional fails in the LSP root finding. Important to do this
+ after energy calculation to avoid -ve energy values.
+ */
+
+ for(i=0; i<=order; i++)
+ ak[i] *= powf(0.994,(float)i);
+
roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
if (roots != order) {
- /* for some reason LSP roots could not be found */
- /* some alpha testers are reporting this condition */
- fprintf(stderr, "LSP roots not found!\nroots = %d\n", roots);
- for(i=0; i<=order; i++)
- fprintf(stderr, "a[%d] = %f\n", i, ak[i]);
-
- /* some benign LSP values we can use instead */
+ /* if root finding fails use some benign LSP values instead */
for(i=0; i<order; i++)
lsp[i] = (PI/order)*(float)i;
}
@@ -621,22 +1210,22 @@ float speech_to_uq_lsps(float lsp[],
/*---------------------------------------------------------------------------*\
- FUNCTION....: encode_lsps()
+ FUNCTION....: encode_lsps_scalar()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
- From a vector of unquantised (floating point) LSPs finds the quantised
- LSP indexes.
+ Thirty-six bit sclar LSP quantiser. From a vector of unquantised
+ (floating point) LSPs finds the quantised LSP indexes.
\*---------------------------------------------------------------------------*/
-void encode_lsps(int indexes[], float lsp[], int order)
+void encode_lsps_scalar(int indexes[], float lsp[], int order)
{
int i,k,m;
float wt[1];
float lsp_hz[LPC_MAX];
const float * cb;
- float se = 0.0;
+ float se;
/* convert from radians to Hz so we can use human readable
frequencies */
@@ -644,7 +1233,7 @@ void encode_lsps(int indexes[], float lsp[], int order)
for(i=0; i<order; i++)
lsp_hz[i] = (4000.0/PI)*lsp[i];
- /* simple uniform scalar quantisers */
+ /* scalar quantisers */
wt[0] = 1.0;
for(i=0; i<order; i++) {
@@ -657,7 +1246,7 @@ void encode_lsps(int indexes[], float lsp[], int order)
/*---------------------------------------------------------------------------*\
- FUNCTION....: decode_lsps()
+ FUNCTION....: decode_lsps_scalar()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
@@ -666,7 +1255,7 @@ void encode_lsps(int indexes[], float lsp[], int order)
\*---------------------------------------------------------------------------*/
-void decode_lsps(float lsp[], int indexes[], int order)
+void decode_lsps_scalar(float lsp[], int indexes[], int order)
{
int i,k;
float lsp_hz[LPC_MAX];
@@ -684,6 +1273,273 @@ void decode_lsps(float lsp[], int indexes[], int order)
lsp[i] = (PI/4000.0)*lsp_hz[i];
}
+
+#ifdef __EXPERIMENTAL__
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: encode_lsps_diff_freq_vq()
+ AUTHOR......: David Rowe
+ DATE CREATED: 15 November 2011
+
+ Twenty-five bit LSP quantiser. LSPs 1-4 are quantised with scalar
+ LSP differences (in frequency, i.e difference from the previous
+ LSP). LSPs 5-10 are quantised with a VQ trained generated using
+ vqtrainjnd.c
+
+\*---------------------------------------------------------------------------*/
+
+void encode_lsps_diff_freq_vq(int indexes[], float lsp[], int order)
+{
+ int i,k,m;
+ float lsp_hz[LPC_MAX];
+ float lsp__hz[LPC_MAX];
+ float dlsp[LPC_MAX];
+ float dlsp_[LPC_MAX];
+ float wt[LPC_MAX];
+ const float * cb;
+ float se;
+
+ for(i=0; i<LPC_ORD; i++) {
+ wt[i] = 1.0;
+ }
+
+ /* convert from radians to Hz so we can use human readable
+ frequencies */
+
+ for(i=0; i<order; i++)
+ lsp_hz[i] = (4000.0/PI)*lsp[i];
+
+ /* scalar quantisers for LSP differences 1..4 */
+
+ wt[0] = 1.0;
+ for(i=0; i<4; i++) {
+ if (i)
+ dlsp[i] = lsp_hz[i] - lsp__hz[i-1];
+ else
+ dlsp[0] = lsp_hz[0];
+
+ k = lsp_cbd[i].k;
+ m = lsp_cbd[i].m;
+ cb = lsp_cbd[i].cb;
+ indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se);
+ dlsp_[i] = cb[indexes[i]*k];
+
+ if (i)
+ lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
+ else
+ lsp__hz[0] = dlsp_[0];
+ }
+
+ /* VQ LSPs 5,6,7,8,9,10 */
+
+ k = lsp_cbjnd[4].k;
+ m = lsp_cbjnd[4].m;
+ cb = lsp_cbjnd[4].cb;
+ indexes[4] = quantise(cb, &lsp_hz[4], &wt[4], k, m, &se);
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: decode_lsps_diff_freq_vq()
+ AUTHOR......: David Rowe
+ DATE CREATED: 15 Nov 2011
+
+ From a vector of quantised LSP indexes, returns the quantised
+ (floating point) LSPs.
+
+\*---------------------------------------------------------------------------*/
+
+void decode_lsps_diff_freq_vq(float lsp_[], int indexes[], int order)
+{
+ int i,k,m;
+ float dlsp_[LPC_MAX];
+ float lsp__hz[LPC_MAX];
+ const float * cb;
+
+ /* scalar LSP differences */
+
+ for(i=0; i<4; i++) {
+ cb = lsp_cbd[i].cb;
+ dlsp_[i] = cb[indexes[i]];
+ if (i)
+ lsp__hz[i] = lsp__hz[i-1] + dlsp_[i];
+ else
+ lsp__hz[0] = dlsp_[0];
+ }
+
+ /* VQ */
+
+ k = lsp_cbjnd[4].k;
+ m = lsp_cbjnd[4].m;
+ cb = lsp_cbjnd[4].cb;
+ for(i=4; i<order; i++)
+ lsp__hz[i] = cb[indexes[4]*k+i-4];
+
+ /* convert back to radians */
+
+ for(i=0; i<order; i++)
+ lsp_[i] = (PI/4000.0)*lsp__hz[i];
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: encode_lsps_diff_time()
+ AUTHOR......: David Rowe
+ DATE CREATED: 12 Sep 2012
+
+ Encode difference from preious frames's LSPs using
+ 3,3,2,2,2,2,1,1,1,1 scalar quantisers (18 bits total).
+
+\*---------------------------------------------------------------------------*/
+
+void encode_lsps_diff_time(int indexes[],
+ float lsps[],
+ float lsps__prev[],
+ int order)
+{
+ int i,k,m;
+ float lsps_dt[LPC_ORD];
+ float wt[LPC_MAX];
+ const float * cb;
+ float se;
+
+ /* Determine difference in time and convert from radians to Hz so
+ we can use human readable frequencies */
+
+ for(i=0; i<LPC_ORD; i++) {
+ lsps_dt[i] = (4000/PI)*(lsps[i] - lsps__prev[i]);
+ }
+
+ /* scalar quantisers */
+
+ wt[0] = 1.0;
+ for(i=0; i<order; i++) {
+ k = lsp_cbdt[i].k;
+ m = lsp_cbdt[i].m;
+ cb = lsp_cbdt[i].cb;
+ indexes[i] = quantise(cb, &lsps_dt[i], wt, k, m, &se);
+ }
+
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: decode_lsps_diff_time()
+ AUTHOR......: David Rowe
+ DATE CREATED: 15 Nov 2011
+
+ From a quantised LSP indexes, returns the quantised
+ (floating point) LSPs.
+
+\*---------------------------------------------------------------------------*/
+
+void decode_lsps_diff_time(
+ float lsps_[],
+ int indexes[],
+ float lsps__prev[],
+ int order)
+{
+ int i,k,m;
+ const float * cb;
+
+ for(i=0; i<order; i++)
+ lsps_[i] = lsps__prev[i];
+
+ for(i=0; i<order; i++) {
+ k = lsp_cbdt[i].k;
+ cb = lsp_cbdt[i].cb;
+ lsps_[i] += (PI/4000.0)*cb[indexes[i]*k];
+ }
+
+}
+#endif
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: encode_lsps_vq()
+ AUTHOR......: David Rowe
+ DATE CREATED: 15 Feb 2012
+
+ Multi-stage VQ LSP quantiser developed by Jean-Marc Valin.
+
+\*---------------------------------------------------------------------------*/
+
+void encode_lsps_vq(int *indexes, float *x, float *xq, int ndim)
+{
+ int i, n1, n2, n3;
+ float err[LPC_ORD], err2[LPC_ORD], err3[LPC_ORD];
+ float w[LPC_ORD], w2[LPC_ORD], w3[LPC_ORD];
+ const float *codebook1 = lsp_cbjvm[0].cb;
+ const float *codebook2 = lsp_cbjvm[1].cb;
+ const float *codebook3 = lsp_cbjvm[2].cb;
+
+ assert(ndim <= LPC_ORD);
+
+ w[0] = MIN(x[0], x[1]-x[0]);
+ for (i=1;i<ndim-1;i++)
+ w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]);
+ w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]);
+
+ compute_weights(x, w, ndim);
+
+ n1 = find_nearest(codebook1, lsp_cbjvm[0].m, x, ndim);
+
+ for (i=0;i<ndim;i++)
+ {
+ xq[i] = codebook1[ndim*n1+i];
+ err[i] = x[i] - xq[i];
+ }
+ for (i=0;i<ndim/2;i++)
+ {
+ err2[i] = err[2*i];
+ err3[i] = err[2*i+1];
+ w2[i] = w[2*i];
+ w3[i] = w[2*i+1];
+ }
+ n2 = find_nearest_weighted(codebook2, lsp_cbjvm[1].m, err2, w2, ndim/2);
+ n3 = find_nearest_weighted(codebook3, lsp_cbjvm[2].m, err3, w3, ndim/2);
+
+ indexes[0] = n1;
+ indexes[1] = n2;
+ indexes[2] = n3;
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: decode_lsps_vq()
+ AUTHOR......: David Rowe
+ DATE CREATED: 15 Feb 2012
+
+\*---------------------------------------------------------------------------*/
+
+void decode_lsps_vq(int *indexes, float *xq, int ndim)
+{
+ int i, n1, n2, n3;
+ const float *codebook1 = lsp_cbjvm[0].cb;
+ const float *codebook2 = lsp_cbjvm[1].cb;
+ const float *codebook3 = lsp_cbjvm[2].cb;
+
+ n1 = indexes[0];
+ n2 = indexes[1];
+ n3 = indexes[2];
+
+ for (i=0;i<ndim;i++)
+ {
+ xq[i] = codebook1[ndim*n1+i];
+ }
+ for (i=0;i<ndim/2;i++)
+ {
+ xq[2*i] += codebook2[ndim*n2/2+i];
+ xq[2*i+1] += codebook3[ndim*n3/2+i];
+ }
+}
+
+
/*---------------------------------------------------------------------------*\
FUNCTION....: bw_expand_lsps()
@@ -692,20 +1548,44 @@ void decode_lsps(float lsp[], int indexes[], int order)
Applies Bandwidth Expansion (BW) to a vector of LSPs. Prevents any
two LSPs getting too close together after quantisation. We know
- from experiment that LSP quantisation errors < 12.5Hz (25Hz setp
+ from experiment that LSP quantisation errors < 12.5Hz (25Hz step
size) are inaudible so we use that as the minimum LSP separation.
\*---------------------------------------------------------------------------*/
-void bw_expand_lsps(float lsp[],
+void bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high)
+{
+ int i;
+
+ for(i=1; i<4; i++) {
+
+ if ((lsp[i] - lsp[i-1]) < min_sep_low*(PI/4000.0))
+ lsp[i] = lsp[i-1] + min_sep_low*(PI/4000.0);
+
+ }
+
+ /* As quantiser gaps increased, larger BW expansion was required
+ to prevent twinkly noises. This may need more experiment for
+ different quanstisers.
+ */
+
+ for(i=4; i<order; i++) {
+ if (lsp[i] - lsp[i-1] < min_sep_high*(PI/4000.0))
+ lsp[i] = lsp[i-1] + min_sep_high*(PI/4000.0);
+ }
+}
+
+void bw_expand_lsps2(float lsp[],
int order
)
{
int i;
- for(i=1; i<5; i++) {
- if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
- lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
+ for(i=1; i<4; i++) {
+
+ if ((lsp[i] - lsp[i-1]) < 100.0*(PI/4000.0))
+ lsp[i] = lsp[i-1] + 100.0*(PI/4000.0);
+
}
/* As quantiser gaps increased, larger BW expansion was required
@@ -713,16 +1593,84 @@ void bw_expand_lsps(float lsp[],
different quanstisers.
*/
- for(i=5; i<8; i++) {
- if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
- lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
+ for(i=4; i<order; i++) {
+ if (lsp[i] - lsp[i-1] < 200.0*(PI/4000.0))
+ lsp[i] = lsp[i-1] + 200.0*(PI/4000.0);
}
- for(i=8; i<order; i++) {
- if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
- lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: locate_lsps_jnd_steps()
+ AUTHOR......: David Rowe
+ DATE CREATED: 27/10/2011
+
+ Applies a form of Bandwidth Expansion (BW) to a vector of LSPs.
+ Listening tests have determined that "quantising" the position of
+ each LSP to the non-linear steps below introduces a "just noticable
+ difference" in the synthesised speech.
+
+ This operation can be used before quantisation to limit the input
+ data to the quantiser to a number of discrete steps.
+
+ This operation can also be used during quantisation as a form of
+ hysteresis in the calculation of quantiser error. For example if
+ the quantiser target of lsp1 is 500 Hz, candidate vectors with lsp1
+ of 515 and 495 Hz sound effectively the same.
+
+\*---------------------------------------------------------------------------*/
+
+void locate_lsps_jnd_steps(float lsps[], int order)
+{
+ int i;
+ float lsp_hz, step;
+
+ assert(order == 10);
+
+ /* quantise to 25Hz steps */
+
+ step = 25;
+ for(i=0; i<2; i++) {
+ lsp_hz = lsps[i]*4000.0/PI;
+ lsp_hz = floorf(lsp_hz/step + 0.5)*step;
+ lsps[i] = lsp_hz*PI/4000.0;
+ if (i) {
+ if (lsps[i] == lsps[i-1])
+ lsps[i] += step*PI/4000.0;
+
+ }
+ }
+
+ /* quantise to 50Hz steps */
+
+ step = 50;
+ for(i=2; i<4; i++) {
+ lsp_hz = lsps[i]*4000.0/PI;
+ lsp_hz = floorf(lsp_hz/step + 0.5)*step;
+ lsps[i] = lsp_hz*PI/4000.0;
+ if (i) {
+ if (lsps[i] == lsps[i-1])
+ lsps[i] += step*PI/4000.0;
+
+ }
+ }
+
+ /* quantise to 100Hz steps */
+
+ step = 100;
+ for(i=4; i<10; i++) {
+ lsp_hz = lsps[i]*4000.0/PI;
+ lsp_hz = floorf(lsp_hz/step + 0.5)*step;
+ lsps[i] = lsp_hz*PI/4000.0;
+ if (i) {
+ if (lsps[i] == lsps[i-1])
+ lsps[i] += step*PI/4000.0;
+
+ }
}
}
+
/*---------------------------------------------------------------------------*\
FUNCTION....: apply_lpc_correction()
@@ -758,9 +1706,9 @@ int encode_energy(float e)
float e_max = E_MAX_DB;
float norm;
- e = 10.0*log10(e);
+ e = 10.0*log10f(e);
norm = (e - e_min)/(e_max - e_min);
- index = floor(E_LEVELS * norm + 0.5);
+ index = floorf(E_LEVELS * norm + 0.5);
if (index < 0 ) index = 0;
if (index > (E_LEVELS-1)) index = E_LEVELS-1;
@@ -773,7 +1721,7 @@ int encode_energy(float e)
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
- Decodes energy using a WO_BITS quantiser.
+ Decodes energy using a E_LEVELS quantiser.
\*---------------------------------------------------------------------------*/
@@ -786,39 +1734,12 @@ float decode_energy(int index)
step = (e_max - e_min)/E_LEVELS;
e = e_min + step*(index);
- e = pow(10.0,e/10.0);
+ e = powf(10.0,e/10.0);
return e;
}
-/*---------------------------------------------------------------------------*\
-
- FUNCTION....: encode_amplitudes()
- AUTHOR......: David Rowe
- DATE CREATED: 22/8/2010
-
- Time domain LPC is used model the amplitudes which are then
- converted to LSPs and quantised. So we don't actually encode the
- amplitudes directly, rather we derive an equivalent representation
- from the time domain speech.
-
-\*---------------------------------------------------------------------------*/
-
-void encode_amplitudes(int lsp_indexes[],
- int *energy_index,
- MODEL *model,
- float Sn[],
- float w[])
-{
- float lsps[LPC_ORD];
- float ak[LPC_ORD+1];
- float e;
-
- e = speech_to_uq_lsps(lsps, ak, Sn, w, LPC_ORD);
- encode_lsps(lsp_indexes, lsps, LPC_ORD);
- *energy_index = encode_energy(e);
-}
-
+#ifdef NOT_USED
/*---------------------------------------------------------------------------*\
FUNCTION....: decode_amplitudes()
@@ -830,7 +1751,8 @@ void encode_amplitudes(int lsp_indexes[],
\*---------------------------------------------------------------------------*/
-float decode_amplitudes(MODEL *model,
+float decode_amplitudes(kiss_fft_cfg fft_fwd_cfg,
+ MODEL *model,
float ak[],
int lsp_indexes[],
int energy_index,
@@ -840,12 +1762,209 @@ float decode_amplitudes(MODEL *model,
{
float snr;
- decode_lsps(lsps, lsp_indexes, LPC_ORD);
+ decode_lsps_scalar(lsps, lsp_indexes, LPC_ORD);
bw_expand_lsps(lsps, LPC_ORD);
lsp_to_lpc(lsps, ak, LPC_ORD);
*e = decode_energy(energy_index);
- aks_to_M2(ak, LPC_ORD, model, *e, &snr, 1);
+ aks_to_M2(ak, LPC_ORD, model, *e, &snr, 1, 0, 0, 1);
apply_lpc_correction(model);
return snr;
}
+#endif
+
+static float ge_coeff[2] = {0.8, 0.9};
+
+void compute_weights2(const float *x, const float *xp, float *w, int ndim)
+{
+ w[0] = 30;
+ w[1] = 1;
+ if (x[1]<0)
+ {
+ w[0] *= .6;
+ w[1] *= .3;
+ }
+ if (x[1]<-10)
+ {
+ w[0] *= .3;
+ w[1] *= .3;
+ }
+ /* Higher weight if pitch is stable */
+ if (fabsf(x[0]-xp[0])<.2)
+ {
+ w[0] *= 2;
+ w[1] *= 1.5;
+ } else if (fabsf(x[0]-xp[0])>.5) /* Lower if not stable */
+ {
+ w[0] *= .5;
+ }
+
+ /* Lower weight for low energy */
+ if (x[1] < xp[1]-10)
+ {
+ w[1] *= .5;
+ }
+ if (x[1] < xp[1]-20)
+ {
+ w[1] *= .5;
+ }
+
+ //w[0] = 30;
+ //w[1] = 1;
+
+ /* Square the weights because it's applied on the squared error */
+ w[0] *= w[0];
+ w[1] *= w[1];
+
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: quantise_WoE()
+ AUTHOR......: Jean-Marc Valin & David Rowe
+ DATE CREATED: 29 Feb 2012
+
+ Experimental joint Wo and LPC energy vector quantiser developed by
+ Jean-Marc Valin. Exploits correlations between the difference in
+ the log pitch and log energy from frame to frame. For example
+ both the pitch and energy tend to only change by small amounts
+ during voiced speech, however it is important that these changes be
+ coded carefully. During unvoiced speech they both change a lot but
+ the ear is less sensitve to errors so coarser quantisation is OK.
+
+ The ear is sensitive to log energy and loq pitch so we quantise in
+ these domains. That way the error measure used to quantise the
+ values is close to way the ear senses errors.
+
+ See http://jmspeex.livejournal.com/10446.html
+
+\*---------------------------------------------------------------------------*/
+
+void quantise_WoE(MODEL *model, float *e, float xq[])
+{
+ int i, n1;
+ float x[2];
+ float err[2];
+ float w[2];
+ const float *codebook1 = ge_cb[0].cb;
+ int nb_entries = ge_cb[0].m;
+ int ndim = ge_cb[0].k;
+ float Wo_min = TWO_PI/P_MAX;
+ float Wo_max = TWO_PI/P_MIN;
+
+ x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+ x[1] = 10.0*log10f(1e-4 + *e);
+
+ compute_weights2(x, xq, w, ndim);
+ for (i=0;i<ndim;i++)
+ err[i] = x[i]-ge_coeff[i]*xq[i];
+ n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim);
+
+ for (i=0;i<ndim;i++)
+ {
+ xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
+ err[i] -= codebook1[ndim*n1+i];
+ }
+
+ /*
+ x = log2(4000*Wo/(PI*50));
+ 2^x = 4000*Wo/(PI*50)
+ Wo = (2^x)*(PI*50)/4000;
+ */
+
+ model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
+
+ /* bit errors can make us go out of range leading to all sorts of
+ probs like seg faults */
+
+ if (model->Wo > Wo_max) model->Wo = Wo_max;
+ if (model->Wo < Wo_min) model->Wo = Wo_min;
+
+ model->L = PI/model->Wo; /* if we quantise Wo re-compute L */
+
+ *e = powf(10.0, xq[1]/10.0);
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: encode_WoE()
+ AUTHOR......: Jean-Marc Valin & David Rowe
+ DATE CREATED: 11 May 2012
+
+ Joint Wo and LPC energy vector quantiser developed my Jean-Marc
+ Valin. Returns index, and updated states xq[].
+
+\*---------------------------------------------------------------------------*/
+
+int encode_WoE(MODEL *model, float e, float xq[])
+{
+ int i, n1;
+ float x[2];
+ float err[2];
+ float w[2];
+ const float *codebook1 = ge_cb[0].cb;
+ int nb_entries = ge_cb[0].m;
+ int ndim = ge_cb[0].k;
+
+ assert((1<<WO_E_BITS) == nb_entries);
+
+ if (e < 0.0) e = 0; /* occasional small negative energies due LPC round off I guess */
+
+ x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2);
+ x[1] = 10.0*log10f(1e-4 + e);
+
+ compute_weights2(x, xq, w, ndim);
+ for (i=0;i<ndim;i++)
+ err[i] = x[i]-ge_coeff[i]*xq[i];
+ n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim);
+
+ for (i=0;i<ndim;i++)
+ {
+ xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
+ err[i] -= codebook1[ndim*n1+i];
+ }
+
+ //printf("enc: %f %f (%f)(%f) \n", xq[0], xq[1], e, 10.0*log10(1e-4 + e));
+ return n1;
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: decode_WoE()
+ AUTHOR......: Jean-Marc Valin & David Rowe
+ DATE CREATED: 11 May 2012
+
+ Joint Wo and LPC energy vector quantiser developed my Jean-Marc
+ Valin. Given index and states xq[], returns Wo & E, and updates
+ states xq[].
+
+\*---------------------------------------------------------------------------*/
+
+void decode_WoE(MODEL *model, float *e, float xq[], int n1)
+{
+ int i;
+ const float *codebook1 = ge_cb[0].cb;
+ int ndim = ge_cb[0].k;
+ float Wo_min = TWO_PI/P_MAX;
+ float Wo_max = TWO_PI/P_MIN;
+
+ for (i=0;i<ndim;i++)
+ {
+ xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i];
+ }
+
+ //printf("dec: %f %f\n", xq[0], xq[1]);
+ model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0;
+
+ /* bit errors can make us go out of range leading to all sorts of
+ probs like seg faults */
+
+ if (model->Wo > Wo_max) model->Wo = Wo_max;
+ if (model->Wo < Wo_min) model->Wo = Wo_min;
+
+ model->L = PI/model->Wo; /* if we quantise Wo re-compute L */
+
+ *e = powf(10.0, xq[1]/10.0);
+}
+