diff options
Diffstat (limited to 'gr-vocoder/lib/codec2/quantise.c')
-rw-r--r-- | gr-vocoder/lib/codec2/quantise.c | 1739 |
1 files changed, 1429 insertions, 310 deletions
diff --git a/gr-vocoder/lib/codec2/quantise.c b/gr-vocoder/lib/codec2/quantise.c index 25f26066ed..6423dc83df 100644 --- a/gr-vocoder/lib/codec2/quantise.c +++ b/gr-vocoder/lib/codec2/quantise.c @@ -36,7 +36,9 @@ #include "quantise.h" #include "lpc.h" #include "lsp.h" -#include "fft.h" +#include "kiss_fft.h" +#undef TIMER +#include "machdep.h" #define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */ @@ -59,38 +61,20 @@ int lsp_bits(int i) { return lsp_cb[i].log2m; } -#if VECTOR_QUANTISATION -/*---------------------------------------------------------------------------*\ - - quantise_uniform - - Simulates uniform quantising of a float. - -\*---------------------------------------------------------------------------*/ - -void quantise_uniform(float *val, float min, float max, int bits) -{ - int levels = 1 << (bits-1); - float norm; - int index; - - /* hard limit to quantiser range */ - - printf("min: %f max: %f val: %f ", min, max, val[0]); - if (val[0] < min) val[0] = min; - if (val[0] > max) val[0] = max; - - norm = (*val - min)/(max-min); - printf("%f norm: %f ", val[0], norm); - index = fabs(levels*norm + 0.5); - - *val = min + index*(max-min)/levels; - - printf("index %d val_: %f\n", index, val[0]); +int lspd_bits(int i) { + return lsp_cbd[i].log2m; } +#ifdef __EXPERIMENTAL__ +int lspdt_bits(int i) { + return lsp_cbdt[i].log2m; +} #endif +int lsp_pred_vq_bits(int i) { + return lsp_cbjvm[i].log2m; +} + /*---------------------------------------------------------------------------*\ quantise_init @@ -127,13 +111,16 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se) float beste; /* best error so far */ long j; int i; + float diff; besti = 0; beste = 1E32; for(j=0; j<m; j++) { e = 0.0; - for(i=0; i<k; i++) - e += pow((cb[j*k+i]-vec[i])*w[i],2.0); + for(i=0; i<k; i++) { + diff = cb[j*k+i]-vec[i]; + e += powf(diff*w[i],2.0); + } if (e < beste) { beste = e; besti = j; @@ -147,16 +134,16 @@ long quantise(const float * cb, float vec[], float w[], int k, int m, float *se) /*---------------------------------------------------------------------------*\ - lspd_quantise + encode_lspds_scalar() - Scalar lsp difference quantiser. + Scalar/VQ LSP difference quantiser. \*---------------------------------------------------------------------------*/ -void lspd_quantise( - float lsp[], - float lsp_[], - int order +void encode_lspds_scalar( + int indexes[], + float lsp[], + int order ) { int i,k,m; @@ -164,10 +151,15 @@ void lspd_quantise( float lsp__hz[LPC_MAX]; float dlsp[LPC_MAX]; float dlsp_[LPC_MAX]; - float wt[1]; + float wt[LPC_MAX]; const float *cb; - float se = 0.0; - int indexes[LPC_MAX]; + float se; + + assert(order == LPC_ORD); + + for(i=0; i<order; i++) { + wt[i] = 1.0; + } /* convert from radians to Hz so we can use human readable frequencies */ @@ -175,14 +167,13 @@ void lspd_quantise( for(i=0; i<order; i++) lsp_hz[i] = (4000.0/PI)*lsp[i]; - dlsp[0] = lsp_hz[0]; - for(i=1; i<order; i++) - dlsp[i] = lsp_hz[i] - lsp_hz[i-1]; - - /* simple uniform scalar quantisers */ + //printf("\n"); wt[0] = 1.0; for(i=0; i<order; i++) { + + /* find difference from previous qunatised lsp */ + if (i) dlsp[i] = lsp_hz[i] - lsp__hz[i-1]; else @@ -194,129 +185,565 @@ void lspd_quantise( indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se); dlsp_[i] = cb[indexes[i]*k]; + if (i) lsp__hz[i] = lsp__hz[i-1] + dlsp_[i]; else lsp__hz[0] = dlsp_[0]; + + //printf("%d lsp %3.2f dlsp %3.2f dlsp_ %3.2f lsp_ %3.2f\n", i, lsp_hz[i], dlsp[i], dlsp_[i], lsp__hz[i]); } - for(; i<order; i++) - lsp__hz[i] = lsp__hz[i-1] + dlsp[i]; - /* convert back to radians */ +} + +void decode_lspds_scalar( + float lsp_[], + int indexes[], + int order +) +{ + int i,k; + float lsp__hz[LPC_MAX]; + float dlsp_[LPC_MAX]; + const float *cb; + + assert(order == LPC_ORD); + + for(i=0; i<order; i++) { + + k = lsp_cbd[i].k; + cb = lsp_cbd[i].cb; + dlsp_[i] = cb[indexes[i]*k]; + + if (i) + lsp__hz[i] = lsp__hz[i-1] + dlsp_[i]; + else + lsp__hz[0] = dlsp_[0]; - for(i=0; i<order; i++) lsp_[i] = (PI/4000.0)*lsp__hz[i]; + + //printf("%d dlsp_ %3.2f lsp_ %3.2f\n", i, dlsp_[i], lsp__hz[i]); + } + } +#ifdef __EXPERIMENTAL__ /*---------------------------------------------------------------------------*\ - lspd_vq_quantise + lspvq_quantise - Vector lsp difference quantiser. + Vector LSP quantiser. \*---------------------------------------------------------------------------*/ -void lspdvq_quantise( +void lspvq_quantise( float lsp[], float lsp_[], int order ) { int i,k,m,ncb, nlsp; - float dlsp[LPC_MAX]; - float dlsp_[LPC_MAX]; - float wt[LPC_ORD]; + float wt[LPC_ORD], lsp_hz[LPC_ORD]; const float *cb; - float se = 0.0; + float se; int index; - dlsp[0] = lsp[0]; - for(i=1; i<order; i++) - dlsp[i] = lsp[i] - lsp[i-1]; + for(i=0; i<LPC_ORD; i++) { + wt[i] = 1.0; + lsp_hz[i] = 4000.0*lsp[i]/PI; + } - for(i=0; i<order; i++) - dlsp_[i] = dlsp[i]; + /* scalar quantise LSPs 1,2,3,4 */ - for(i=0; i<order; i++) + /* simple uniform scalar quantisers */ + + for(i=0; i<4; i++) { + k = lsp_cb[i].k; + m = lsp_cb[i].m; + cb = lsp_cb[i].cb; + index = quantise(cb, &lsp_hz[i], wt, k, m, &se); + lsp_[i] = cb[index*k]*PI/4000.0; + } + + //#define WGHT +#ifdef WGHT + for(i=4; i<9; i++) { + wt[i] = 1.0/(lsp[i]-lsp[i-1]) + 1.0/(lsp[i+1]-lsp[i]); + //printf("wt[%d] = %f\n", i, wt[i]); + } + wt[9] = 1.0/(lsp[i]-lsp[i-1]); +#endif + + /* VQ LSPs 5,6,7,8,9,10 */ + + ncb = 4; + nlsp = 4; + k = lsp_cbjnd[ncb].k; + m = lsp_cbjnd[ncb].m; + cb = lsp_cbjnd[ncb].cb; + index = quantise(cb, &lsp_hz[nlsp], &wt[nlsp], k, m, &se); + for(i=4; i<LPC_ORD; i++) { + lsp_[i] = cb[index*k+i-4]*(PI/4000.0); + //printf("%4.f (%4.f) ", lsp_hz[i], cb[index*k+i-4]); + } +} + +/*---------------------------------------------------------------------------*\ + + lspjnd_quantise + + Experimental JND LSP quantiser. + +\*---------------------------------------------------------------------------*/ + +void lspjnd_quantise(float lsps[], float lsps_[], int order) +{ + int i,k,m; + float wt[LPC_ORD], lsps_hz[LPC_ORD]; + const float *cb; + float se = 0.0; + int index; + + for(i=0; i<LPC_ORD; i++) { wt[i] = 1.0; + } - /* scalar quantise dLSPs 1,2,3,4,5 */ + /* convert to Hz */ - for(i=0; i<5; i++) { - if (i) - dlsp[i] = (lsp[i] - lsp_[i-1])*4000.0/PI; - else - dlsp[0] = lsp[0]*4000.0/PI; + for(i=0; i<LPC_ORD; i++) { + lsps_hz[i] = lsps[i]*(4000.0/PI); + lsps_[i] = lsps[i]; + } - k = lsp_cbdvq[i].k; - m = lsp_cbdvq[i].m; - cb = lsp_cbdvq[i].cb; - index = quantise(cb, &dlsp[i], wt, k, m, &se); - dlsp_[i] = cb[index*k]*PI/4000.0; + /* simple uniform scalar quantisers */ - if (i) - lsp_[i] = lsp_[i-1] + dlsp_[i]; - else - lsp_[0] = dlsp_[0]; - } - dlsp[i] = lsp[i] - lsp_[i-1]; - dlsp_[i] = dlsp[i]; - - //printf("lsp[0] %f lsp_[0] %f\n", lsp[0], lsp_[0]); - //printf("lsp[1] %f lsp_[1] %f\n", lsp[1], lsp_[1]); - -#ifdef TT - /* VQ dLSPs 3,4,5 */ - - ncb = 2; - nlsp = 2; - k = lsp_cbdvq[ncb].k; - m = lsp_cbdvq[ncb].m; - cb = lsp_cbdvq[ncb].cb; - index = quantise(cb, &dlsp[nlsp], wt, k, m, &se); - dlsp_[nlsp] = cb[index*k]; - dlsp_[nlsp+1] = cb[index*k+1]; - dlsp_[nlsp+2] = cb[index*k+2]; - - lsp_[0] = dlsp_[0]; - for(i=1; i<5; i++) - lsp_[i] = lsp_[i-1] + dlsp_[i]; - dlsp[i] = lsp[i] - lsp_[i-1]; - dlsp_[i] = dlsp[i]; + for(i=0; i<4; i++) { + k = lsp_cbjnd[i].k; + m = lsp_cbjnd[i].m; + cb = lsp_cbjnd[i].cb; + index = quantise(cb, &lsps_hz[i], wt, k, m, &se); + lsps_[i] = cb[index*k]*(PI/4000.0); + } + + /* VQ LSPs 5,6,7,8,9,10 */ + + k = lsp_cbjnd[4].k; + m = lsp_cbjnd[4].m; + cb = lsp_cbjnd[4].cb; + index = quantise(cb, &lsps_hz[4], &wt[4], k, m, &se); + //printf("k = %d m = %d c[0] %f cb[k] %f\n", k,m,cb[0],cb[k]); + //printf("index = %4d: ", index); + for(i=4; i<LPC_ORD; i++) { + lsps_[i] = cb[index*k+i-4]*(PI/4000.0); + //printf("%4.f (%4.f) ", lsps_hz[i], cb[index*k+i-4]); + } + //printf("\n"); +} + +void compute_weights(const float *x, float *w, int ndim); + +/*---------------------------------------------------------------------------*\ + + lspdt_quantise + + LSP difference in time quantiser. Split VQ, encoding LSPs 1-4 with + one VQ, and LSPs 5-10 with a second. Update of previous lsp memory + is done outside of this function to handle dT between 10 or 20ms + frames. + + mode action + ------------------ + + LSPDT_ALL VQ LSPs 1-4 and 5-10 + LSPDT_LOW Just VQ LSPs 1-4, for LSPs 5-10 just copy previous + LSPDT_HIGH Just VQ LSPs 5-10, for LSPs 1-4 just copy previous + +\*---------------------------------------------------------------------------*/ + +void lspdt_quantise(float lsps[], float lsps_[], float lsps__prev[], int mode) +{ + int i; + float wt[LPC_ORD]; + float lsps_dt[LPC_ORD]; +#ifdef TRY_LSPDT_VQ + int k,m; + int index; + const float *cb; + float se = 0.0; +#endif // TRY_LSPDT_VQ + + //compute_weights(lsps, wt, LPC_ORD); + for(i=0; i<LPC_ORD; i++) { + wt[i] = 1.0; + } + + //compute_weights(lsps, wt, LPC_ORD ); + + for(i=0; i<LPC_ORD; i++) { + lsps_dt[i] = lsps[i] - lsps__prev[i]; + lsps_[i] = lsps__prev[i]; + } + + //#define TRY_LSPDT_VQ +#ifdef TRY_LSPDT_VQ + /* this actually improves speech a bit, but 40ms updates works surprsingly well.... */ + k = lsp_cbdt[0].k; + m = lsp_cbdt[0].m; + cb = lsp_cbdt[0].cb; + index = quantise(cb, lsps_dt, wt, k, m, &se); + for(i=0; i<LPC_ORD; i++) { + lsps_[i] += cb[index*k + i]; + } #endif - /* VQ dLSPs 6,7,8,9,10 */ - ncb = 5; - nlsp = 5; - k = lsp_cbdvq[ncb].k; - m = lsp_cbdvq[ncb].m; - cb = lsp_cbdvq[ncb].cb; - index = quantise(cb, &dlsp[nlsp], wt, k, m, &se); - dlsp_[nlsp] = cb[index*k]; - dlsp_[nlsp+1] = cb[index*k+1]; - dlsp_[nlsp+2] = cb[index*k+2]; - dlsp_[nlsp+3] = cb[index*k+3]; - dlsp_[nlsp+4] = cb[index*k+4]; +} +#endif + +#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX_ENTRIES 16384 + +void compute_weights(const float *x, float *w, int ndim) +{ + int i; + w[0] = MIN(x[0], x[1]-x[0]); + for (i=1;i<ndim-1;i++) + w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]); + w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]); + + for (i=0;i<ndim;i++) + w[i] = 1./(.01+w[i]); + //w[0]*=3; + //w[1]*=2; +} + +/* LSP weight calculation ported from m-file function kindly submitted + by Anssi, OH3GDD */ + +void compute_weights_anssi_mode2(const float *x, float *w, int ndim) +{ + int i; + float d[LPC_ORD]; + + assert(ndim == LPC_ORD); + + for(i=0; i<LPC_ORD; i++) + d[i] = 1.0; + + d[0] = x[1]; + for (i=1; i<LPC_ORD-1; i++) + d[i] = x[i+1] - x[i-1]; + d[LPC_ORD-1] = PI - x[8]; + for (i=0; i<LPC_ORD; i++) { + if (x[i]<((400.0/4000.0)*PI)) + w[i]=5.0/(0.01+d[i]); + else if (x[i]<((700.0/4000.0)*PI)) + w[i]=4.0/(0.01+d[i]); + else if (x[i]<((1200.0/4000.0)*PI)) + w[i]=3.0/(0.01+d[i]); + else if (x[i]<((2000.0/4000.0)*PI)) + w[i]=2.0/(0.01+d[i]); + else + w[i]=1.0/(0.01+d[i]); + + w[i]=pow(w[i]+0.3, 0.66); + } +} + +int find_nearest(const float *codebook, int nb_entries, float *x, int ndim) +{ + int i, j; + float min_dist = 1e15; + int nearest = 0; + + for (i=0;i<nb_entries;i++) + { + float dist=0; + for (j=0;j<ndim;j++) + dist += (x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]); + if (dist<min_dist) + { + min_dist = dist; + nearest = i; + } + } + return nearest; +} + +int find_nearest_weighted(const float *codebook, int nb_entries, float *x, const float *w, int ndim) +{ + int i, j; + float min_dist = 1e15; + int nearest = 0; + + for (i=0;i<nb_entries;i++) + { + float dist=0; + for (j=0;j<ndim;j++) + dist += w[j]*(x[j]-codebook[i*ndim+j])*(x[j]-codebook[i*ndim+j]); + if (dist<min_dist) + { + min_dist = dist; + nearest = i; + } + } + return nearest; +} + +void lspjvm_quantise(float *x, float *xq, int ndim) +{ + int i, n1, n2, n3; + float err[LPC_ORD], err2[LPC_ORD], err3[LPC_ORD]; + float w[LPC_ORD], w2[LPC_ORD], w3[LPC_ORD]; + const float *codebook1 = lsp_cbjvm[0].cb; + const float *codebook2 = lsp_cbjvm[1].cb; + const float *codebook3 = lsp_cbjvm[2].cb; + + w[0] = MIN(x[0], x[1]-x[0]); + for (i=1;i<ndim-1;i++) + w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]); + w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]); + + compute_weights(x, w, ndim); + + n1 = find_nearest(codebook1, lsp_cbjvm[0].m, x, ndim); + + for (i=0;i<ndim;i++) + { + xq[i] = codebook1[ndim*n1+i]; + err[i] = x[i] - xq[i]; + } + for (i=0;i<ndim/2;i++) + { + err2[i] = err[2*i]; + err3[i] = err[2*i+1]; + w2[i] = w[2*i]; + w3[i] = w[2*i+1]; + } + n2 = find_nearest_weighted(codebook2, lsp_cbjvm[1].m, err2, w2, ndim/2); + n3 = find_nearest_weighted(codebook3, lsp_cbjvm[2].m, err3, w3, ndim/2); + + for (i=0;i<ndim/2;i++) + { + xq[2*i] += codebook2[ndim*n2/2+i]; + xq[2*i+1] += codebook3[ndim*n3/2+i]; + } +} + +#ifdef __EXPERIMENTAL__ + +#define MBEST_STAGES 4 + +struct MBEST_LIST { + int index[MBEST_STAGES]; /* index of each stage that lead us to this error */ + float error; +}; + +struct MBEST { + int entries; /* number of entries in mbest list */ + struct MBEST_LIST *list; +}; + + +static struct MBEST *mbest_create(int entries) { + int i,j; + struct MBEST *mbest; + + assert(entries > 0); + mbest = (struct MBEST *)malloc(sizeof(struct MBEST)); + assert(mbest != NULL); + + mbest->entries = entries; + mbest->list = (struct MBEST_LIST *)malloc(entries*sizeof(struct MBEST_LIST)); + assert(mbest->list != NULL); + + for(i=0; i<mbest->entries; i++) { + for(j=0; j<MBEST_STAGES; j++) + mbest->list[i].index[j] = 0; + mbest->list[i].error = 1E32; + } + + return mbest; +} + + +static void mbest_destroy(struct MBEST *mbest) { + assert(mbest != NULL); + free(mbest->list); + free(mbest); +} + + +/*---------------------------------------------------------------------------*\ + + mbest_insert + + Insert the results of a vector to codebook entry comparison. The + list is ordered in order or error, so those entries with the + smallest error will be first on the list. + +\*---------------------------------------------------------------------------*/ + +static void mbest_insert(struct MBEST *mbest, int index[], float error) { + int i, j, found; + struct MBEST_LIST *list = mbest->list; + int entries = mbest->entries; + + found = 0; + for(i=0; i<entries && !found; i++) + if (error < list[i].error) { + found = 1; + for(j=entries-1; j>i; j--) + list[j] = list[j-1]; + for(j=0; j<MBEST_STAGES; j++) + list[i].index[j] = index[j]; + list[i].error = error; + } +} + - /* rebuild LSPs for dLSPs */ +static void mbest_print(char title[], struct MBEST *mbest) { + int i,j; - lsp_[0] = dlsp_[0]; - for(i=1; i<order; i++) - lsp_[i] = lsp_[i-1] + dlsp_[i]; + printf("%s\n", title); + for(i=0; i<mbest->entries; i++) { + for(j=0; j<MBEST_STAGES; j++) + printf(" %4d ", mbest->list[i].index[j]); + printf(" %f\n", mbest->list[i].error); + } } -void check_lsp_order(float lsp[], int lpc_order) + +/*---------------------------------------------------------------------------*\ + + mbest_search + + Searches vec[] to a codebbook of vectors, and maintains a list of the mbest + closest matches. + +\*---------------------------------------------------------------------------*/ + +static void mbest_search( + const float *cb, /* VQ codebook to search */ + float vec[], /* target vector */ + float w[], /* weighting vector */ + int k, /* dimension of vector */ + int m, /* number on entries in codebook */ + struct MBEST *mbest, /* list of closest matches */ + int index[] /* indexes that lead us here */ +) +{ + float e; + int i,j; + float diff; + + for(j=0; j<m; j++) { + e = 0.0; + for(i=0; i<k; i++) { + diff = cb[j*k+i]-vec[i]; + e += pow(diff*w[i],2.0); + } + index[0] = j; + mbest_insert(mbest, index, e); + } +} + + +/* 3 stage VQ LSP quantiser. Design and guidance kindly submitted by Anssi, OH3GDD */ + +void lspanssi_quantise(float *x, float *xq, int ndim, int mbest_entries) +{ + int i, j, n1, n2, n3, n4; + float w[LPC_ORD]; + const float *codebook1 = lsp_cbvqanssi[0].cb; + const float *codebook2 = lsp_cbvqanssi[1].cb; + const float *codebook3 = lsp_cbvqanssi[2].cb; + const float *codebook4 = lsp_cbvqanssi[3].cb; + struct MBEST *mbest_stage1, *mbest_stage2, *mbest_stage3, *mbest_stage4; + float target[LPC_ORD]; + int index[MBEST_STAGES]; + + mbest_stage1 = mbest_create(mbest_entries); + mbest_stage2 = mbest_create(mbest_entries); + mbest_stage3 = mbest_create(mbest_entries); + mbest_stage4 = mbest_create(mbest_entries); + for(i=0; i<MBEST_STAGES; i++) + index[i] = 0; + + compute_weights_anssi_mode2(x, w, ndim); + + #ifdef DUMP + dump_weights(w, ndim); + #endif + + /* Stage 1 */ + + mbest_search(codebook1, x, w, ndim, lsp_cbvqanssi[0].m, mbest_stage1, index); + mbest_print("Stage 1:", mbest_stage1); + + /* Stage 2 */ + + for (j=0; j<mbest_entries; j++) { + index[1] = n1 = mbest_stage1->list[j].index[0]; + for(i=0; i<ndim; i++) + target[i] = x[i] - codebook1[ndim*n1+i]; + mbest_search(codebook2, target, w, ndim, lsp_cbvqanssi[1].m, mbest_stage2, index); + } + mbest_print("Stage 2:", mbest_stage2); + + /* Stage 3 */ + + for (j=0; j<mbest_entries; j++) { + index[2] = n1 = mbest_stage2->list[j].index[1]; + index[1] = n2 = mbest_stage2->list[j].index[0]; + for(i=0; i<ndim; i++) + target[i] = x[i] - codebook1[ndim*n1+i] - codebook2[ndim*n2+i]; + mbest_search(codebook3, target, w, ndim, lsp_cbvqanssi[2].m, mbest_stage3, index); + } + mbest_print("Stage 3:", mbest_stage3); + + /* Stage 4 */ + + for (j=0; j<mbest_entries; j++) { + index[3] = n1 = mbest_stage3->list[j].index[2]; + index[2] = n2 = mbest_stage3->list[j].index[1]; + index[1] = n3 = mbest_stage3->list[j].index[0]; + for(i=0; i<ndim; i++) + target[i] = x[i] - codebook1[ndim*n1+i] - codebook2[ndim*n2+i] - codebook3[ndim*n3+i]; + mbest_search(codebook4, target, w, ndim, lsp_cbvqanssi[3].m, mbest_stage4, index); + } + mbest_print("Stage 4:", mbest_stage4); + + n1 = mbest_stage4->list[0].index[3]; + n2 = mbest_stage4->list[0].index[2]; + n3 = mbest_stage4->list[0].index[1]; + n4 = mbest_stage4->list[0].index[0]; + for (i=0;i<ndim;i++) + xq[i] = codebook1[ndim*n1+i] + codebook2[ndim*n2+i] + codebook3[ndim*n3+i] + codebook4[ndim*n4+i]; + + mbest_destroy(mbest_stage1); + mbest_destroy(mbest_stage2); + mbest_destroy(mbest_stage3); + mbest_destroy(mbest_stage4); +} +#endif + +int check_lsp_order(float lsp[], int lpc_order) { int i; float tmp; + int swaps = 0; for(i=1; i<lpc_order; i++) if (lsp[i] < lsp[i-1]) { - printf("swap %d\n",i); + //fprintf(stderr, "swap %d\n",i); + swaps++; tmp = lsp[i-1]; - lsp[i-1] = lsp[i]-0.05; - lsp[i] = tmp+0.05; + lsp[i-1] = lsp[i]-0.1; + lsp[i] = tmp+0.1; + i = 1; /* start check again, as swap may have caused out of order */ } + + return swaps; } void force_min_lsp_dist(float lsp[], int lpc_order) @@ -329,131 +756,159 @@ void force_min_lsp_dist(float lsp[], int lpc_order) } } + /*---------------------------------------------------------------------------*\ - lpc_model_amplitudes + lpc_post_filter() + + Applies a post filter to the LPC synthesis filter power spectrum + Pw, which supresses the inter-formant energy. + + The algorithm is from p267 (Section 8.6) of "Digital Speech", + edited by A.M. Kondoz, 1994 published by Wiley and Sons. Chapter 8 + of this text is on the MBE vocoder, and this is a freq domain + adaptation of post filtering commonly used in CELP. - Derive a LPC model for amplitude samples then estimate amplitude samples - from this model with optional LSP quantisation. + I used the Octave simulation lpcpf.m to get an understaing of the + algorithm. - Returns the spectral distortion for this frame. + Requires two more FFTs which is significantly more MIPs. However + it should be possible to implement this more efficiently in the + time domain. Just not sure how to handle relative time delays + between the synthesis stage and updating these coeffs. A smaller + FFT size might also be accetable to save CPU. + + TODO: + [ ] sync var names between Octave and C version + [ ] doc gain normalisation + [ ] I think the first FFT is not rqd as we do the same + thing in aks_to_M2(). \*---------------------------------------------------------------------------*/ -float lpc_model_amplitudes( - float Sn[], /* Input frame of speech samples */ - float w[], - MODEL *model, /* sinusoidal model parameters */ - int order, /* LPC model order */ - int lsp_quant, /* optional LSP quantisation if non-zero */ - float ak[] /* output aks */ -) +void lpc_post_filter(kiss_fft_cfg fft_fwd_cfg, MODEL *model, COMP Pw[], float ak[], + int order, int dump, float beta, float gamma, int bass_boost) { - float Wn[M]; - float R[LPC_MAX+1]; - float E; - int i,j; - float snr; - float lsp[LPC_MAX]; - float lsp_hz[LPC_MAX]; - float lsp_[LPC_MAX]; - int roots; /* number of LSP roots found */ - int index; - float se = 0.0; - int k,m; - const float * cb; - float wt[LPC_MAX]; - - for(i=0; i<M; i++) - Wn[i] = Sn[i]*w[i]; - autocorrelate(Wn,R,M,order); - levinson_durbin(R,ak,order); - - E = 0.0; - for(i=0; i<=order; i++) - E += ak[i]*R[i]; + int i; + COMP x[FFT_ENC]; /* input to FFTs */ + COMP Aw[FFT_ENC]; /* LPC analysis filter spectrum */ + COMP Ww[FFT_ENC]; /* weighting spectrum */ + float Rw[FFT_ENC]; /* R = WA */ + float e_before, e_after, gain; + float Pfw[FFT_ENC]; /* Post filter mag spectrum */ + float max_Rw, min_Rw; + float coeff; + TIMER_VAR(tstart, tfft1, taw, tfft2, tww, tr); + + TIMER_SAMPLE(tstart); + + /* Determine LPC inverse filter spectrum 1/A(exp(jw)) -----------*/ + + /* we actually want the synthesis filter A(exp(jw)) but the + inverse (analysis) filter is easier to find as it's FIR, we + just use the inverse of 1/A to get the synthesis filter + A(exp(jw)) */ + + for(i=0; i<FFT_ENC; i++) { + x[i].real = 0.0; + x[i].imag = 0.0; + } - for(i=0; i<order; i++) - wt[i] = 1.0; + for(i=0; i<=order; i++) + x[i].real = ak[i]; + kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Aw); - if (lsp_quant) { - roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1); - if (roots != order) - printf("LSP roots not found\n"); + TIMER_SAMPLE_AND_LOG(tfft1, tstart, " fft1"); - /* convert from radians to Hz to make quantisers more - human readable */ + for(i=0; i<FFT_ENC/2; i++) { + Aw[i].real = 1.0/(Aw[i].real*Aw[i].real + Aw[i].imag*Aw[i].imag); + } - for(i=0; i<order; i++) - lsp_hz[i] = (4000.0/PI)*lsp[i]; + TIMER_SAMPLE_AND_LOG(taw, tfft1, " Aw"); - /* simple uniform scalar quantisers */ + /* Determine weighting filter spectrum W(exp(jw)) ---------------*/ - for(i=0; i<10; i++) { - k = lsp_cb[i].k; - m = lsp_cb[i].m; - cb = lsp_cb[i].cb; - index = quantise(cb, &lsp_hz[i], wt, k, m, &se); - lsp_hz[i] = cb[index*k]; + for(i=0; i<FFT_ENC; i++) { + x[i].real = 0.0; + x[i].imag = 0.0; } - /* experiment: simulating uniform quantisation error - for(i=0; i<order; i++) - lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX); - */ - - for(i=0; i<order; i++) - lsp[i] = (PI/4000.0)*lsp_hz[i]; + x[0].real = ak[0]; + coeff = gamma; + for(i=1; i<=order; i++) { + x[i].real = ak[i] * coeff; + coeff *= gamma; + } + kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)x, (kiss_fft_cpx *)Ww); - /* Bandwidth Expansion (BW). Prevents any two LSPs getting too - close together after quantisation. We know from experiment - that LSP quantisation errors < 12.5Hz (25Hz setp size) are - inaudible so we use that as the minimum LSP separation. - */ + TIMER_SAMPLE_AND_LOG(tfft2, taw, " fft2"); - for(i=1; i<5; i++) { - if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0)) - lsp[i] = lsp[i-1] + PI*(12.5/4000.0); + for(i=0; i<FFT_ENC/2; i++) { + Ww[i].real = Ww[i].real*Ww[i].real + Ww[i].imag*Ww[i].imag; } - /* as quantiser gaps increased, larger BW expansion was required - to prevent twinkly noises */ + TIMER_SAMPLE_AND_LOG(tww, tfft2, " Ww"); + + /* Determined combined filter R = WA ---------------------------*/ + + max_Rw = 0.0; min_Rw = 1E32; + for(i=0; i<FFT_ENC/2; i++) { + Rw[i] = sqrtf(Ww[i].real * Aw[i].real); + if (Rw[i] > max_Rw) + max_Rw = Rw[i]; + if (Rw[i] < min_Rw) + min_Rw = Rw[i]; - for(i=5; i<8; i++) { - if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0)) - lsp[i] = lsp[i-1] + PI*(25.0/4000.0); } - for(i=8; i<order; i++) { - if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0)) - lsp[i] = lsp[i-1] + PI*(75.0/4000.0); + + TIMER_SAMPLE_AND_LOG(tr, tww, " R"); + + #ifdef DUMP + if (dump) + dump_Rw(Rw); + #endif + + /* create post filter mag spectrum and apply ------------------*/ + + /* measure energy before post filtering */ + + e_before = 1E-4; + for(i=0; i<FFT_ENC/2; i++) + e_before += Pw[i].real; + + /* apply post filter and measure energy */ + + #ifdef DUMP + if (dump) + dump_Pwb(Pw); + #endif + + e_after = 1E-4; + for(i=0; i<FFT_ENC/2; i++) { + Pfw[i] = powf(Rw[i], beta); + Pw[i].real *= Pfw[i] * Pfw[i]; + e_after += Pw[i].real; } + gain = e_before/e_after; - for(j=0; j<order; j++) - lsp_[j] = lsp[j]; + /* apply gain factor to normalise energy */ - lsp_to_lpc(lsp_, ak, order); -#ifdef DUMP - dump_lsp(lsp); -#endif - } + for(i=0; i<FFT_ENC/2; i++) { + Pw[i].real *= gain; + } -#ifdef DUMP - dump_E(E); -#endif - #ifdef SIM_QUANT - /* simulated LPC energy quantisation */ - { - float e = 10.0*log10(E); - e += 2.0*(1.0 - 2.0*(float)rand()/RAND_MAX); - E = pow(10.0,e/10.0); - } - #endif + if (bass_boost) { + /* add 3dB to first 1 kHz to account for LP effect of PF */ - aks_to_M2(ak,order,model,E,&snr, 1); /* {ak} -> {Am} LPC decode */ + for(i=0; i<FFT_ENC/8; i++) { + Pw[i].real *= 1.4*1.4; + } + } - return snr; + TIMER_SAMPLE_AND_LOG2(tr, " filt"); } + /*---------------------------------------------------------------------------*\ aks_to_M2() @@ -465,61 +920,103 @@ float lpc_model_amplitudes( \*---------------------------------------------------------------------------*/ void aks_to_M2( - float ak[], /* LPC's */ - int order, - MODEL *model, /* sinusoidal model parameters for this frame */ - float E, /* energy term */ - float *snr, /* signal to noise ratio for this frame in dB */ - int dump /* true to dump sample to dump file */ + kiss_fft_cfg fft_fwd_cfg, + float ak[], /* LPC's */ + int order, + MODEL *model, /* sinusoidal model parameters for this frame */ + float E, /* energy term */ + float *snr, /* signal to noise ratio for this frame in dB */ + int dump, /* true to dump sample to dump file */ + int sim_pf, /* true to simulate a post filter */ + int pf, /* true to LPC post filter */ + int bass_boost, /* enable LPC filter 0-1khz 3dB boost */ + float beta, + float gamma /* LPC post filter parameters */ ) { - COMP Pw[FFT_DEC]; /* power spectrum */ + COMP pw[FFT_ENC]; /* input to FFT for power spectrum */ + COMP Pw[FFT_ENC]; /* output power spectrum */ int i,m; /* loop variables */ int am,bm; /* limits of current band */ float r; /* no. rads/bin */ float Em; /* energy in band */ float Am; /* spectral amplitude sample */ float signal, noise; + TIMER_VAR(tstart, tfft, tpw, tpf); + + TIMER_SAMPLE(tstart); - r = TWO_PI/(FFT_DEC); + r = TWO_PI/(FFT_ENC); /* Determine DFT of A(exp(jw)) --------------------------------------------*/ - for(i=0; i<FFT_DEC; i++) { - Pw[i].real = 0.0; - Pw[i].imag = 0.0; + for(i=0; i<FFT_ENC; i++) { + pw[i].real = 0.0; + pw[i].imag = 0.0; } for(i=0; i<=order; i++) - Pw[i].real = ak[i]; - fft(&Pw[0].real,FFT_DEC,1); + pw[i].real = ak[i]; + kiss_fft(fft_fwd_cfg, (kiss_fft_cpx *)pw, (kiss_fft_cpx *)Pw); + + TIMER_SAMPLE_AND_LOG(tfft, tstart, " fft"); /* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/ - for(i=0; i<FFT_DEC/2; i++) + for(i=0; i<FFT_ENC/2; i++) Pw[i].real = E/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag); -#ifdef DUMP + + TIMER_SAMPLE_AND_LOG(tpw, tfft, " Pw"); + + if (pf) + lpc_post_filter(fft_fwd_cfg, model, Pw, ak, order, dump, beta, gamma, bass_boost); + + TIMER_SAMPLE_AND_LOG(tpf, tpw, " LPC post filter"); + + #ifdef DUMP if (dump) dump_Pw(Pw); -#endif + #endif - /* Determine magnitudes by linear interpolation of P(w) -------------------*/ + /* Determine magnitudes from P(w) ----------------------------------------*/ - signal = noise = 0.0; - for(m=1; m<=model->L; m++) { - am = floor((m - 0.5)*model->Wo/r + 0.5); - bm = floor((m + 0.5)*model->Wo/r + 0.5); - Em = 0.0; + /* when used just by decoder {A} might be all zeroes so init signal + and noise to prevent log(0) errors */ - for(i=am; i<bm; i++) - Em += Pw[i].real; - Am = sqrt(Em); + signal = 1E-30; noise = 1E-32; - signal += pow(model->A[m],2.0); - noise += pow(model->A[m] - Am,2.0); - model->A[m] = Am; + for(m=1; m<=model->L; m++) { + am = (int)((m - 0.5)*model->Wo/r + 0.5); + bm = (int)((m + 0.5)*model->Wo/r + 0.5); + Em = 0.0; + + for(i=am; i<bm; i++) + Em += Pw[i].real; + Am = sqrtf(Em); + + signal += model->A[m]*model->A[m]; + noise += (model->A[m] - Am)*(model->A[m] - Am); + + /* This code significantly improves perf of LPC model, in + particular when combined with phase0. The LPC spectrum tends + to track just under the peaks of the spectral envelope, and + just above nulls. This algorithm does the reverse to + compensate - raising the amplitudes of spectral peaks, while + attenuating the null. This enhances the formants, and + supresses the energy between formants. */ + + if (sim_pf) { + if (Am > model->A[m]) + Am *= 0.7; + if (Am < model->A[m]) + Am *= 1.4; + } + + model->A[m] = Am; } - *snr = 10.0*log10(signal/noise); + *snr = 10.0*log10f(signal/noise); + + TIMER_SAMPLE_AND_LOG2(tpf, " rec"); } /*---------------------------------------------------------------------------*\ @@ -540,7 +1037,7 @@ int encode_Wo(float Wo) float norm; norm = (Wo - Wo_min)/(Wo_max - Wo_min); - index = floor(WO_LEVELS * norm + 0.5); + index = floorf(WO_LEVELS * norm + 0.5); if (index < 0 ) index = 0; if (index > (WO_LEVELS-1)) index = WO_LEVELS-1; @@ -572,6 +1069,84 @@ float decode_Wo(int index) /*---------------------------------------------------------------------------*\ + FUNCTION....: encode_Wo_dt() + AUTHOR......: David Rowe + DATE CREATED: 6 Nov 2011 + + Encodes Wo difference from last frame. + +\*---------------------------------------------------------------------------*/ + +int encode_Wo_dt(float Wo, float prev_Wo) +{ + int index, mask, max_index, min_index; + float Wo_min = TWO_PI/P_MAX; + float Wo_max = TWO_PI/P_MIN; + float norm; + + norm = (Wo - prev_Wo)/(Wo_max - Wo_min); + index = floor(WO_LEVELS * norm + 0.5); + //printf("ENC index: %d ", index); + + /* hard limit */ + + max_index = (1 << (WO_DT_BITS-1)) - 1; + min_index = - (max_index+1); + if (index > max_index) index = max_index; + if (index < min_index) index = min_index; + //printf("max_index: %d min_index: %d hard index: %d ", + // max_index, min_index, index); + + /* mask so that only LSB WO_DT_BITS remain, bit WO_DT_BITS is the sign bit */ + + mask = ((1 << WO_DT_BITS) - 1); + index &= mask; + //printf("mask: 0x%x index: 0x%x\n", mask, index); + + return index; +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_Wo_dt() + AUTHOR......: David Rowe + DATE CREATED: 6 Nov 2011 + + Decodes Wo using WO_DT_BITS difference from last frame. + +\*---------------------------------------------------------------------------*/ + +float decode_Wo_dt(int index, float prev_Wo) +{ + float Wo_min = TWO_PI/P_MAX; + float Wo_max = TWO_PI/P_MIN; + float step; + float Wo; + int mask; + + /* sign extend index */ + + //printf("DEC index: %d "); + if (index & (1 << (WO_DT_BITS-1))) { + mask = ~((1 << WO_DT_BITS) - 1); + index |= mask; + } + //printf("DEC mask: 0x%x index: %d \n", mask, index); + + step = (Wo_max - Wo_min)/WO_LEVELS; + Wo = prev_Wo + step*(index); + + /* bit errors can make us go out of range leading to all sorts of + probs like seg faults */ + + if (Wo > Wo_max) Wo = Wo_max; + if (Wo < Wo_min) Wo = Wo_min; + + return Wo; +} + +/*---------------------------------------------------------------------------*\ + FUNCTION....: speech_to_uq_lsps() AUTHOR......: David Rowe DATE CREATED: 22/8/2010 @@ -592,10 +1167,22 @@ float speech_to_uq_lsps(float lsp[], int i, roots; float Wn[M]; float R[LPC_MAX+1]; - float E; + float e, E; - for(i=0; i<M; i++) + e = 0.0; + for(i=0; i<M; i++) { Wn[i] = Sn[i]*w[i]; + e += Wn[i]*Wn[i]; + } + + /* trap 0 energy case as LPC analysis will fail */ + + if (e == 0.0) { + for(i=0; i<order; i++) + lsp[i] = (PI/order)*(float)i; + return 0.0; + } + autocorrelate(Wn, R, M, order); levinson_durbin(R, ak, order); @@ -603,15 +1190,17 @@ float speech_to_uq_lsps(float lsp[], for(i=0; i<=order; i++) E += ak[i]*R[i]; + /* 15 Hz BW expansion as I can't hear the difference and it may help + help occasional fails in the LSP root finding. Important to do this + after energy calculation to avoid -ve energy values. + */ + + for(i=0; i<=order; i++) + ak[i] *= powf(0.994,(float)i); + roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1); if (roots != order) { - /* for some reason LSP roots could not be found */ - /* some alpha testers are reporting this condition */ - fprintf(stderr, "LSP roots not found!\nroots = %d\n", roots); - for(i=0; i<=order; i++) - fprintf(stderr, "a[%d] = %f\n", i, ak[i]); - - /* some benign LSP values we can use instead */ + /* if root finding fails use some benign LSP values instead */ for(i=0; i<order; i++) lsp[i] = (PI/order)*(float)i; } @@ -621,22 +1210,22 @@ float speech_to_uq_lsps(float lsp[], /*---------------------------------------------------------------------------*\ - FUNCTION....: encode_lsps() + FUNCTION....: encode_lsps_scalar() AUTHOR......: David Rowe DATE CREATED: 22/8/2010 - From a vector of unquantised (floating point) LSPs finds the quantised - LSP indexes. + Thirty-six bit sclar LSP quantiser. From a vector of unquantised + (floating point) LSPs finds the quantised LSP indexes. \*---------------------------------------------------------------------------*/ -void encode_lsps(int indexes[], float lsp[], int order) +void encode_lsps_scalar(int indexes[], float lsp[], int order) { int i,k,m; float wt[1]; float lsp_hz[LPC_MAX]; const float * cb; - float se = 0.0; + float se; /* convert from radians to Hz so we can use human readable frequencies */ @@ -644,7 +1233,7 @@ void encode_lsps(int indexes[], float lsp[], int order) for(i=0; i<order; i++) lsp_hz[i] = (4000.0/PI)*lsp[i]; - /* simple uniform scalar quantisers */ + /* scalar quantisers */ wt[0] = 1.0; for(i=0; i<order; i++) { @@ -657,7 +1246,7 @@ void encode_lsps(int indexes[], float lsp[], int order) /*---------------------------------------------------------------------------*\ - FUNCTION....: decode_lsps() + FUNCTION....: decode_lsps_scalar() AUTHOR......: David Rowe DATE CREATED: 22/8/2010 @@ -666,7 +1255,7 @@ void encode_lsps(int indexes[], float lsp[], int order) \*---------------------------------------------------------------------------*/ -void decode_lsps(float lsp[], int indexes[], int order) +void decode_lsps_scalar(float lsp[], int indexes[], int order) { int i,k; float lsp_hz[LPC_MAX]; @@ -684,6 +1273,273 @@ void decode_lsps(float lsp[], int indexes[], int order) lsp[i] = (PI/4000.0)*lsp_hz[i]; } + +#ifdef __EXPERIMENTAL__ + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: encode_lsps_diff_freq_vq() + AUTHOR......: David Rowe + DATE CREATED: 15 November 2011 + + Twenty-five bit LSP quantiser. LSPs 1-4 are quantised with scalar + LSP differences (in frequency, i.e difference from the previous + LSP). LSPs 5-10 are quantised with a VQ trained generated using + vqtrainjnd.c + +\*---------------------------------------------------------------------------*/ + +void encode_lsps_diff_freq_vq(int indexes[], float lsp[], int order) +{ + int i,k,m; + float lsp_hz[LPC_MAX]; + float lsp__hz[LPC_MAX]; + float dlsp[LPC_MAX]; + float dlsp_[LPC_MAX]; + float wt[LPC_MAX]; + const float * cb; + float se; + + for(i=0; i<LPC_ORD; i++) { + wt[i] = 1.0; + } + + /* convert from radians to Hz so we can use human readable + frequencies */ + + for(i=0; i<order; i++) + lsp_hz[i] = (4000.0/PI)*lsp[i]; + + /* scalar quantisers for LSP differences 1..4 */ + + wt[0] = 1.0; + for(i=0; i<4; i++) { + if (i) + dlsp[i] = lsp_hz[i] - lsp__hz[i-1]; + else + dlsp[0] = lsp_hz[0]; + + k = lsp_cbd[i].k; + m = lsp_cbd[i].m; + cb = lsp_cbd[i].cb; + indexes[i] = quantise(cb, &dlsp[i], wt, k, m, &se); + dlsp_[i] = cb[indexes[i]*k]; + + if (i) + lsp__hz[i] = lsp__hz[i-1] + dlsp_[i]; + else + lsp__hz[0] = dlsp_[0]; + } + + /* VQ LSPs 5,6,7,8,9,10 */ + + k = lsp_cbjnd[4].k; + m = lsp_cbjnd[4].m; + cb = lsp_cbjnd[4].cb; + indexes[4] = quantise(cb, &lsp_hz[4], &wt[4], k, m, &se); +} + + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_lsps_diff_freq_vq() + AUTHOR......: David Rowe + DATE CREATED: 15 Nov 2011 + + From a vector of quantised LSP indexes, returns the quantised + (floating point) LSPs. + +\*---------------------------------------------------------------------------*/ + +void decode_lsps_diff_freq_vq(float lsp_[], int indexes[], int order) +{ + int i,k,m; + float dlsp_[LPC_MAX]; + float lsp__hz[LPC_MAX]; + const float * cb; + + /* scalar LSP differences */ + + for(i=0; i<4; i++) { + cb = lsp_cbd[i].cb; + dlsp_[i] = cb[indexes[i]]; + if (i) + lsp__hz[i] = lsp__hz[i-1] + dlsp_[i]; + else + lsp__hz[0] = dlsp_[0]; + } + + /* VQ */ + + k = lsp_cbjnd[4].k; + m = lsp_cbjnd[4].m; + cb = lsp_cbjnd[4].cb; + for(i=4; i<order; i++) + lsp__hz[i] = cb[indexes[4]*k+i-4]; + + /* convert back to radians */ + + for(i=0; i<order; i++) + lsp_[i] = (PI/4000.0)*lsp__hz[i]; +} + + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: encode_lsps_diff_time() + AUTHOR......: David Rowe + DATE CREATED: 12 Sep 2012 + + Encode difference from preious frames's LSPs using + 3,3,2,2,2,2,1,1,1,1 scalar quantisers (18 bits total). + +\*---------------------------------------------------------------------------*/ + +void encode_lsps_diff_time(int indexes[], + float lsps[], + float lsps__prev[], + int order) +{ + int i,k,m; + float lsps_dt[LPC_ORD]; + float wt[LPC_MAX]; + const float * cb; + float se; + + /* Determine difference in time and convert from radians to Hz so + we can use human readable frequencies */ + + for(i=0; i<LPC_ORD; i++) { + lsps_dt[i] = (4000/PI)*(lsps[i] - lsps__prev[i]); + } + + /* scalar quantisers */ + + wt[0] = 1.0; + for(i=0; i<order; i++) { + k = lsp_cbdt[i].k; + m = lsp_cbdt[i].m; + cb = lsp_cbdt[i].cb; + indexes[i] = quantise(cb, &lsps_dt[i], wt, k, m, &se); + } + +} + + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_lsps_diff_time() + AUTHOR......: David Rowe + DATE CREATED: 15 Nov 2011 + + From a quantised LSP indexes, returns the quantised + (floating point) LSPs. + +\*---------------------------------------------------------------------------*/ + +void decode_lsps_diff_time( + float lsps_[], + int indexes[], + float lsps__prev[], + int order) +{ + int i,k,m; + const float * cb; + + for(i=0; i<order; i++) + lsps_[i] = lsps__prev[i]; + + for(i=0; i<order; i++) { + k = lsp_cbdt[i].k; + cb = lsp_cbdt[i].cb; + lsps_[i] += (PI/4000.0)*cb[indexes[i]*k]; + } + +} +#endif + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: encode_lsps_vq() + AUTHOR......: David Rowe + DATE CREATED: 15 Feb 2012 + + Multi-stage VQ LSP quantiser developed by Jean-Marc Valin. + +\*---------------------------------------------------------------------------*/ + +void encode_lsps_vq(int *indexes, float *x, float *xq, int ndim) +{ + int i, n1, n2, n3; + float err[LPC_ORD], err2[LPC_ORD], err3[LPC_ORD]; + float w[LPC_ORD], w2[LPC_ORD], w3[LPC_ORD]; + const float *codebook1 = lsp_cbjvm[0].cb; + const float *codebook2 = lsp_cbjvm[1].cb; + const float *codebook3 = lsp_cbjvm[2].cb; + + assert(ndim <= LPC_ORD); + + w[0] = MIN(x[0], x[1]-x[0]); + for (i=1;i<ndim-1;i++) + w[i] = MIN(x[i]-x[i-1], x[i+1]-x[i]); + w[ndim-1] = MIN(x[ndim-1]-x[ndim-2], PI-x[ndim-1]); + + compute_weights(x, w, ndim); + + n1 = find_nearest(codebook1, lsp_cbjvm[0].m, x, ndim); + + for (i=0;i<ndim;i++) + { + xq[i] = codebook1[ndim*n1+i]; + err[i] = x[i] - xq[i]; + } + for (i=0;i<ndim/2;i++) + { + err2[i] = err[2*i]; + err3[i] = err[2*i+1]; + w2[i] = w[2*i]; + w3[i] = w[2*i+1]; + } + n2 = find_nearest_weighted(codebook2, lsp_cbjvm[1].m, err2, w2, ndim/2); + n3 = find_nearest_weighted(codebook3, lsp_cbjvm[2].m, err3, w3, ndim/2); + + indexes[0] = n1; + indexes[1] = n2; + indexes[2] = n3; +} + + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_lsps_vq() + AUTHOR......: David Rowe + DATE CREATED: 15 Feb 2012 + +\*---------------------------------------------------------------------------*/ + +void decode_lsps_vq(int *indexes, float *xq, int ndim) +{ + int i, n1, n2, n3; + const float *codebook1 = lsp_cbjvm[0].cb; + const float *codebook2 = lsp_cbjvm[1].cb; + const float *codebook3 = lsp_cbjvm[2].cb; + + n1 = indexes[0]; + n2 = indexes[1]; + n3 = indexes[2]; + + for (i=0;i<ndim;i++) + { + xq[i] = codebook1[ndim*n1+i]; + } + for (i=0;i<ndim/2;i++) + { + xq[2*i] += codebook2[ndim*n2/2+i]; + xq[2*i+1] += codebook3[ndim*n3/2+i]; + } +} + + /*---------------------------------------------------------------------------*\ FUNCTION....: bw_expand_lsps() @@ -692,20 +1548,44 @@ void decode_lsps(float lsp[], int indexes[], int order) Applies Bandwidth Expansion (BW) to a vector of LSPs. Prevents any two LSPs getting too close together after quantisation. We know - from experiment that LSP quantisation errors < 12.5Hz (25Hz setp + from experiment that LSP quantisation errors < 12.5Hz (25Hz step size) are inaudible so we use that as the minimum LSP separation. \*---------------------------------------------------------------------------*/ -void bw_expand_lsps(float lsp[], +void bw_expand_lsps(float lsp[], int order, float min_sep_low, float min_sep_high) +{ + int i; + + for(i=1; i<4; i++) { + + if ((lsp[i] - lsp[i-1]) < min_sep_low*(PI/4000.0)) + lsp[i] = lsp[i-1] + min_sep_low*(PI/4000.0); + + } + + /* As quantiser gaps increased, larger BW expansion was required + to prevent twinkly noises. This may need more experiment for + different quanstisers. + */ + + for(i=4; i<order; i++) { + if (lsp[i] - lsp[i-1] < min_sep_high*(PI/4000.0)) + lsp[i] = lsp[i-1] + min_sep_high*(PI/4000.0); + } +} + +void bw_expand_lsps2(float lsp[], int order ) { int i; - for(i=1; i<5; i++) { - if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0)) - lsp[i] = lsp[i-1] + PI*(12.5/4000.0); + for(i=1; i<4; i++) { + + if ((lsp[i] - lsp[i-1]) < 100.0*(PI/4000.0)) + lsp[i] = lsp[i-1] + 100.0*(PI/4000.0); + } /* As quantiser gaps increased, larger BW expansion was required @@ -713,16 +1593,84 @@ void bw_expand_lsps(float lsp[], different quanstisers. */ - for(i=5; i<8; i++) { - if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0)) - lsp[i] = lsp[i-1] + PI*(25.0/4000.0); + for(i=4; i<order; i++) { + if (lsp[i] - lsp[i-1] < 200.0*(PI/4000.0)) + lsp[i] = lsp[i-1] + 200.0*(PI/4000.0); } - for(i=8; i<order; i++) { - if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0)) - lsp[i] = lsp[i-1] + PI*(75.0/4000.0); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: locate_lsps_jnd_steps() + AUTHOR......: David Rowe + DATE CREATED: 27/10/2011 + + Applies a form of Bandwidth Expansion (BW) to a vector of LSPs. + Listening tests have determined that "quantising" the position of + each LSP to the non-linear steps below introduces a "just noticable + difference" in the synthesised speech. + + This operation can be used before quantisation to limit the input + data to the quantiser to a number of discrete steps. + + This operation can also be used during quantisation as a form of + hysteresis in the calculation of quantiser error. For example if + the quantiser target of lsp1 is 500 Hz, candidate vectors with lsp1 + of 515 and 495 Hz sound effectively the same. + +\*---------------------------------------------------------------------------*/ + +void locate_lsps_jnd_steps(float lsps[], int order) +{ + int i; + float lsp_hz, step; + + assert(order == 10); + + /* quantise to 25Hz steps */ + + step = 25; + for(i=0; i<2; i++) { + lsp_hz = lsps[i]*4000.0/PI; + lsp_hz = floorf(lsp_hz/step + 0.5)*step; + lsps[i] = lsp_hz*PI/4000.0; + if (i) { + if (lsps[i] == lsps[i-1]) + lsps[i] += step*PI/4000.0; + + } + } + + /* quantise to 50Hz steps */ + + step = 50; + for(i=2; i<4; i++) { + lsp_hz = lsps[i]*4000.0/PI; + lsp_hz = floorf(lsp_hz/step + 0.5)*step; + lsps[i] = lsp_hz*PI/4000.0; + if (i) { + if (lsps[i] == lsps[i-1]) + lsps[i] += step*PI/4000.0; + + } + } + + /* quantise to 100Hz steps */ + + step = 100; + for(i=4; i<10; i++) { + lsp_hz = lsps[i]*4000.0/PI; + lsp_hz = floorf(lsp_hz/step + 0.5)*step; + lsps[i] = lsp_hz*PI/4000.0; + if (i) { + if (lsps[i] == lsps[i-1]) + lsps[i] += step*PI/4000.0; + + } } } + /*---------------------------------------------------------------------------*\ FUNCTION....: apply_lpc_correction() @@ -758,9 +1706,9 @@ int encode_energy(float e) float e_max = E_MAX_DB; float norm; - e = 10.0*log10(e); + e = 10.0*log10f(e); norm = (e - e_min)/(e_max - e_min); - index = floor(E_LEVELS * norm + 0.5); + index = floorf(E_LEVELS * norm + 0.5); if (index < 0 ) index = 0; if (index > (E_LEVELS-1)) index = E_LEVELS-1; @@ -773,7 +1721,7 @@ int encode_energy(float e) AUTHOR......: David Rowe DATE CREATED: 22/8/2010 - Decodes energy using a WO_BITS quantiser. + Decodes energy using a E_LEVELS quantiser. \*---------------------------------------------------------------------------*/ @@ -786,39 +1734,12 @@ float decode_energy(int index) step = (e_max - e_min)/E_LEVELS; e = e_min + step*(index); - e = pow(10.0,e/10.0); + e = powf(10.0,e/10.0); return e; } -/*---------------------------------------------------------------------------*\ - - FUNCTION....: encode_amplitudes() - AUTHOR......: David Rowe - DATE CREATED: 22/8/2010 - - Time domain LPC is used model the amplitudes which are then - converted to LSPs and quantised. So we don't actually encode the - amplitudes directly, rather we derive an equivalent representation - from the time domain speech. - -\*---------------------------------------------------------------------------*/ - -void encode_amplitudes(int lsp_indexes[], - int *energy_index, - MODEL *model, - float Sn[], - float w[]) -{ - float lsps[LPC_ORD]; - float ak[LPC_ORD+1]; - float e; - - e = speech_to_uq_lsps(lsps, ak, Sn, w, LPC_ORD); - encode_lsps(lsp_indexes, lsps, LPC_ORD); - *energy_index = encode_energy(e); -} - +#ifdef NOT_USED /*---------------------------------------------------------------------------*\ FUNCTION....: decode_amplitudes() @@ -830,7 +1751,8 @@ void encode_amplitudes(int lsp_indexes[], \*---------------------------------------------------------------------------*/ -float decode_amplitudes(MODEL *model, +float decode_amplitudes(kiss_fft_cfg fft_fwd_cfg, + MODEL *model, float ak[], int lsp_indexes[], int energy_index, @@ -840,12 +1762,209 @@ float decode_amplitudes(MODEL *model, { float snr; - decode_lsps(lsps, lsp_indexes, LPC_ORD); + decode_lsps_scalar(lsps, lsp_indexes, LPC_ORD); bw_expand_lsps(lsps, LPC_ORD); lsp_to_lpc(lsps, ak, LPC_ORD); *e = decode_energy(energy_index); - aks_to_M2(ak, LPC_ORD, model, *e, &snr, 1); + aks_to_M2(ak, LPC_ORD, model, *e, &snr, 1, 0, 0, 1); apply_lpc_correction(model); return snr; } +#endif + +static float ge_coeff[2] = {0.8, 0.9}; + +void compute_weights2(const float *x, const float *xp, float *w, int ndim) +{ + w[0] = 30; + w[1] = 1; + if (x[1]<0) + { + w[0] *= .6; + w[1] *= .3; + } + if (x[1]<-10) + { + w[0] *= .3; + w[1] *= .3; + } + /* Higher weight if pitch is stable */ + if (fabsf(x[0]-xp[0])<.2) + { + w[0] *= 2; + w[1] *= 1.5; + } else if (fabsf(x[0]-xp[0])>.5) /* Lower if not stable */ + { + w[0] *= .5; + } + + /* Lower weight for low energy */ + if (x[1] < xp[1]-10) + { + w[1] *= .5; + } + if (x[1] < xp[1]-20) + { + w[1] *= .5; + } + + //w[0] = 30; + //w[1] = 1; + + /* Square the weights because it's applied on the squared error */ + w[0] *= w[0]; + w[1] *= w[1]; + +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: quantise_WoE() + AUTHOR......: Jean-Marc Valin & David Rowe + DATE CREATED: 29 Feb 2012 + + Experimental joint Wo and LPC energy vector quantiser developed by + Jean-Marc Valin. Exploits correlations between the difference in + the log pitch and log energy from frame to frame. For example + both the pitch and energy tend to only change by small amounts + during voiced speech, however it is important that these changes be + coded carefully. During unvoiced speech they both change a lot but + the ear is less sensitve to errors so coarser quantisation is OK. + + The ear is sensitive to log energy and loq pitch so we quantise in + these domains. That way the error measure used to quantise the + values is close to way the ear senses errors. + + See http://jmspeex.livejournal.com/10446.html + +\*---------------------------------------------------------------------------*/ + +void quantise_WoE(MODEL *model, float *e, float xq[]) +{ + int i, n1; + float x[2]; + float err[2]; + float w[2]; + const float *codebook1 = ge_cb[0].cb; + int nb_entries = ge_cb[0].m; + int ndim = ge_cb[0].k; + float Wo_min = TWO_PI/P_MAX; + float Wo_max = TWO_PI/P_MIN; + + x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2); + x[1] = 10.0*log10f(1e-4 + *e); + + compute_weights2(x, xq, w, ndim); + for (i=0;i<ndim;i++) + err[i] = x[i]-ge_coeff[i]*xq[i]; + n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim); + + for (i=0;i<ndim;i++) + { + xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i]; + err[i] -= codebook1[ndim*n1+i]; + } + + /* + x = log2(4000*Wo/(PI*50)); + 2^x = 4000*Wo/(PI*50) + Wo = (2^x)*(PI*50)/4000; + */ + + model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0; + + /* bit errors can make us go out of range leading to all sorts of + probs like seg faults */ + + if (model->Wo > Wo_max) model->Wo = Wo_max; + if (model->Wo < Wo_min) model->Wo = Wo_min; + + model->L = PI/model->Wo; /* if we quantise Wo re-compute L */ + + *e = powf(10.0, xq[1]/10.0); +} + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: encode_WoE() + AUTHOR......: Jean-Marc Valin & David Rowe + DATE CREATED: 11 May 2012 + + Joint Wo and LPC energy vector quantiser developed my Jean-Marc + Valin. Returns index, and updated states xq[]. + +\*---------------------------------------------------------------------------*/ + +int encode_WoE(MODEL *model, float e, float xq[]) +{ + int i, n1; + float x[2]; + float err[2]; + float w[2]; + const float *codebook1 = ge_cb[0].cb; + int nb_entries = ge_cb[0].m; + int ndim = ge_cb[0].k; + + assert((1<<WO_E_BITS) == nb_entries); + + if (e < 0.0) e = 0; /* occasional small negative energies due LPC round off I guess */ + + x[0] = log10f((model->Wo/PI)*4000.0/50.0)/log10f(2); + x[1] = 10.0*log10f(1e-4 + e); + + compute_weights2(x, xq, w, ndim); + for (i=0;i<ndim;i++) + err[i] = x[i]-ge_coeff[i]*xq[i]; + n1 = find_nearest_weighted(codebook1, nb_entries, err, w, ndim); + + for (i=0;i<ndim;i++) + { + xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i]; + err[i] -= codebook1[ndim*n1+i]; + } + + //printf("enc: %f %f (%f)(%f) \n", xq[0], xq[1], e, 10.0*log10(1e-4 + e)); + return n1; +} + + +/*---------------------------------------------------------------------------*\ + + FUNCTION....: decode_WoE() + AUTHOR......: Jean-Marc Valin & David Rowe + DATE CREATED: 11 May 2012 + + Joint Wo and LPC energy vector quantiser developed my Jean-Marc + Valin. Given index and states xq[], returns Wo & E, and updates + states xq[]. + +\*---------------------------------------------------------------------------*/ + +void decode_WoE(MODEL *model, float *e, float xq[], int n1) +{ + int i; + const float *codebook1 = ge_cb[0].cb; + int ndim = ge_cb[0].k; + float Wo_min = TWO_PI/P_MAX; + float Wo_max = TWO_PI/P_MIN; + + for (i=0;i<ndim;i++) + { + xq[i] = ge_coeff[i]*xq[i] + codebook1[ndim*n1+i]; + } + + //printf("dec: %f %f\n", xq[0], xq[1]); + model->Wo = powf(2.0, xq[0])*(PI*50.0)/4000.0; + + /* bit errors can make us go out of range leading to all sorts of + probs like seg faults */ + + if (model->Wo > Wo_max) model->Wo = Wo_max; + if (model->Wo < Wo_min) model->Wo = Wo_min; + + model->L = PI/model->Wo; /* if we quantise Wo re-compute L */ + + *e = powf(10.0, xq[1]/10.0); +} + |