diff options
Diffstat (limited to 'gr-vocoder/lib/codec2/c2sim.c')
-rw-r--r-- | gr-vocoder/lib/codec2/c2sim.c | 1131 |
1 files changed, 795 insertions, 336 deletions
diff --git a/gr-vocoder/lib/codec2/c2sim.c b/gr-vocoder/lib/codec2/c2sim.c index e335078b64..8f07299d0d 100644 --- a/gr-vocoder/lib/codec2/c2sim.c +++ b/gr-vocoder/lib/codec2/c2sim.c @@ -4,8 +4,9 @@ AUTHOR......: David Rowe DATE CREATED: 20/8/2010 - Codec2 simulation. Combines encoder and decoder and allows switching in - out various algorithms and quantisation steps. + Codec2 simulation. Combines encoder and decoder and allows + switching in and out various algorithms and quantisation steps. Used + for algorithm development. \*---------------------------------------------------------------------------*/ @@ -32,6 +33,8 @@ #include <string.h> #include <errno.h> #include <math.h> +#include <unistd.h> +#include <getopt.h> #include "defines.h" #include "sine.h" @@ -43,32 +46,12 @@ #include "phase.h" #include "postfilter.h" #include "interp.h" +#include "ampexp.h" +#include "phaseexp.h" -/*---------------------------------------------------------------------------*\ - - switch_present() - - Searches the command line arguments for a "switch". If the switch is - found, returns the command line argument where it ws found, else returns - NULL. - -\*---------------------------------------------------------------------------*/ - -int switch_present(sw,argc,argv) -register char sw[]; /* switch in string form */ -register int argc; /* number of command line arguments */ -register char *argv[]; /* array of command line arguments in string form */ -{ - register int i; /* loop variable */ - - for(i=1; i<argc; i++) - if (!strcmp(sw,argv[i])) - return(i); +void synth_one_frame(kiss_fft_cfg fft_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain); +void print_help(const struct option *long_options, int num_opts, char* argv[]); - return 0; -} - -void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[]); /*---------------------------------------------------------------------------*\ @@ -78,386 +61,829 @@ void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[]); int main(int argc, char *argv[]) { - FILE *fout; /* output speech file */ - FILE *fin; /* input speech file */ - short buf[N]; /* input/output buffer */ - float Sn[M]; /* float input speech samples */ - COMP Sw[FFT_ENC]; /* DFT of Sn[] */ - float w[M]; /* time domain hamming window */ - COMP W[FFT_ENC]; /* DFT of w[] */ - MODEL model; - float Pn[2*N]; /* trapezoidal synthesis window */ - float Sn_[2*N]; /* synthesised speech */ - int i; /* loop variable */ - int frames; - float prev_Wo; - float pitch; - int voiced1 = 0; - - char out_file[MAX_STR]; - int arg; - float snr; - float sum_snr; - - int lpc_model, order = LPC_ORD; - int lsp, lspd, lspdvq, lsp_quantiser; - float ak[LPC_MAX]; - COMP Sw_[FFT_ENC]; - COMP Ew[FFT_ENC]; - - int dump; - - int phase0; - float ex_phase[MAX_AMP+1]; - - int postfilt; - float bg_est; - - int hand_voicing; - FILE *fvoicing = 0; - - MODEL prev_model, interp_model; - int decimate; - float lsps[LPC_ORD]; - float prev_lsps[LPC_ORD]; - float e, prev_e; - float ak_interp[LPC_MAX]; - - void *nlp_states; - float hpf_states[2]; - int resample; - float AresdB_prev[MAX_AMP]; - - for(i=0; i<MAX_AMP; i++) - AresdB_prev[i] = 0.0; - - for(i=0; i<M; i++) - Sn[i] = 1.0; - for(i=0; i<2*N; i++) - Sn_[i] = 0; - - prev_Wo = TWO_PI/P_MAX; - - prev_model.Wo = TWO_PI/P_MIN; - prev_model.L = floor(PI/prev_model.Wo); - for(i=1; i<=prev_model.L; i++) { - prev_model.A[i] = 0.0; - prev_model.phi[i] = 0.0; - } - for(i=1; i<=MAX_AMP; i++) { - ex_phase[i] = 0.0; - } - for(i=0; i<LPC_ORD; i++) { - prev_lsps[i] = i*PI/(LPC_ORD+1); - } - e = prev_e = 1; - hpf_states[0] = hpf_states[1] = 0.0; - - nlp_states = nlp_create(); - - if (argc < 2) { - fprintf(stderr, "\nCodec2 - 2400 bit/s speech codec - Simulation Program\n" - "\thttp://rowetel.com/codec2.html\n\n" - "usage: %s InputFile [-o OutputFile]\n" - "\t[--lpc Order]\n" - "\t[--lsp]\n" - "\t[--lspd]\n" - "\t[--lspdvq]\n" - "\t[--phase0]\n" - "\t[--postfilter]\n" - "\t[--hand_voicing]\n" - "\t[--dec]\n" - "\t[--dump DumpFilePrefix]\n", argv[0]); - exit(1); - } - - /* Interpret command line arguments -------------------------------------*/ - - /* Input file */ - - if ((fin = fopen(argv[1],"rb")) == NULL) { - fprintf(stderr, "Error opening input speech file: %s: %s.\n", - argv[1], strerror(errno)); - exit(1); - } - - /* Output file */ - - if ((arg = switch_present("-o",argc,argv))) { - if ((fout = fopen(argv[arg+1],"wb")) == NULL) { - fprintf(stderr, "Error opening output speech file: %s: %s.\n", - argv[arg+1], strerror(errno)); - exit(1); + FILE *fout = NULL; /* output speech file */ + FILE *fin; /* input speech file */ + short buf[N]; /* input/output buffer */ + float Sn[M]; /* float input speech samples */ + float Sn_pre[M]; /* pre-emphasised input speech samples */ + COMP Sw[FFT_ENC]; /* DFT of Sn[] */ + kiss_fft_cfg fft_fwd_cfg; + kiss_fft_cfg fft_inv_cfg; + float w[M]; /* time domain hamming window */ + COMP W[FFT_ENC]; /* DFT of w[] */ + MODEL model; + float Pn[2*N]; /* trapezoidal synthesis window */ + float Sn_[2*N]; /* synthesised speech */ + int i; /* loop variable */ + int frames; + float prev_Wo, prev__Wo, uq_Wo, prev_uq_Wo; + float pitch; + int voiced1 = 0; + char out_file[MAX_STR]; + char ampexp_arg[MAX_STR]; + char phaseexp_arg[MAX_STR]; + float snr; + float sum_snr; + + int lpc_model = 0, order = LPC_ORD; + int lsp = 0, lspd = 0, lspvq = 0; + int lspres = 0; + int lspdt = 0, lspdt_mode = LSPDT_ALL; + int dt = 0, lspjvm = 0, lspanssi = 0, lspjnd = 0, lspmel = 0; + int prede = 0; + float pre_mem = 0.0, de_mem = 0.0; + float ak[LPC_MAX]; + COMP Sw_[FFT_ENC]; + COMP Ew[FFT_ENC]; + + int phase0 = 0; + float ex_phase[MAX_AMP+1]; + + int postfilt; + float bg_est; + + int hand_voicing = 0, phaseexp = 0, ampexp = 0, hi = 0, simlpcpf = 0; + int lpcpf = 0; + FILE *fvoicing = 0; + + MODEL prev_model, interp_model; + int decimate = 0; + float lsps[LPC_MAX]; + float prev_lsps[LPC_MAX], prev_lsps_[LPC_MAX]; + float lsps__prev[LPC_MAX]; + float lsps__prev2[LPC_MAX]; + float e, prev_e; + float ak_interp[LPC_MAX]; + int lsp_indexes[LPC_MAX]; + float lsps_[LPC_MAX]; + float Woe_[2]; + + void *nlp_states; + float hpf_states[2]; + int scalar_quant_Wo_e = 0; + int vector_quant_Wo_e = 0; + int dump_pitch_e = 0; + FILE *fjvm = NULL; + #ifdef DUMP + int dump; + #endif + struct PEXP *pexp = NULL; + struct AEXP *aexp = NULL; + float gain = 1.0; + + char* opt_string = "ho:"; + struct option long_options[] = { + { "lpc", required_argument, &lpc_model, 1 }, + { "lspjnd", no_argument, &lspjnd, 1 }, + { "lspmel", no_argument, &lspmel, 1 }, + { "lsp", no_argument, &lsp, 1 }, + { "lspd", no_argument, &lspd, 1 }, + { "lspvq", no_argument, &lspvq, 1 }, + { "lspres", no_argument, &lspres, 1 }, + #ifdef __EXPERIMENTAL__ + { "lspdt", no_argument, &lspdt, 1 }, + { "lspdt_mode", required_argument, NULL, 0 }, + #endif + { "lspjvm", no_argument, &lspjvm, 1 }, + #ifdef __EXPERIMENTAL__ + { "lspanssi", no_argument, &lspanssi, 1 }, + #endif + { "phase0", no_argument, &phase0, 1 }, + { "phaseexp", required_argument, &phaseexp, 1 }, + { "ampexp", required_argument, &exp, 1 }, + { "postfilter", no_argument, &postfilt, 1 }, + { "hand_voicing", required_argument, &hand_voicing, 1 }, + { "dec", no_argument, &decimate, 1 }, + { "dt", no_argument, &dt, 1 }, + { "hi", no_argument, &hi, 1 }, + { "simlpcpf", no_argument, &simlpcpf, 1 }, + { "lpcpf", no_argument, &lpcpf, 1 }, + { "prede", no_argument, &prede, 1 }, + { "dump_pitch_e", required_argument, &dump_pitch_e, 1 }, + { "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 }, + { "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 }, + { "rate", required_argument, NULL, 0 }, + { "gain", required_argument, NULL, 0 }, + #ifdef DUMP + { "dump", required_argument, &dump, 1 }, + #endif + { "help", no_argument, NULL, 'h' }, + { NULL, no_argument, NULL, 0 } + }; + int num_opts=sizeof(long_options)/sizeof(struct option); + + for(i=0; i<M; i++) { + Sn[i] = 1.0; + Sn_pre[i] = 1.0; } - strcpy(out_file,argv[arg+1]); - } - else - fout = NULL; - - lpc_model = 0; - if ((arg = switch_present("--lpc",argc,argv))) { - lpc_model = 1; - order = atoi(argv[arg+1]); - if ((order < 4) || (order > 20)) { - fprintf(stderr, "Error in lpc order: %d\n", order); - exit(1); - } - } - - dump = switch_present("--dump",argc,argv); -#ifdef DUMP - if (dump) - dump_on(argv[dump+1]); -#endif - - lsp = switch_present("--lsp",argc,argv); - lsp_quantiser = 0; - if (lsp) - assert(order == LPC_ORD); + for(i=0; i<2*N; i++) + Sn_[i] = 0; - lspd = switch_present("--lspd",argc,argv); - if (lspd) - assert(order == LPC_ORD); + prev_uq_Wo = prev_Wo = prev__Wo = TWO_PI/P_MAX; - lspdvq = switch_present("--lspdvq",argc,argv); - if (lspdvq) - assert(order == LPC_ORD); + prev_model.Wo = TWO_PI/P_MIN; + prev_model.L = floor(PI/prev_model.Wo); + for(i=1; i<=prev_model.L; i++) { + prev_model.A[i] = 0.0; + prev_model.phi[i] = 0.0; + } + for(i=1; i<=MAX_AMP; i++) { + //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX; + ex_phase[i] = 0.0; + } + for(i=0; i<LPC_ORD; i++) { + lsps_[i] = prev_lsps[i] = prev_lsps_[i] = i*PI/(LPC_ORD+1); + lsps__prev[i] = lsps__prev2[i] = i*PI/(LPC_ORD+1); + } + e = prev_e = 1; + hpf_states[0] = hpf_states[1] = 0.0; - phase0 = switch_present("--phase0",argc,argv); - if (phase0) { - ex_phase[0] = 0; - } + nlp_states = nlp_create(M); - hand_voicing = switch_present("--hand_voicing",argc,argv); - if (hand_voicing) { - fvoicing = fopen(argv[hand_voicing+1],"rt"); - assert(fvoicing != NULL); - } + if (argc < 2) { + print_help(long_options, num_opts, argv); + } - bg_est = 0.0; - postfilt = switch_present("--postfilter",argc,argv); + /*----------------------------------------------------------------*\ + + Interpret Command Line Arguments + + \*----------------------------------------------------------------*/ + + while(1) { + int option_index = 0; + int opt = getopt_long(argc, argv, opt_string, + long_options, &option_index); + if (opt == -1) + break; + switch (opt) { + case 0: + if(strcmp(long_options[option_index].name, "lpc") == 0) { + order = atoi(optarg); + if((order < 4) || (order > 20)) { + fprintf(stderr, "Error in LPC order: %s\n", optarg); + exit(1); + } + #ifdef DUMP + } else if(strcmp(long_options[option_index].name, "dump") == 0) { + if (dump) + dump_on(optarg); + #endif + } else if(strcmp(long_options[option_index].name, "lsp") == 0 + || strcmp(long_options[option_index].name, "lspd") == 0 + || strcmp(long_options[option_index].name, "lspvq") == 0) { + assert(order == LPC_ORD); + } else if(strcmp(long_options[option_index].name, "lspdt_mode") == 0) { + if (strcmp(optarg,"all") == 0) + lspdt_mode = LSPDT_ALL; + else if (strcmp(optarg,"low") == 0) + lspdt_mode = LSPDT_LOW; + else if (strcmp(optarg,"high") == 0) + lspdt_mode = LSPDT_HIGH; + else { + fprintf(stderr, "Error in lspdt_mode: %s\n", optarg); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "hand_voicing") == 0) { + if ((fvoicing = fopen(optarg,"rt")) == NULL) { + fprintf(stderr, "Error opening voicing file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "dump_pitch_e") == 0) { + if ((fjvm = fopen(optarg,"wt")) == NULL) { + fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "phaseexp") == 0) { + strcpy(phaseexp_arg, optarg); + } else if(strcmp(long_options[option_index].name, "ampexp") == 0) { + strcpy(ampexp_arg, optarg); + } else if(strcmp(long_options[option_index].name, "gain") == 0) { + gain = atof(optarg); + } else if(strcmp(long_options[option_index].name, "rate") == 0) { + if(strcmp(optarg,"3200") == 0) { + lpc_model = 1; order = 10; + scalar_quant_Wo_e = 1; + lspd = 1; + phase0 = 1; + postfilt = 1; + decimate = 1; + lpcpf = 1; + } else if(strcmp(optarg,"2400") == 0) { + lpc_model = 1; order = 10; + vector_quant_Wo_e = 1; + lsp = 1; + phase0 = 1; + postfilt = 1; + decimate = 1; + lpcpf = 1; + } else if(strcmp(optarg,"1400") == 0) { + lpc_model = 1; order = 10; + vector_quant_Wo_e = 1; + lsp = 1; lspdt = 1; + phase0 = 1; + postfilt = 1; + decimate = 1; + dt = 1; + lpcpf = 1; + } else if(strcmp(optarg,"1200") == 0) { + lpc_model = 1; order = 10; + scalar_quant_Wo_e = 1; + lspjvm = 1; lspdt = 1; + phase0 = 1; + postfilt = 1; + decimate = 1; + dt = 1; + lpcpf = 1; + } else { + fprintf(stderr, "Error: invalid output rate %s\n", optarg); + exit(1); + } + } + break; + + case 'h': + print_help(long_options, num_opts, argv); + break; + + case 'o': + if (strcmp(optarg, "-") == 0) fout = stdout; + else if ((fout = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening output speech file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + strcpy(out_file,optarg); + break; + + default: + /* This will never be reached */ + break; + } + } - decimate = switch_present("--dec",argc,argv); + /* Input file */ - arg = switch_present("--resample",argc,argv); - resample = atoi(argv[arg+1]); + if ((fin = fopen(argv[optind],"rb")) == NULL) { + fprintf(stderr, "Error opening input speech file: %s: %s.\n", + argv[optind], strerror(errno)); + exit(1); + } - /* Initialise ------------------------------------------------------------*/ + ex_phase[0] = 0; + bg_est = 0.0; + Woe_[0] = Woe_[1] = 1.0; - make_analysis_window(w,W); - make_synthesis_window(Pn); - quantise_init(); + /* + printf("lspd: %d lspdt: %d lspdt_mode: %d phase0: %d postfilt: %d " + "decimate: %d dt: %d\n",lspd,lspdt,lspdt_mode,phase0,postfilt, + decimate,dt); + */ - /* Main loop ------------------------------------------------------------*/ + /* Initialise ------------------------------------------------------------*/ - frames = 0; - sum_snr = 0; - while(fread(buf,sizeof(short),N,fin)) { - frames++; - //printf("frame: %d", frames); + fft_fwd_cfg = kiss_fft_alloc(FFT_ENC, 0, NULL, NULL); /* fwd FFT,used in several places */ + fft_inv_cfg = kiss_fft_alloc(FFT_DEC, 1, NULL, NULL); /* inverse FFT, used just for synth */ + make_analysis_window(fft_fwd_cfg, w, W); + make_synthesis_window(Pn); + quantise_init(); + if (phaseexp) + pexp = phase_experiment_create(); + if (ampexp) + aexp = amp_experiment_create(); - /* Read input speech */ + /*----------------------------------------------------------------*\ - for(i=0; i<M-N; i++) - Sn[i] = Sn[i+N]; - for(i=0; i<N; i++) { - //Sn[i+M-N] = hpf((float)buf[i], hpf_states); - Sn[i+M-N] = (float)buf[i]; - } + Main Loop - /* Estimate pitch */ + \*----------------------------------------------------------------*/ - nlp(nlp_states,Sn,N,M,P_MIN,P_MAX,&pitch,Sw,&prev_Wo); - model.Wo = TWO_PI/pitch; + frames = 0; + sum_snr = 0; + while(fread(buf,sizeof(short),N,fin)) { + frames++; + //printf("frame: %d ", frames); - /* estimate model parameters */ + /* Read input speech */ - dft_speech(Sw, Sn, w); - two_stage_pitch_refinement(&model, Sw); - estimate_amplitudes(&model, Sw, W); -#ifdef DUMP - dump_Sn(Sn); dump_Sw(Sw); dump_model(&model); -#endif + for(i=0; i<M-N; i++) { + Sn[i] = Sn[i+N]; + Sn_pre[i] = Sn_pre[i+N]; + } + for(i=0; i<N; i++) + Sn[i+M-N] = buf[i]; - /* optional zero-phase modelling */ + pre_emp(&Sn_pre[M-N], &Sn[M-N], &pre_mem, N); - if (phase0) { - float Wn[M]; /* windowed speech samples */ - float Rk[LPC_MAX+1]; /* autocorrelation coeffs */ -#ifdef DUMP - dump_phase(&model.phi[0], model.L); -#endif + /*------------------------------------------------------------*\ - /* find aks here, these are overwritten if LPC modelling is enabled */ + Estimate Sinusoidal Model Parameters - for(i=0; i<M; i++) - Wn[i] = Sn[i]*w[i]; - autocorrelate(Wn,Rk,M,order); - levinson_durbin(Rk,ak,order); + \*------------------------------------------------------------*/ -#ifdef DUMP - dump_ak(ak, LPC_ORD); -#endif + nlp(nlp_states,Sn,N,P_MIN,P_MAX,&pitch,Sw,W,&prev_uq_Wo); + model.Wo = TWO_PI/pitch; - /* determine voicing */ + dft_speech(fft_fwd_cfg, Sw, Sn, w); + two_stage_pitch_refinement(&model, Sw); + estimate_amplitudes(&model, Sw, W, 1); + uq_Wo = model.Wo; - snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_Wo); -#ifdef DUMP - dump_Sw_(Sw_); - dump_Ew(Ew); - dump_snr(snr); -#endif + #ifdef DUMP + dump_Sn(Sn); dump_Sw(Sw); dump_model(&model); + #endif - /* just to make sure we are not cheating - kill all phases */ + if (ampexp) + amp_experiment(aexp, &model, ampexp_arg); - for(i=0; i<MAX_AMP; i++) - model.phi[i] = 0; + if (phaseexp) { + #ifdef DUMP + dump_phase(&model.phi[0], model.L); + #endif + phase_experiment(pexp, &model, phaseexp_arg); + #ifdef DUMP + dump_phase_(&model.phi[0], model.L); + #endif + } - if (hand_voicing) { - fscanf(fvoicing,"%d\n",&model.voiced); + if (hi) { + int m; + for(m=1; m<model.L/2; m++) + model.A[m] = 0.0; + for(m=3*model.L/4; m<=model.L; m++) + model.A[m] = 0.0; } - } - /* optional LPC model amplitudes */ + /*------------------------------------------------------------*\ - if (lpc_model) { - int lsp_indexes[LPC_MAX]; + Zero-phase modelling - e = speech_to_uq_lsps(lsps, ak, Sn, w, order); + \*------------------------------------------------------------*/ - if (lsp) { - encode_lsps(lsp_indexes, lsps, LPC_ORD); - decode_lsps(lsps, lsp_indexes, LPC_ORD); - bw_expand_lsps(lsps, LPC_ORD); - lsp_to_lpc(lsps, ak, LPC_ORD); - } + if (phase0) { + float Wn[M]; /* windowed speech samples */ + float Rk[LPC_MAX+1]; /* autocorrelation coeffs */ - if (lspd) { - float lsps_[LPC_ORD]; + #ifdef DUMP + dump_phase(&model.phi[0], model.L); + #endif - lspd_quantise(lsps, lsps_, LPC_ORD); - lsp_to_lpc(lsps_, ak, LPC_ORD); - } + /* find aks here, these are overwritten if LPC modelling is enabled */ - if (lspdvq) { - float lsps_[LPC_ORD]; + if (prede) { + for(i=0; i<M; i++) + Wn[i] = Sn_pre[i]*w[i]; + } + else { - lspdvq_quantise(lsps, lsps_, LPC_ORD); - lsp_to_lpc(lsps_, ak, LPC_ORD); - } + for(i=0; i<M; i++) + Wn[i] = Sn[i]*w[i]; + } + autocorrelate(Wn,Rk,M,order); + levinson_durbin(Rk,ak,order); - e = decode_energy(encode_energy(e)); - model.Wo = decode_Wo(encode_Wo(model.Wo)); + /* determine voicing */ - aks_to_M2(ak, order, &model, e, &snr, 1); - apply_lpc_correction(&model); - sum_snr += snr; -#ifdef DUMP - dump_quantised_model(&model); -#endif - } + snr = est_voicing_mbe(&model, Sw, W, Sw_, Ew, prev_uq_Wo); - /* optional resampling of model amplitudes */ + if (dump_pitch_e) + fprintf(fjvm, "%f %f %d ", model.Wo, snr, model.voiced); - printf("frames=%d\n", frames); - if (resample) { - snr = resample_amp_nl(&model, resample, AresdB_prev); - sum_snr += snr; -#ifdef DUMP - dump_quantised_model(&model); -#endif - } + //printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n", snr, model.voiced, + // model.Wo, prev_uq_Wo); + #ifdef DUMP + dump_Sw_(Sw_); + dump_Ew(Ew); + dump_snr(snr); + #endif - /* option decimation to 20ms rate, which enables interpolation - routine to synthesise in between frame */ + /* just to make sure we are not cheating - kill all phases */ - if (decimate) { - if (!phase0) { - printf("needs --phase0 to resample phase for interpolated Wo\n"); - exit(0); - } - if (!lpc_model) { - printf("needs --lpc 10 to resample amplitudes\n"); - exit(0); + for(i=0; i<=MAX_AMP; i++) + model.phi[i] = 0; + + if (hand_voicing) { + fscanf(fvoicing,"%d\n",&model.voiced); + } } - /* odd frame - interpolate */ + /*------------------------------------------------------------*\ + + LPC model amplitudes and LSP quantisation + + \*------------------------------------------------------------*/ + + if (lpc_model) { + + if (prede) + e = speech_to_uq_lsps(lsps, ak, Sn_pre, w, order); + else + e = speech_to_uq_lsps(lsps, ak, Sn, w, order); + + #ifdef DUMP + dump_ak(ak, LPC_ORD); + #endif + + /* tracking down -ve energy values with BW expansion */ + /* + if (e < 0.0) { + int i; + FILE*f=fopen("x.txt","wt"); + for(i=0; i<M; i++) + fprintf(f,"%f\n", Sn[i]); + fclose(f); + printf("e = %f frames = %d\n", e, frames); + for(i=0; i<order; i++) + printf("%f ", ak[i]); + exit(0); + } + */ + + if (dump_pitch_e) + fprintf(fjvm, "%f\n", e); + + #ifdef DUMP + /* dump order is different if we are decimating */ + if (!decimate) + dump_lsp(lsps); + for(i=0; i<LPC_ORD; i++) + prev_lsps[i] = lsps[i]; + #endif + + /* various LSP quantisation schemes */ + + if (lsp) { + encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); + decode_lsps_scalar(lsps_, lsp_indexes, LPC_ORD); + bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } + + if (lspd) { + encode_lspds_scalar(lsp_indexes, lsps, LPC_ORD); + decode_lspds_scalar(lsps_, lsp_indexes, LPC_ORD); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } + +#ifdef __EXPERIMENTAL__ + if (lspvq) { + lspvq_quantise(lsps, lsps_, LPC_ORD); + bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } +#endif + + if (lspjvm) { + /* Jean-Marc's multi-stage, split VQ */ + lspjvm_quantise(lsps, lsps_, LPC_ORD); + { + float lsps_bw[LPC_ORD]; + memcpy(lsps_bw, lsps_, sizeof(float)*LPC_ORD); + bw_expand_lsps(lsps_bw, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_bw, ak, LPC_ORD); + } + } + +#ifdef __EXPERIMENTAL__ + if (lspanssi) { + /* multi-stage VQ from Anssi Ramo OH3GDD */ + + lspanssi_quantise(lsps, lsps_, LPC_ORD, 5); + bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } +#endif - if (frames%2) { + /* experimenting with non-linear LSP spacing to see if + it's just noticable */ + + if (lspjnd) { + for(i=0; i<LPC_ORD; i++) + lsps_[i] = lsps[i]; + locate_lsps_jnd_steps(lsps_, LPC_ORD); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } + + /* Another experiment with non-linear LSP spacing, this + time using a scaled version of mel frequency axis + warping. The scaling is such that the integer output + can be directly sent over the channel. + */ + + if (lspmel) { + float f, f_; + int mel[LPC_ORD]; + + for(i=0; i<LPC_ORD; i++) { + f = (4000.0/PI)*lsps[i]; + mel[i] = floor(100.0*log10(1.0 + f/700.0) + 0.5); + } + + for(i=1; i<LPC_ORD; i++) { + if (mel[i] == mel[i-1]) + mel[i]++; + } + + for(i=0; i<LPC_ORD; i++) { + f_ = 700.0*( pow(10.0, (float)mel[i]/100.0) - 1.0); + lsps_[i] = f_*(PI/4000.0); + } + for(i=5; i<10; i++) { + lsps_[i] = lsps[i]; + } + + lsp_to_lpc(lsps_, ak, LPC_ORD); + } + + /* we need lsp__prev[] for lspdt and decimate. If no + other LSP quantisation is used we use original LSPs as + there is no quantised version available. TODO: this is + mess, we should have structures and standard + nomenclature for previous frames values, lsp_[] + shouldn't be overwritten as we may want to dump it for + analysis. Re-design some time. + */ + + if (!lsp && !lspd && !lspvq && !lspres && !lspjvm && !lspanssi && !lspjnd && !lspmel) + for(i=0; i<LPC_ORD; i++) + lsps_[i] = lsps[i]; + + /* Odd frames are generated by quantising the difference + between the previous frames LSPs and this frames */ + +#ifdef __EXPERIMENTAL__ + if (lspdt && !decimate) { + if (frames%2) { + lspdt_quantise(lsps, lsps_, lsps__prev, lspdt_mode); + bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } + for(i=0; i<LPC_ORD; i++) + lsps__prev[i] = lsps_[i]; + } +#endif - interp_model.voiced = voiced1; + /* + When decimation is enabled we only send LSPs to the + decoder on odd frames. In the Delta-time LSPs case we + encode every second odd frame (i.e. every 3rd frame out + of 4) by quantising the difference between the 1st + frames LSPs and the 3rd frames: + + 10ms, frame 1: discard (interpolate at decoder) + 20ms, frame 2: send "full" LSP frame + 30ms, frame 3: discard (interpolate at decoder) + 40ms, frame 4: send LSPs differences between frame 4 and frame 2 + */ + + if (lspdt && decimate) { + /* print previous LSPs to make sure we are using the right set */ + if ((frames%4) == 0) { + //printf(" lspdt "); + //#define LSPDT + #ifdef LSPDT + lspdt_quantise(lsps, lsps_, lsps__prev2, lspdt_mode); + #else + for(i=0; i<LPC_ORD; i++) + lsps_[i] = lsps__prev2[i]; + #endif + bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_, ak, LPC_ORD); + } + + for(i=0; i<LPC_ORD; i++) { + lsps__prev2[i] = lsps__prev[i]; + lsps__prev[i] = lsps_[i]; + } + } + #ifdef DUMP + /* if using decimated (20ms) frames we dump interp + LSPs below */ + if (!decimate) + dump_lsp_(lsps_); + #endif + + if (scalar_quant_Wo_e) { + + e = decode_energy(encode_energy(e)); + + if (!decimate) { + /* we send params every 10ms, delta-time every 20ms */ + if (dt && (frames % 2)) + model.Wo = decode_Wo_dt(encode_Wo_dt(model.Wo, prev_Wo),prev_Wo); + else + model.Wo = decode_Wo(encode_Wo(model.Wo)); + } + + if (decimate) { + /* we send params every 20ms */ + if (dt && ((frames % 4) == 0)) { + /* delta-time every 40ms */ + model.Wo = decode_Wo_dt(encode_Wo_dt(model.Wo, prev__Wo),prev__Wo); + } + else + model.Wo = decode_Wo(encode_Wo(model.Wo)); + } + + model.L = PI/model.Wo; /* if we quantise Wo re-compute L */ + } + + if (vector_quant_Wo_e) { + + /* JVM's experimental joint Wo & LPC energy quantiser */ + + //printf("\nWo %f e %f\n", model.Wo, e); + quantise_WoE(&model, &e, Woe_); + //printf("Wo %f e %f\n", model.Wo, e); + + } + + aks_to_M2(fft_fwd_cfg, ak, order, &model, e, &snr, 1, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA); + apply_lpc_correction(&model); + + #ifdef DUMP + dump_ak_(ak, LPC_ORD); + #endif + + /* note SNR on interpolated frames can't be measured properly + by comparing Am as L has changed. We can dump interp lsps + and compare them, + */ + #ifdef DUMP + dump_lpc_snr(snr); + #endif + sum_snr += snr; + #ifdef DUMP + dump_quantised_model(&model); + #endif + } - #ifdef LOG_LIN_INTERP - interpolate(&interp_model, &prev_model, &model); - #else - interpolate_lsp(&interp_model, &prev_model, &model, - prev_lsps, prev_e, lsps, e, ak_interp); - apply_lpc_correction(&interp_model); - #endif + /*------------------------------------------------------------*\ + + Decimation to 20ms frame rate + + \*------------------------------------------------------------*/ + + if (decimate) { + float lsps_interp[LPC_ORD]; + + if (!phase0) { + printf("needs --phase0 to resample phase for interpolated Wo\n"); + exit(0); + } + if (!lpc_model) { + printf("needs --lpc 10 to resample amplitudes\n"); + exit(0); + } + + /* + Each 20ms we synthesise two 10ms frames: + + frame 1: discard except for voicing bit + frame 2: interpolate frame 1 LSPs from frame 2 and frame 0 + synthesise frame 1 and frame 2 speech + frame 3: discard except for voicing bit + frame 4: interpolate frame 3 LSPs from frame 4 and frame 2 + synthesise frame 3 and frame 4 speech + */ + + if ((frames%2) == 0) { + //printf("frame: %d\n", frames); + + /* decode interpolated frame */ + + interp_model.voiced = voiced1; + + interpolate_lsp(fft_fwd_cfg, &interp_model, &prev_model, &model, + prev_lsps_, prev_e, lsps_, e, ak_interp, lsps_interp); + apply_lpc_correction(&interp_model); + + /* used to compare with c2enc/c2dec version + + printf(" Wo: %1.5f L: %d v1: %d prev_e: %f\n", + interp_model.Wo, interp_model.L, interp_model.voiced, prev_e); + printf(" lsps_interp: "); + for(i=0; i<LPC_ORD; i++) + printf("%5.3f ", lsps_interp[i]); + printf("\n A..........: "); + for(i=0; i<10; i++) + printf("%5.3f ",interp_model.A[i]); + + printf("\n Wo: %1.5f L: %d e: %3.2f v2: %d\n", + model.Wo, model.L, e, model.voiced); + printf(" lsps_......: "); + for(i=0; i<LPC_ORD; i++) + printf("%5.3f ", lsps_[i]); + printf("\n A..........: "); + for(i=0; i<10; i++) + printf("%5.3f ",model.A[i]); + printf("\n"); + */ + + #ifdef DUMP + /* do dumping here so we get lsp dump file in correct order */ + dump_lsp(prev_lsps); + dump_lsp(lsps_interp); + dump_lsp(lsps); + dump_lsp(lsps_); + #endif + + if (phase0) + phase_synth_zero_order(fft_fwd_cfg, &interp_model, ak_interp, ex_phase, + order); + if (postfilt) + postfilter(&interp_model, &bg_est); + synth_one_frame(fft_inv_cfg, buf, &interp_model, Sn_, Pn, prede, &de_mem, gain); + //printf(" buf[0] %d\n", buf[0]); + if (fout != NULL) + fwrite(buf,sizeof(short),N,fout); + + /* decode this frame */ + + if (phase0) + phase_synth_zero_order(fft_fwd_cfg, &model, ak, ex_phase, order); + if (postfilt) + postfilter(&model, &bg_est); + synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn, prede, &de_mem, gain); + //printf(" buf[0] %d\n", buf[0]); + if (fout != NULL) + fwrite(buf,sizeof(short),N,fout); + + /* update states for next time */ + + prev_model = model; + for(i=0; i<LPC_ORD; i++) + prev_lsps_[i] = lsps_[i]; + prev_e = e; + } + else { + voiced1 = model.voiced; + } + } + else { + /* no decimation - sythesise each 10ms frame immediately */ if (phase0) - phase_synth_zero_order(&interp_model, ak_interp, ex_phase, - order); - if (postfilt) - postfilter(&interp_model, &bg_est); - synth_one_frame(buf, &interp_model, Sn_, Pn); - if (fout != NULL) fwrite(buf,sizeof(short),N,fout); + phase_synth_zero_order(fft_fwd_cfg, &model, ak, ex_phase, order); - if (phase0) - phase_synth_zero_order(&model, ak, ex_phase, order); if (postfilt) postfilter(&model, &bg_est); - synth_one_frame(buf, &model, Sn_, Pn); + synth_one_frame(fft_inv_cfg, buf, &model, Sn_, Pn, prede, &de_mem, gain); if (fout != NULL) fwrite(buf,sizeof(short),N,fout); - - prev_model = model; - for(i=0; i<LPC_ORD; i++) - prev_lsps[i] = lsps[i]; - prev_e = e; - } - else { - voiced1 = model.voiced; } + + prev__Wo = prev_Wo; + prev_Wo = model.Wo; + prev_uq_Wo = uq_Wo; + //if (frames == 8) { + // exit(0); + //} } - else { - if (phase0) - phase_synth_zero_order(&model, ak, ex_phase, order); - if (postfilt) - postfilter(&model, &bg_est); - synth_one_frame(buf, &model, Sn_, Pn); - if (fout != NULL) fwrite(buf,sizeof(short),N,fout); - } - prev_Wo = TWO_PI/pitch; - } - fclose(fin); - if (fout != NULL) - fclose(fout); + /*----------------------------------------------------------------*\ - if (lpc_model || resample) - printf("SNR av = %5.2f dB\n", sum_snr/frames); + End Main Loop -#ifdef DUMP - if (dump) - dump_off(); -#endif + \*----------------------------------------------------------------*/ + + fclose(fin); - if (hand_voicing) - fclose(fvoicing); + if (fout != NULL) + fclose(fout); - nlp_destroy(nlp_states); + if (lpc_model) + printf("SNR av = %5.2f dB\n", sum_snr/frames); - return 0; + if (phaseexp) + phase_experiment_destroy(pexp); + if (ampexp) + amp_experiment_destroy(aexp); + #ifdef DUMP + if (dump) + dump_off(); + #endif + + if (hand_voicing) + fclose(fvoicing); + + nlp_destroy(nlp_states); + + return 0; } -void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[]) +void synth_one_frame(kiss_fft_cfg fft_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain) { int i; - synthesise(Sn_, model, Pn, 1); + synthesise(fft_inv_cfg, Sn_, model, Pn, 1); + if (prede) + de_emp(Sn_, Sn_, de_mem, N); for(i=0; i<N; i++) { + Sn_[i] *= gain; if (Sn_[i] > 32767.0) buf[i] = 32767; else if (Sn_[i] < -32767.0) @@ -467,3 +893,36 @@ void synth_one_frame(short buf[], MODEL *model, float Sn_[], float Pn[]) } } + +void print_help(const struct option* long_options, int num_opts, char* argv[]) +{ + int i; + char *option_parameters; + + fprintf(stderr, "\nCodec2 - low bit rate speech codec - Simulation Program\n" + "\thttp://rowetel.com/codec2.html\n\n" + "usage: %s [OPTIONS] <InputFile>\n\n" + "Options:\n" + "\t-o <OutputFile>\n", argv[0]); + for(i=0; i<num_opts-1; i++) { + if(long_options[i].has_arg == no_argument) { + option_parameters=""; + } else if (strcmp("lpc", long_options[i].name) == 0) { + option_parameters = " <Order>"; + } else if (strcmp("lspdt_mode", long_options[i].name) == 0) { + option_parameters = " <all|high|low>"; + } else if (strcmp("hand_voicing", long_options[i].name) == 0) { + option_parameters = " <VoicingFile>"; + } else if (strcmp("dump_pitch_e", long_options[i].name) == 0) { + option_parameters = " <Dump File>"; + } else if (strcmp("rate", long_options[i].name) == 0) { + option_parameters = " <4800|2400|1400|1200>"; + } else if (strcmp("dump", long_options[i].name) == 0) { + option_parameters = " <DumpFilePrefix>"; + } else { + option_parameters = " <UNDOCUMENTED parameter>"; + } + fprintf(stderr, "\t--%s%s\n", long_options[i].name, option_parameters); + } + exit(1); +} |