From b409a4b0c6131e01fc5a03c0fc31caa4829b0dec Mon Sep 17 00:00:00 2001
From: Johnathan Corgan <jcorgan@corganenterprises.com>
Date: Mon, 18 Jul 2011 15:54:43 -0700
Subject: gr-vocoder: re-implemented gr-codec2-vocoder inside gr-vocoder

---
 gr-vocoder/lib/codec2/phase.c | 262 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 262 insertions(+)
 create mode 100644 gr-vocoder/lib/codec2/phase.c

diff --git a/gr-vocoder/lib/codec2/phase.c b/gr-vocoder/lib/codec2/phase.c
new file mode 100644
index 0000000000..0e1a14a601
--- /dev/null
+++ b/gr-vocoder/lib/codec2/phase.c
@@ -0,0 +1,262 @@
+/*---------------------------------------------------------------------------*\
+                                                                             
+  FILE........: phase.c                                           
+  AUTHOR......: David Rowe                                             
+  DATE CREATED: 1/2/09                                                 
+                                                                             
+  Functions for modelling and synthesising phase.
+                                                                             
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General
+  Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "defines.h"
+#include "phase.h"
+#include "fft.h"
+#include "comp.h"
+#include "glottal.c"	/* glottal[] phase look-up table */
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define GLOTTAL_FFT_SIZE 512	/* FFT size on which glottal[] phases are sampled */
+
+/*---------------------------------------------------------------------------*\
+
+  aks_to_H()
+
+  Samples the complex LPC synthesis filter spectrum at the harmonic
+  frequencies.
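+
+  As a sketch of the relationship, in the code's own symbols: with
+  A(z) = aks[0] + aks[1]*z^-1 + ... + aks[order]*z^-order, the
+  sampled filter is
+
+    H[m] = sqrt(G)/A(exp(j*m*Wo)),  m = 1..L
+
+  with the magnitude RMS-averaged across the bins of each harmonic's
+  band.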
+
+\*---------------------------------------------------------------------------*/
+
+void aks_to_H(
+	      MODEL *model,	/* model parameters */
+	      float  aks[],	/* LPC's */
+	      float  G,	        /* energy term */
+	      COMP   H[],	/* complex LPC spectral samples */
+	      int    order
+)
+{
+  COMP  Pw[FFT_DEC];	/* power spectrum */
+  int   i,m;		/* loop variables */
+  int   am,bm;		/* limits of current band */
+  float r;		/* no. rads/bin */
+  float Em;		/* energy in band */
+  float Am;		/* spectral amplitude sample */
+  int   b;		/* centre bin of harmonic */
+  float phi_;		/* phase of LPC spectra */
+
+  r = TWO_PI/(FFT_DEC);
+
+  /* Determine DFT of A(exp(jw)) ------------------------------------------*/
+
+  for(i=0; i<FFT_DEC; i++) {
+    Pw[i].real = 0.0;
+    Pw[i].imag = 0.0;
+  }
+
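+  /* LPC coefficients, zero padded out to the FFT size */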
+  for(i=0; i<=order; i++)
+    Pw[i].real = aks[i];
+
+  fft(&Pw[0].real,FFT_DEC,-1);
+
+  /* Sample magnitude and phase at harmonics */
+
+  for(m=1; m<=model->L; m++) {
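+    /* lower/upper band edges (am,bm) and centre bin b of harmonic m */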
+    am = floor((m - 0.5)*model->Wo/r + 0.5);
+    bm = floor((m + 0.5)*model->Wo/r + 0.5);
+    b = floor(m*model->Wo/r + 0.5);
+
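+    /* amplitude: average G/|A(exp(jw))|^2 energy across the band */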
+    Em = 0.0;
+    for(i=am; i<bm; i++)
+      Em += G/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
+    Am = sqrt(fabs(Em/(bm-am)));
+
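+    /* phase: LPC spectrum sampled at the centre bin of the harmonic */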
+    phi_ = -atan2(Pw[b].imag,Pw[b].real);
+    H[m].real = Am*cos(phi_);
+    H[m].imag = Am*sin(phi_);
+  }
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+   phase_synth_zero_order()
+
+   Synthesises phases based on SNR and a rule-based approach.  No phase
+   parameters are required apart from the SNR (which can be reduced to a
+   1 bit V/UV decision per frame).
+
+   The phase of each harmonic is modelled as the phase of an LPC
+   synthesis filter excited by an impulse.  Unlike the first order
+   model, the position of the impulse is not transmitted, so we create
+   an excitation pulse train using a rule-based approach.
+
+   Consider a pulse train with a pulse starting time n=0, with pulses
+   repeated at a rate of Wo, the fundamental frequency.  A pulse train
+   in the time domain is equivalent to harmonics in the frequency
+   domain.  We can make an excitation pulse train using a sum of
+   sinusoids:
+
+     for(m=1; m<=L; m++)
+       ex[n] += cos(m*Wo*n)
+
+   Note: the Octave script ../octave/phase.m is an example of this if
+   you would like to try making a pulse train.
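+
+   As a stand-alone sketch (hypothetical test code, not part of this
+   file), one frame of such an excitation could be built with:
+
+     float ex[N];
+     int   n, m;
+
+     for(n=0; n<N; n++) {
+         ex[n] = 0.0;
+         for(m=1; m<=L; m++)
+             ex[n] += cos(m*Wo*n);
+     }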
+
+   The phase of each excitation harmonic is:
+
+     arg(E[m]) = m*Wo
+
+   where E[m] are the complex excitation (freq domain) samples, and
+   arg(x) just returns the phase of a complex sample x.
+
+   As we don't transmit the pulse position for this model, we need to
+   synthesise it.  Now the excitation pulses occur at a rate of Wo.
+   This means the phase of the first harmonic advances by Wo*N radians
+   over a synthesis frame of N samples.  For example if Wo is pi/20
+   (200 Hz), then over a 10ms frame (N=80 samples), the phase of the
+   first harmonic would advance (pi/20)*80 = 4*pi or two complete
+   cycles.
+
+   We generate the excitation phase of the fundamental (first
+   harmonic):
+
+     arg(E[1]) = Wo*N
+
+   We then relate the phase of the m-th excitation harmonic to the
+   phase of the fundamental as:
+
+     arg(E[m]) = m*arg(E[1])
+
+   This E[m] then gets passed through the LPC synthesis filter to
+   determine the final harmonic phase.
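+
+   That is, A_[m] = H[m]*E[m] (a complex multiply in the code below),
+   so the synthesised phase is arg(A_[m]) = arg(H[m]) + arg(E[m]).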
+     
+   Comparing to speech synthesised using original phases:
+
+   - Through headphones speech synthesised with this model is not as
+     good.  Through a loudspeaker it is very close to speech
+     synthesised with the original phases.
+
+   - If there are voicing errors, the speech can sound clicky or
+     staticky.  If V speech is mistakenly declared UV, this model tends to
+     synthesise impulses or clicks, as there is usually very little shift or
+     dispersion through the LPC filter.
+
+   - When combined with LPC amplitude modelling there is an additional
+     drop in quality.  I am not sure why; my theory is that inter-formant
+     energy is raised, making any phase errors more obvious.
+
+   NOTES:
+
+     1/ This synthesis model is effectively the same as that of a simple
+     LPC-10 vocoder, and yet sounds much better.  Why?  Conventional wisdom
+     (AMBE, MELP) says mixed voicing is required for high quality
+     speech.
+
+     2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
+     also from MIT) first described this zero phase model; I need to look
+     up the paper.
+
+     3/ Note that this approach could cause some discontinuities in
+     the phase at the edge of synthesis frames, as no attempt is made
+     to make sure that the phase tracks are continuous (the excitation
+     phases are continuous, but not the final phases after filtering
+     by the LPC spectra).  Technically this is a bad thing.  However
+     this may actually be a good thing, disturbing the phase tracks a
+     bit.  More research needed, e.g. test a synthesis model that adds
+     a small delta-W to make phase tracks line up for voiced
+     harmonics.
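+
+   A hypothetical usage sketch (the caller shown is an assumption, not
+   part of this file): a decoder keeps a persistent excitation phase
+   state across frames and calls
+
+     float ex_phase[1] = {0.0};
+     phase_synth_zero_order(&model, aks, ex_phase, order);
+
+   once per synthesis frame, after the LPCs are decoded, leaving the
+   synthesised phases in model.phi[1..model.L].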
+
+\*---------------------------------------------------------------------------*/
+
+void phase_synth_zero_order(
+    MODEL *model,
+    float  aks[],
+    float *ex_phase,            /* excitation phase of fundamental */
+    int    order
+)
+{
+  int   m;
+  float new_phi;
+  COMP  Ex[MAX_AMP];		/* excitation samples */
+  COMP  A_[MAX_AMP];		/* synthesised harmonic samples */
+  COMP  H[MAX_AMP];             /* LPC freq domain samples */
+  float G;
+  float jitter = 0.0;
+  float r;
+  int   b;
+
+  G = 1.0;	/* arbitrary gain, only the phase of H[] is used below */
+  aks_to_H(model, aks, G, H, order);
+
+  /* 
+     Update excitation fundamental phase track, this sets the position
+     of each pitch pulse during voiced speech.  After much experimentation
+     I found that using just this frame's Wo improved quality for UV
+     sounds compared to interpolating two frames' Wo like this:
+
+     ex_phase[0] += (*prev_Wo + model->Wo)*N/2;
+  */
+  
+  ex_phase[0] += (model->Wo)*N;
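+  /* wrap phase to +/- pi so it doesn't grow without bound */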
+  ex_phase[0] -= TWO_PI*floor(ex_phase[0]/TWO_PI + 0.5);
+  r = TWO_PI/GLOTTAL_FFT_SIZE;	/* rads/bin on the glottal[] grid */
+
+  for(m=1; m<=model->L; m++) {
+	  
+      /* generate excitation */
+
+      if (model->voiced) {
+	  /* I think adding a little jitter helps improve low pitch
+	     males like hts1a. This moves the onset of each harmonic
+	     over by +/- 0.25 of a sample.
+	  */
+	  jitter = 0.25*(1.0 - 2.0*rand()/RAND_MAX);
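+	  /* map harmonic m to the nearest bin of the glottal[] table */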
+	  b = floor(m*model->Wo/r + 0.5);
+	  if (b > ((GLOTTAL_FFT_SIZE/2)-1)) {
+	      b = (GLOTTAL_FFT_SIZE/2)-1;
+	  }
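+	  /* voiced excitation: phase linear in m, plus jitter and a
+	     fixed dispersion term from the glottal[] table */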
+	  Ex[m].real = cos(ex_phase[0]*m - jitter*model->Wo*m + glottal[b]);
+	  Ex[m].imag = sin(ex_phase[0]*m - jitter*model->Wo*m + glottal[b]);
+      }
+      else {
+
+	  /* When a few samples were tested I found that the LPC filter
+	     phase is not needed in the unvoiced case, but there is no
+	     harm in keeping it.
+	  */
+	  float phi = TWO_PI*(float)rand()/RAND_MAX;
+	  Ex[m].real = cos(phi);
+	  Ex[m].imag = sin(phi);
+      }
+
+      /* filter using LPC filter (complex multiply A_[m] = H[m]*Ex[m]) */
+
+      A_[m].real = H[m].real*Ex[m].real - H[m].imag*Ex[m].imag;
+      A_[m].imag = H[m].imag*Ex[m].real + H[m].real*Ex[m].imag;
+
+      /* modify sinusoidal phase */
+   
+      new_phi = atan2(A_[m].imag, A_[m].real+1E-12);	/* 1E-12 guards against atan2(0,0) */
+      model->phi[m] = new_phi;
+  }
+
+}
-- 
cgit v1.2.3