/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                         Copyright (c) 1998                            */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*             Author :  Alan W Black                                    */
/*             Date   :  April 1998                                      */
/*-----------------------------------------------------------------------*/
/*                                                                       */
/*  Yet another unit selection method.                                   */
/*                                                                       */
/*  Using an acoustic measure find the distance between all units in the */
/*  db.  Try to minimise the mean difference between units in a cluster  */
/*  using CART technology, based on features like phonetic and prosodic  */
/*  context.  This gives a bunch of CARTs for each unit type in the db   */
/*  which are acoustically close.  Use these as candidates and optimise  */
/*  a path through them minimising join using a viterbi search.          */
/*                                                                       */
/*  Advantages:                                                          */
/*    requires little or no measurements at selection time               */
/*    allows for clear method of pruning                                 */
/*    no weights need to be generated                                    */
/*    will optimise appropriately with varying numbers of example units  */
/*                                                                       */
/*  Disadvantages:                                                       */
/*    Units can't cross between clusters                                 */
/*                                                                       */
/*  Implementation of Black, A. and Taylor, P. (1997). Automatically     */
/*  clustering similar units for unit selection in speech synthesis      */
/*  Proceedings of Eurospeech 97, vol2 pp 601-604, Rhodes, Greece.       */
/*                                                                       */
/*  postscript: http://www.cstr.ed.ac.uk/~awb/papers/ES97units.ps        */
/*  http://www.cstr.ed.ac.uk/~awb/papers/ES97units/ES97units.html        */
/*                                                                       */
/*  Comments:                                                            */
/*                                                                       */
/*  This is a new implementation using the newer unit selection/signal   */
/*  processing architecture in festival                                  */
/*                                                                       */
/*  This is still considered experimental code, it doesn't produce       */
/*  reliable high quality synthesis (though does shine occasionally)     */
/*  it is also slow due to optimal coupling being done in a naive way    */
/*                                                                       */
/*=======================================================================*/
#include <stdlib.h>
#include <math.h>
#include "festival.h"
#include "clunits.h"

// Forward declarations of the file-local helpers defined further down.

// Reloads the module-level settings from the Lisp variables
// clunits_params and clunits_selection_trees.
static void setup_clunits_params();
// Candidate-list callback passed to EST_Viterbi_Decoder in clunits_select.
static EST_VTCandidate *TS_candlist(EST_Item &s);
// Path-extension callback passed to EST_Viterbi_Decoder in clunits_select.
static EST_VTPath *TS_npath(EST_VTPath *p,EST_VTCandidate *c);
// Join cost used by TS_npath when optimal_coupling is zero; depends only
// on the phone class of s.
static float naive_join_cost(const EST_String &u0,
			     const EST_String &u1,
			     EST_Item *s);
// Join cost used by TS_npath when optimal_coupling is non-zero; also
// reports how far each unit boundary should be moved.
static float optimal_couple(const EST_String &u0,
			    const EST_String &u1,
			    float &u0_move,
			    float &u1_move);

// Module-level state, refreshed by setup_clunits_params() on every call
// to clunits_select().  The two LISP values are gc_protect'ed in
// festival_clunits_init().

// Value of the Lisp variable clunits_selection_trees; TS_candlist looks
// up the tree for a segment in it by the segment's name.
LISP selection_trees = NIL;
// Value of the Lisp variable clunits_params.
LISP clunits_params = NIL;
// "optimal_coupling" parameter (default 0): when non-zero TS_npath uses
// optimal_couple() instead of naive_join_cost().
int optimal_coupling = 0;
// "continuity_weight" parameter (default 1): multiplier applied to the
// join cost in TS_npath.
float continuity_weight = 1;

static LISP clunits_select(LISP utt)
{
    // Utterance module body for Clunits_Select.
    //
    // Runs a Viterbi search over the "Segment" relation of UTT, using
    // TS_candlist to propose candidate units for each segment and
    // TS_npath to extend paths with a join cost.  The name of the chosen
    // candidate is stored on each segment as "unit_id", and the
    // "unit_this_move"/"unit_prev_move" features recorded on the winning
    // path are copied across as well.
    //
    // Returns UTT.  If the search finds no path an error is printed and
    // UTT is returned without any unit_id features.
    EST_Utterance *u = get_c_utt(utt);
    EST_Viterbi_Decoder v(TS_candlist,TS_npath,-1);
    v.set_big_is_good(FALSE);  // scores are costs: smaller is better

    // Re-read the Lisp-side parameters on every call so that changes
    // made between utterances are picked up
    setup_clunits_params();

    v.initialise(u->relation("Segment"));
    v.search();
    // result() reports whether a best path could be extracted (per the
    // EST_Viterbi_Decoder interface); don't silently carry on without one
    if (!v.result("unit_id"))
    {
	cerr << "CLUNITS: failed to find path" << endl;
	return utt;
    }
    v.copy_feature("unit_this_move");
    v.copy_feature("unit_prev_move");

    return utt;
}

static void setup_clunits_params()
{
    // Set up params
    clunits_params = siod_get_lval("clunits_params",
				    "CLUNITS: no parameters set for module");
    optimal_coupling = get_param_int("optimal_coupling",clunits_params,0);
    continuity_weight = get_param_float("continuity_weight",clunits_params,1);
    selection_trees = siod_get_lval("clunits_selection_trees",
				    "CLUNITS: clunits_selection_trees unbound");
}

static EST_VTCandidate *TS_candlist(EST_Item &s)
{
    // Viterbi callback: build the linked list of candidate units for
    // target segment s.
    //
    // The tree stored under s.name() in selection_trees is applied to s
    // with wagon_pd.  The prediction is read as (GROUP MEAN ...), where
    // each GROUP entry is read as (UNIT-ID DISTANCE ...).  A candidate
    // is created per entry, named "<segment name>_<UNIT-ID>" and scored
    // with the squared difference between its DISTANCE and MEAN.
    //
    // Candidates are pushed on the front of the list, so the returned
    // list is in the reverse order of GROUP (0 if GROUP is empty).
    LISP unit_tree = car(cdr(siod_assoc_str(s.name(),selection_trees)));
    LISP prediction = wagon_pd(&s,unit_tree);

    if (prediction == NIL)
    {
	cerr << "CLUNITS: no predicted class for " << s.name() << endl;
	festival_error();
    }

    LISP members = car(prediction);
    float cluster_mean = get_c_float(car(cdr(prediction)));

    EST_VTCandidate *head = 0;
    LISP rest = members;
    while (rest != NIL)
    {
	LISP entry = car(rest);
	EST_VTCandidate *cand = new EST_VTCandidate;

	cand->name = s.name()+"_"+get_c_string(car(entry));
	cand->s = &s;
	// Squared deviation of this unit's distance from the cluster mean
	// (could be precalculated; might want normalising by the overall
	// mean of the set)
	cand->score = get_c_float(car(cdr(entry)))-cluster_mean;
	cand->score *= cand->score;

	// Push on the front of the list
	cand->next = head;
	head = cand;

	rest = cdr(rest);
    }

    return head;
}

static EST_VTPath *TS_npath(EST_VTPath *p,EST_VTCandidate *c)
{
    // Viterbi callback: extend path p with candidate c and return the
    // newly allocated path.
    //
    // The step cost is c's own score plus a join cost between the unit
    // at the end of p and c, scaled by continuity_weight.  When there is
    // no previous unit (p is 0 or carries no candidate) c is joined with
    // itself.  The step cost is recorded in the "lscore" feature and the
    // negated boundary moves in "unit_prev_move"/"unit_this_move".
    EST_VTPath *np = new EST_VTPath;
    EST_String u0, u1;
    float u0_move=0.0, u1_move=0.0;
    float cost;

    // Names of the units either side of the join
    if ((p != 0) && (p->c != 0))
	u0 = p->c->name;
    else
	u0 = c->name;
    u1 = c->name;

    // Join cost: proper coupling if requested, otherwise the naive measure
    cost = optimal_coupling
	? optimal_couple(u0,u1,u0_move,u1_move)
	: naive_join_cost(u0,u1,c->s);
    cost *= continuity_weight;

    // Link the new path element into the lattice
    np->from = p;
    np->c = c;
    np->state = c->pos;  // "state" is candidate number

    np->f.set("lscore",c->score+cost);
    np->f.set("unit_prev_move",-u0_move);
    np->f.set("unit_this_move",-u1_move);

    // Cumulative score along the path
    if (p != 0)
	np->score = (c->score+cost) + p->score;
    else
	np->score = (c->score+cost);

    return np;
}

static float optimal_couple(const EST_String &u0,
			    const EST_String &u1,
			    float &u0_move,
			    float &u1_move)
{
    // Intended to find the best score for joining u0 to u1 and the best
    // place to make the join.  Currently a stub: the unit names are
    // ignored, both moves are reported as zero and the cost is always 1.
    (void)u0;
    (void)u1;

    u0_move = u1_move = 0;

    return 1.0;
}

static float naive_join_cost(const EST_String &u0,
			     const EST_String &u1,
			     EST_Item *s)
{
    // A naive join cost, because I haven't ported the info yet
    (void)u0;
    (void)u1;

    if (ph_is_silence(s->name()))
	return 0;
    else if (ph_is_stop(s->name()))
	return 0.2;
    else if (ph_is_fricative(s->name()))
	return 0.3;
    else
	return 1.0;
}

void festival_clunits_init(void)
{
    // Module initialisation for clunits: proclaims the module, protects
    // the Lisp values held in C globals, and registers the utterance
    // module and the Lisp-callable functions with their doc strings.
    proclaim_module("clunits");

    // LISP values kept in file-level C variables
    gc_protect(&clunits_params);
    gc_protect(&selection_trees);

    // Unit selection itself
    festival_def_utt_module(
	"Clunits_Select",clunits_select,
	"(Clunits_Select UTT)\n"
	"  Select units from current databases using cluster selection method.");

    // Database loading
    init_subr_1(
	"clunits:load_db",cl_load_db,
	"(clunits:load_db PARAMS)\n"
	"  Load index file for cluster database and set up params.");

    // Acoustic cost utilities
    init_subr_2(
	"acost:build_disttabs",make_unit_distance_tables,
	"(acost:build_disttabs UTTTYPES PARAMS)\n"
	"  Built matrices of distances between each ling_item in each each list\n"
	"  of ling_items in uttypes.   Uses acoustic weights in PARAMS and save\n"
	"  the result as a matrix for later use.");

    init_subr_2(
	"acost:utt.load_coeffs",acost_utt_load_coeffs,
	"(acost:utt.load_coeffs UTT PARAMS)\n"
	"  Load in the acoustic coefficients into UTT and set the Acoustic_Coeffs\n"
	"  feature for each segment in UTT.");

    init_subr_3(
	"acost:file_difference",ac_distance_tracks,
	"(acost:file_difference FILENAME1 FILENAME2 PARAMS)\n"
	"  Load in the two named tracks and find the acoustic difference over all\n"
	"  based on the weights in PARAMS.");

}
