%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% File: syllabifyBabble.m
%
% Helper to turn the phonemes into syllables.
% This will be an iterative process, so what's
% returned is really possible parses. These are
% split into two parts each, the parsed syllables
% and the unparsed phonemes. The calling function(s)
% will then have to deal with it.
%
% NOTE: Parsed syllables are simply numbers that
% index into the global all_syllables map.
%
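% Inputs:
%   * phonemes: The phonemes to syllabify.
%   * l_ctr: The line number they come from. It is only used
%       in the warning printed when the parse is ambiguous.
%
% Outputs:
%   * syllables: The possible parses, as a cell array in which
%       each parse is a two-element cell:
%       {parsed syllable indices, still-unparsed phonemes}.
%
% Usage: syllables = syllabifyBabble({[97], [204 154]}, 1)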
%
% Author: Doug Bemis
% Date: 11/27/11
%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function syllables = syllabifyBabble(phonemes, l_ctr)
% Enumerates every way of splitting the phonemes into known syllables.
%
% Each parse is a two-element cell: {syllable indices, unparsed phonemes}.
% If at least one parse consumes every phoneme, a single full parse is
% returned (after disambiguation when several exist). Otherwise every
% parse that still had unparsed phonemes is returned so that the caller
% can decide what to do with it.
%
% l_ctr is only needed to label the ambiguity warning, so it may be
% omitted (e.g. when calling with just the phonemes).

% l_ctr is optional: without this guard a one-argument call would crash
% with an undefined-variable error as soon as a parse was ambiguous.
if nargin < 2
    line_label = 'unknown';
else
    line_label = num2str(l_ctr);
end

% First, the phonemes are in utf-8, so all ASCII
% characters are one byte, others are two. First, turn
% to bytes and then parse into one or two bytes
% accordingly
real_phonemes = getPhonemesFromBytes(phonemes);

% Work list of parses still to be expanded. Each entry is
% {syllables so far, phonemes still to be parsed}.
failed_parses = {};
full_parses = {};
possible_parses = {{[], real_phonemes}};

% Check each possible parse
while ~isempty(possible_parses)

    % Pop the next candidate off the front of the work list
    curr_parse = possible_parses{1};
    possible_parses = possible_parses(2:end);
    remaining = curr_parse{2};

    % Try every prefix of the unparsed phonemes as a syllable. A
    % non-empty result from getIndexFromSyllable is treated as a match
    % and spawns a new parse with that prefix consumed.
    for i = 1:length(remaining)
        syl_index = getIndexFromSyllable(remaining(1:i));
        if ~isempty(syl_index)
            possible_parses{end+1} = {[curr_parse{1} syl_index], remaining(i+1:end)}; %#ok<AGROW>
        end
    end

    % Nothing left to parse means a complete parse. Everything else is
    % recorded as failed. NOTE: this includes intermediate parses that
    % were successfully extended above; that is kept as-is because
    % callers receive this list when no full parse exists.
    if isempty(remaining)
        full_parses{end+1} = curr_parse; %#ok<AGROW>
    else
        failed_parses{end+1} = curr_parse; %#ok<AGROW>
    end
end

% No complete parse: hand back the partial ones
if isempty(full_parses)
    syllables = failed_parses;
    return;
end

% Handle ambiguous parses here...
if length(full_parses) > 1
    disp(['Warning: Ambiguous parse for line: ' line_label '.']);

    % Use a heuristic to grab the one with the most two phoneme
    % syllables...
    num_two_ph_syllables = zeros(length(full_parses),1);
    for p = 1:length(full_parses)
        parsed = full_parses{p}{1};
        for k = 1:length(parsed)
            if length(getSyllableFromIndex(parsed(k),2)) == 2
                num_two_ph_syllables(p) = num_two_ph_syllables(p)+1;
            end
        end
    end

    % Ascending sort: the best candidate is last. length(full_parses) > 1
    % guarantees that val(end-1) exists.
    [val, ind] = sort(num_two_ph_syllables);
    if val(end) == val(end-1)

        % The heuristic tied, so fall back to hand-resolved utterances.
        % Column 1 is the phoneme byte sequence, column 2 the syllable
        % indices to use. The indices refer to the syllable map and
        % must be revisited if that map changes.
        hardcoded = { ...
            {100, 117, [201 170], 103, [202 140]}, [7 4 27]; ...   Ben14.cha, line 74
            {[201 170], 103, 111},                 [4 28]; ...     Ben14.cha, line 128
            {101, [202 140], 116, [202 140]},      [86 17 52]};  % Ben23.cha, line 84

        % Default when nothing matches: a single empty parse
        resolved = {{{},{}}};
        found = false;
        for h = 1:size(hardcoded,1)
            if syllabifyBabblePhonemesMatch(real_phonemes, hardcoded{h,1})
                disp('Using a hardcoded value.');
                resolved = {{hardcoded{h,2},{}}};
                found = true;
                break;
            end
        end
        if ~found
            disp('WARNING: Heuristic failed. Might need to hardcode.');
        end
        full_parses = resolved;
    else
        disp('    Using the parse with more two phoneme syllables.');
        full_parses = full_parses(ind(end));
    end
end

syllables = full_parses;


% Helper: true when the cell array of phonemes 'actual' has the same
% number of phonemes as 'expected' and each phoneme has the same bytes.
function tf = syllabifyBabblePhonemesMatch(actual, expected)
tf = false;
if length(actual) ~= length(expected)
    return;
end
for k = 1:length(expected)
    n = length(expected{k});
    ph = actual{k};
    if length(ph) ~= n
        return;
    end

    % Flatten to a row so that row/column orientation is irrelevant
    if ~isequal(reshape(ph(1:n),1,[]), expected{k})
        return;
    end
end
tf = true;

