As the example below shows, the "orth" values actually contain the expanded forms (e.g. "one" instead of "1"), and the tokens have no "expanded" values.
{
"audio": "http://wikispeech-tts-release.wmflabs.org/audio/tmp7g2mo0gx.opus",
"message": [
"output token \"one\" not found in input string \"Utterance 1 Utterance 2 Utterance 3\n\"",
"output token \"two\" not found in input string \"Utterance 1 Utterance 2 Utterance 3\n\"",
"output token \"three\" not found in input string \"Utterance 1 Utterance 2 Utterance 3\n\"",
"output token \"sil\" not found in input string \"Utterance 1 Utterance 2 Utterance 3\n\"",
"WARNING: Unable to correct output token list. Input contains 6 tokens, output contains 7 non-empty tokens.",
"input token list : ['Utterance', '1', 'Utterance', '2', 'Utterance', '3']",
"output token list: ['Utterance', 'one', 'Utterance', 'two', 'Utterance', 'three', 'sil']"
],
"tokens": [
{
"endtime": 0.724,
"orth": "Utterance"
},
{
"endtime": 0.895,
"orth": "one"
},
{
"endtime": 1.315,
"orth": "Utterance"
},
{
"endtime": 1.582,
"orth": "two"
},
{
"endtime": 1.955,
"orth": "Utterance"
},
{
"endtime": 2.422,
"orth": "three"
},
{
"endtime": 2.673,
"orth": "sil"
}
]
}

Compare with the response for the same text using MaryTTS:
{
"audio": "http://wikispeech-tts-release.wmflabs.org/audio/tmpzvf7zkf9.opus",
"tokens": [
{
"endtime": 0.46,
"orth": "Utterance"
},
{
"endtime": 0.64,
"expanded": "one",
"orth": "1"
},
{
"endtime": 1.08,
"orth": "Utterance"
},
{
"endtime": 1.23,
"expanded": "two",
"orth": "2"
},
{
"endtime": 1.675,
"orth": "Utterance"
},
{
"endtime": 1.995,
"expanded": "three",
"orth": "3"
},
{
"endtime": 2.4,
"orth": ""
}
]
}