Package: libttspico-utils Version: 1.0+git20130326-7 Severity: wishlist Control: tag -1 + patch upstream Control: owner -1 paolo.gre...@libpf.com
(Transforming this into a bug) 2017-11-13 16:57 GMT+01:00 Paolo Greppi <paolo.gre...@libpf.com>: > Hi, I see you are both involved in maintaining the svox package, and Mathieu > is also the author of pico2wave. Hi, I'm not involved anymore in svox (I don't use it anymore). Samuel, you can remove me in the next upload. > I was looking at a way of converting to wav a larger chunk of text than what > can be supplied on the command line, and was unsatisfied by this python > wrapper around pico2wave: > https://github.com/GwadaLUG/pico-read-speaker > > Looking at the source of pico2wave, I found this TODO in the code: > > //TODO: stdin not supported yet. > > So I set forth implementing this missing functionality. Here is a repo: > https://gitlab.com/simevo/pico2wave > > I got it to read from stdin any length of text, but it crashes if more than > 32767 chars are supplied. That's because it uses a pico_Int16 variable to > scan the buffer, and pico_Int16 is signed short, and SHRT_MAX is 32767. > > What is left to do is to break up the text in chunks of < 32767 chars, > repeatedly call pico_putTextUtf8 and append everything to the same output wav > file > > At this point I sense that this could be feasible to complete, I am willing > to do it, but I though of interfacing with you as upstream on the best way to > move this forward so that the improved pico2wave lands in debian. Samuel or others can decide what they do about it... Regards -- Mathieu Parent
/* pico2wave.c * Copyright (C) 2009 Mathieu Parent <math.par...@gmail.com> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Convert text to .wav using svox text-to-speech system. * */ #include <popt.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <assert.h> #include <picoapi.h> #include <picoapid.h> #include <picoos.h> /* adaptation layer defines */ #define PICO_MEM_SIZE 2500000 #define DummyLen 100000000 /* string constants */ #define MAX_OUTBUF_SIZE 128 #ifdef picolangdir const char * PICO_LINGWARE_PATH = picolangdir "/"; #else const char * PICO_LINGWARE_PATH = "./lang/"; #endif const char * PICO_VOICE_NAME = "PicoVoice"; /* supported voices Pico does not seperately specify the voice and locale. */ const char * picoSupportedLangIso3[] = { "eng", "eng", "deu", "spa", "fra", "ita" }; const char * picoSupportedCountryIso3[] = { "USA", "GBR", "DEU", "ESP", "FRA", "ITA" }; const char * picoSupportedLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" }; const char * picoInternalLang[] = { "en-US", "en-GB", "de-DE", "es-ES", "fr-FR", "it-IT" }; const char * picoInternalTaLingware[] = { "en-US_ta.bin", "en-GB_ta.bin", "de-DE_ta.bin", "es-ES_ta.bin", "fr-FR_ta.bin", "it-IT_ta.bin" }; const char * picoInternalSgLingware[] = { "en-US_lh0_sg.bin", "en-GB_kh0_sg.bin", "de-DE_gl0_sg.bin", "es-ES_zl0_sg.bin", "fr-FR_nk0_sg.bin", "it-IT_cm0_sg.bin" }; const char * picoInternalUtppLingware[] = { "en-US_utpp.bin", "en-GB_utpp.bin", "de-DE_utpp.bin", "es-ES_utpp.bin", "fr-FR_utpp.bin", "it-IT_utpp.bin" }; const int picoNumSupportedVocs = 6; /* adapation layer global variables */ void * picoMemArea = NULL; pico_System picoSystem = NULL; pico_Resource picoTaResource = NULL; pico_Resource picoSgResource = NULL; pico_Resource picoUtppResource = NULL; pico_Engine picoEngine = NULL; pico_Char * picoTaFileName = NULL; pico_Char * picoSgFileName = NULL; pico_Char * picoUtppFileName = NULL; pico_Char * picoTaResourceName = NULL; pico_Char * picoSgResourceName = NULL; pico_Char * picoUtppResourceName = NULL; int picoSynthAbort = 0; #define CHUNK_SIZE 16384UL /* buffered read from source until EOF; user should free the buffer */ size_t read(FILE *source, char **buffer) { size_t count; size_t size = 0; size_t offset = 0; *buffer = NULL; do { // printf("allocate %ld chars\n", CHUNK_SIZE); *buffer = (char *)realloc(*buffer, size + CHUNK_SIZE); assert(*buffer); size += CHUNK_SIZE; // printf("size = %ld\n", size); count = fread(&((*buffer)[offset]), 1, CHUNK_SIZE, source); // printf("read chars = %ld\n", count); assert(!ferror(source)); if (feof(source)) { // printf("feof = %ld\n", offset + count); (*buffer)[offset + count] = 0; return offset + count; } offset += CHUNK_SIZE; // printf("offset = %ld\n", offset); } while (1); } int main(int argc, const char *argv[]) { char * wavefile = NULL; char * lang = "en-US"; int langIndex = -1, langIndexTmp = -1; char * text = NULL; int8_t * buffer; size_t bufferSize = 256; /* Parsing options */ poptContext optCon; /* context for parsing command-line options */ int opt; /* used for argument parsing */ struct poptOption optionsTable[] = { { "wave", 'w', POPT_ARG_STRING, &wavefile, 0, "Write output to this WAV file (extension SHOULD be .wav)", "filename.wav" }, { "lang", 'l', POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT, &lang, 0, "Language", "lang" }, POPT_AUTOHELP POPT_TABLEEND }; optCon = poptGetContext(NULL, argc, argv, optionsTable, POPT_CONTEXT_POSIXMEHARDER); poptSetOtherOptionHelp(optCon, "<words>"); /* Reporting about invalid extra options */ while ((opt = poptGetNextOpt(optCon)) != -1) { switch (opt) { default: fprintf(stderr, "Invalid option %s: %s\n", poptBadOption(optCon, 0), poptStrerror(opt)); poptPrintHelp(optCon, stderr, 0); exit(1); } } /* Mandatory option: --wave */ if(!wavefile) { fprintf(stderr, "Mandatory option: %s\n\n", "--wave=filename.wav"); poptPrintHelp(optCon, stderr, 0); exit(1); } /* option: --lang */ for(langIndexTmp =0; langIndexTmp<picoNumSupportedVocs; langIndexTmp++) { if(!strcmp(picoSupportedLang[langIndexTmp], lang)) { langIndex = langIndexTmp; break; } } if(langIndex == -1) { fprintf(stderr, "Unknown language: %s\nValid languages:\n", lang); for(langIndexTmp =0; langIndexTmp<picoNumSupportedVocs; langIndexTmp++) { fprintf(stderr, "%s\n", picoSupportedLang[langIndexTmp]); } lang = "en-US"; fprintf(stderr, "\n"); poptPrintHelp(optCon, stderr, 0); exit(1); } /* Remaining argument is <words> */ const char **extra_argv; extra_argv = poptGetArgs(optCon); if(extra_argv) { text = (char *) &(*extra_argv)[0]; } else { // read from stdin size_t len = read(stdin, &text); // debug buffered read: make this command a pass-though cat-like command and compare input with output // printf("read %ld characters\n", len); // fwrite(text, 1, len, stdout); // exit(1); // TODO: break text in chunks of max size equal to pico_Int16 textSize, // /usr/include/picoapi.h:typedef short pico_Int16; // /usr/include/limits.h:# define SHRT_MAX 32767 } poptFreeContext(optCon); buffer = malloc( bufferSize ); int ret, getstatus; pico_Char * inp = NULL; pico_Char * local_text = NULL; short outbuf[MAX_OUTBUF_SIZE/2]; pico_Int16 bytes_sent, bytes_recv, text_remaining, out_data_type; pico_Retstring outMessage; picoSynthAbort = 0; picoMemArea = malloc( PICO_MEM_SIZE ); if((ret = pico_initialize( picoMemArea, PICO_MEM_SIZE, &picoSystem ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot initialize pico (%i): %s\n", ret, outMessage); goto terminate; } /* Load the text analysis Lingware resource file. */ picoTaFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); strcpy((char *) picoTaFileName, PICO_LINGWARE_PATH); strcat((char *) picoTaFileName, (const char *) picoInternalTaLingware[langIndex]); if((ret = pico_loadResource( picoSystem, picoTaFileName, &picoTaResource ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot load text analysis resource file (%i): %s\n", ret, outMessage); goto unloadTaResource; } /* Load the signal generation Lingware resource file. */ picoSgFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); strcpy((char *) picoSgFileName, PICO_LINGWARE_PATH); strcat((char *) picoSgFileName, (const char *) picoInternalSgLingware[langIndex]); if((ret = pico_loadResource( picoSystem, picoSgFileName, &picoSgResource ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot load signal generation Lingware resource file (%i): %s\n", ret, outMessage); goto unloadSgResource; } /* Load the utpp Lingware resource file if exists - NOTE: this file is optional and is currently not used. Loading is only attempted for future compatibility. If this file is not present the loading will still succeed. // picoUtppFileName = (pico_Char *) malloc( PICO_MAX_DATAPATH_NAME_SIZE + PICO_MAX_FILE_NAME_SIZE ); strcpy((char *) picoUtppFileName, PICO_LINGWARE_PATH); strcat((char *) picoUtppFileName, (const char *) picoInternalUtppLingware[langIndex]); ret = pico_loadResource( picoSystem, picoUtppFileName, &picoUtppResource ); pico_getSystemStatusMessage(picoSystem, ret, outMessage); printf("pico_loadResource: %i: %s\n", ret, outMessage); */ /* Get the text analysis resource name. */ picoTaResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); if((ret = pico_getResourceName( picoSystem, picoTaResource, (char *) picoTaResourceName ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot get the text analysis resource name (%i): %s\n", ret, outMessage); goto unloadUtppResource; } /* Get the signal generation resource name. */ picoSgResourceName = (pico_Char *) malloc( PICO_MAX_RESOURCE_NAME_SIZE ); if((ret = pico_getResourceName( picoSystem, picoSgResource, (char *) picoSgResourceName ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot get the signal generation resource name (%i): %s\n", ret, outMessage); goto unloadUtppResource; } /* Create a voice definition. */ if((ret = pico_createVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot create voice definition (%i): %s\n", ret, outMessage); goto unloadUtppResource; } /* Add the text analysis resource to the voice. */ if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoTaResourceName ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot add the text analysis resource to the voice (%i): %s\n", ret, outMessage); goto unloadUtppResource; } /* Add the signal generation resource to the voice. */ if((ret = pico_addResourceToVoiceDefinition( picoSystem, (const pico_Char *) PICO_VOICE_NAME, picoSgResourceName ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot add the signal generation resource to the voice (%i): %s\n", ret, outMessage); goto unloadUtppResource; } /* Create a new Pico engine. */ if((ret = pico_newEngine( picoSystem, (const pico_Char *) PICO_VOICE_NAME, &picoEngine ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot create a new pico engine (%i): %s\n", ret, outMessage); goto disposeEngine; } local_text = (pico_Char *) text ; text_remaining = strlen((const char *) local_text) + 1; inp = (pico_Char *) local_text; size_t bufused = 0; picoos_Common common = (picoos_Common) pico_sysGetCommon(picoSystem); picoos_SDFile sdOutFile = NULL; picoos_bool done = TRUE; if(TRUE != (done = picoos_sdfOpenOut(common, &sdOutFile, (picoos_char *) wavefile, SAMPLE_FREQ_16KHZ, PICOOS_ENC_LIN))) { fprintf(stderr, "Cannot open output wave file\n"); ret = 1; goto disposeEngine; } /* synthesis loop */ while (text_remaining) { /* Feed the text into the engine. */ printf("text_remaining = %d\n", text_remaining); if((ret = pico_putTextUtf8( picoEngine, inp, text_remaining, &bytes_sent ))) { pico_getSystemStatusMessage(picoSystem, ret, outMessage); fprintf(stderr, "Cannot put Text (%i): %s\n", ret, outMessage); goto disposeEngine; } text_remaining -= bytes_sent; inp += bytes_sent; do { if (picoSynthAbort) { goto disposeEngine; } /* Retrieve the samples and add them to the buffer. */ getstatus = pico_getData( picoEngine, (void *) outbuf, MAX_OUTBUF_SIZE, &bytes_recv, &out_data_type ); if((getstatus !=PICO_STEP_BUSY) && (getstatus !=PICO_STEP_IDLE)){ pico_getSystemStatusMessage(picoSystem, getstatus, outMessage); fprintf(stderr, "Cannot get Data (%i): %s\n", getstatus, outMessage); goto disposeEngine; } if (bytes_recv) { if ((bufused + bytes_recv) <= bufferSize) { memcpy(buffer+bufused, (int8_t *) outbuf, bytes_recv); bufused += bytes_recv; } else { done = picoos_sdfPutSamples( sdOutFile, bufused / 2, (picoos_int16*) (buffer)); bufused = 0; memcpy(buffer, (int8_t *) outbuf, bytes_recv); bufused += bytes_recv; } } } while (PICO_STEP_BUSY == getstatus); /* This chunk of synthesis is finished; pass the remaining samples. */ if (!picoSynthAbort) { done = picoos_sdfPutSamples( sdOutFile, bufused / 2, (picoos_int16*) (buffer)); } picoSynthAbort = 0; } if(TRUE != (done = picoos_sdfCloseOut(common, &sdOutFile))) { fprintf(stderr, "Cannot close output wave file\n"); ret = 1; goto disposeEngine; } disposeEngine: if (picoEngine) { pico_disposeEngine( picoSystem, &picoEngine ); pico_releaseVoiceDefinition( picoSystem, (pico_Char *) PICO_VOICE_NAME ); picoEngine = NULL; } unloadUtppResource: if (picoUtppResource) { pico_unloadResource( picoSystem, &picoUtppResource ); picoUtppResource = NULL; } unloadSgResource: if (picoSgResource) { pico_unloadResource( picoSystem, &picoSgResource ); picoSgResource = NULL; } unloadTaResource: if (picoTaResource) { pico_unloadResource( picoSystem, &picoTaResource ); picoTaResource = NULL; } terminate: if (picoSystem) { pico_terminate(&picoSystem); picoSystem = NULL; } if (text) free(text); exit(ret); }