{"title":"Automatic generation of words toward flexible vocabulary isolated word recognition","authors":"P. Laface, L. Fissore, F. Ravera","doi":"10.21437/ICSLP.1994-556","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-556","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"34 1","pages":"2215-2218"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"74798100","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Italian clusters in continuous speech","authors":"E. Farnetani, M. G. Busà","doi":"10.21437/ICSLP.1994-93","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-93","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"26 1","pages":"359-362"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"75034144","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"An HMM duration control algorithm with a low computational cost","authors":"Satoshi Takahashi, Yasuhiro Minami, K. Shikano","doi":"10.21437/ICSLP.1994-70","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-70","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"78 1","pages":"267-270"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"76501668","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
A. Kiessling, R. Kompe, A. Batliner, H. Niemann, E. Nöth
{"title":"Automatic labeling of phrase accents in German","authors":"A. Kiessling, R. Kompe, A. Batliner, H. Niemann, E. Nöth","doi":"10.21437/ICSLP.1994-32","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-32","url":null,"abstract":"In this paper a method for the automatic labeling of phrase accents is described, based on a large text corpus that has been generated automatically and read by 100 speakers. Perception experiments on a subset of 500 utterances show a high agreement between the automatically generated accent labels and the judgment scores obtained. We computed different prosodic feature vectors from the speech signal for each syllable and trained different Gaussian distribution classifiers and artificial neural networks using the automatically generated accent labels. Recognition rates of up to 83% could be achieved for the distinction of accentuated vs. unaccentuated syllables. Similar results could be obtained for the comparison of the listeners' judgments with the automatic classification.","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"79 1","pages":"115-118"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"76579926","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Evaluation of phonetic feature recognition with a time-delay neural network","authors":"S. Okawa, C. Windheuser, F. Bimbot, K. Shirai","doi":"10.21437/ICSLP.1994-397","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-397","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"465 1","pages":"1531-1534"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"77526393","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"8 Kb/s Low-delay Speech Coding with 4 Ms Frame Size","authors":"P. Rao, Y. Asakawa, Hidetoshi Sekine","doi":"10.21437/ICSLP.1994-521","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-521","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"256 1","pages":"2075-2078"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"77671107","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
R. Cole, D. Novick, M. Fanty, Pieter J. E. Vermeulen, S. Sutton, D. Burnett, J. Schalkwyk
{"title":"A prototype voice-response questionnaire for the u.s. census","authors":"R. Cole, D. Novick, M. Fanty, Pieter J. E. Vermeulen, S. Sutton, D. Burnett, J. Schalkwyk","doi":"10.21437/ICSLP.1994-173","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-173","url":null,"abstract":"A PROTOTYPE VOICE-RESPONSE QUESTIONNAIRE FOR THE U.S. CENSUS. Ronald Cole, David G. Novick, Mark Fanty, Pieter Vermeulen, Stephen Sutton, Dan Burnett and Johan Schalkwyk. Center for Spoken Language Understanding, Oregon Graduate Institute of Science and Technology, 20000 N.W. Walker Road, P.O. Box 91000, Portland, OR 97291-1000, USA. ABSTRACT. This paper describes a study conducted to determine the feasibility of using a spoken questionnaire to collect information for the Year 2000 Census in the USA. To refine the dialogue and to train recognizers, we collected complete protocols from over 4000 callers. For the responses labeled (about half), over 99 percent of the answers contain the desired information. The recognizers trained so far range in performance from 75 percent correct on year of birth to over 99 percent for marital status. We developed a prototype system that engages the callers in a dialogue to obtain the desired information, reviews recognized information at the end of the call, and asks the caller to identify the response categories that are incorrect. 1. INTRODUCTION. We have conducted a study to determine the feasibility of using an automated spoken questionnaire to collect information for the Year 2000 Census in the United States of America. The goal of the study was to develop and evaluate a telephone questionnaire that automatically captures and recognizes the following information: (1) full name, (2) sex, (3) birth date, (4) marital status (now married, widowed, divorced, separated, never married—choose one), (5) Hispanic origin (yes or no); if Hispanic: Mexican, Mexican-American, Chicano, Puerto Rican, Cuban or other (specify), (6) race: White, Black or Negro, American Indian (specify tribe), Eskimo, Aleut, Chinese, Japanese, Filipino, Asian Indian, Hawaiian, Samoan, Korean, Guamanian, Vietnamese or other (specify). After preliminary rounds of data collection to refine the selection and wording of the system prompts, a large, regionally diverse data collection effort resulted in approximately 4000 calls. This paper describes the effectiveness of the protocol in eliciting the desired information and it describes the spoken language system that resulted. 2. SYSTEM. 2.1. Recognition. Signal Processing. The caller's response is transmitted over the digital phone line as an 8 kHz mu-law encoded digital signal. A seventh order Perceptual Linear Predictive (PLP) analysis [1] is performed every 6 msec using a 10 msec window. Phonetic Classification. Each 6 msec frame of the signal is classified phonetically by a three layer neural network. To achieve maximum performance, a separate vocabulary-dependent network is trained for each response category, using a phoneme set particular to the expected pronunciations of words in that response category. This consists of the subset of standard phonemes which occur in the vocabulary, plus any additional context-dependent phonemes which were deemed necessary (e.g. [tw] for the [t] in \"twenty\" and \"twelve\"). The background noise and silence are modeled by a special phoneme [.pau]. For each frame of speech, the neural network is provided with 70 inputs, which consists of eight PLP coefficients and two voicing outputs from the frame to","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"83 1","pages":"683-686"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"79797190","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Speech recognition using HMM with decreased intra-group variation in the temporal structure","authors":"N. Minematsu, K. Hirose","doi":"10.21437/ICSLP.1994-50","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-50","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"4 1","pages":"187-190"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"80068081","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"A study of applying adaptive learning to a multi-module system","authors":"Tung-Hui Chiang, Yi-Chung Lin, Keh-Yih Su","doi":"10.21437/ICSLP.1994-130","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-130","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"25 1","pages":"463-466"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"80078607","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Natural utterance segmentation and discourse label assignment","authors":"M. Tomokiyo","doi":"10.21437/ICSLP.1994-432","DOIUrl":"https://doi.org/10.21437/ICSLP.1994-432","url":null,"abstract":"","PeriodicalId":90685,"journal":{"name":"Proceedings : ICSLP. International Conference on Spoken Language Processing","volume":"18 1","pages":"1671-1674"},"PeriodicalIF":0.0,"publicationDate":"1994-09-18","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"80476138","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}