{"title":"Speech recognition based on the transformer's multi-head attention in Arabic","authors":"Omayma Mahmoudi, Mouncef Filali-Bouami, Mohamed Benchat","doi":"10.1007/s10772-024-10092-x","DOIUrl":"https://doi.org/10.1007/s10772-024-10092-x","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"42 3","pages":""},"PeriodicalIF":0.0,"publicationDate":"2024-03-29","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"140368252","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Feature extraction using GTCC spectrogram and ResNet50 based classification for audio spoof detection","authors":"N. Chakravarty, Mohit Dua","doi":"10.1007/s10772-024-10093-w","DOIUrl":"https://doi.org/10.1007/s10772-024-10093-w","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"62 20","pages":"1-13"},"PeriodicalIF":0.0,"publicationDate":"2024-03-29","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"140367823","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Stockwell-Transform based feature representation for detection and assessment of voice disorders","authors":"Purva Barche, K. Gurugubelli, A. Vuppala","doi":"10.1007/s10772-024-10085-w","DOIUrl":"https://doi.org/10.1007/s10772-024-10085-w","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"12 4","pages":""},"PeriodicalIF":0.0,"publicationDate":"2024-02-29","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"140412538","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Correction to: Automated detection system for texture feature based classification on different image datasets using S-transform","authors":"O. Kesav, G. K. Rajini","doi":"10.1007/s10772-024-10083-y","DOIUrl":"https://doi.org/10.1007/s10772-024-10083-y","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"58 20","pages":""},"PeriodicalIF":0.0,"publicationDate":"2024-01-29","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"140487019","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"A review on speech emotion recognition for late deafened educators in online education","authors":"Aparna Vyakaranam, Tomas Maul, Bavani Ramayah","doi":"10.1007/s10772-023-10064-7","DOIUrl":"https://doi.org/10.1007/s10772-023-10064-7","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"60 4","pages":""},"PeriodicalIF":0.0,"publicationDate":"2024-01-24","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"139601433","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Advancements in encoded speech data by background noise suppression under uncontrolled environment","authors":"B. G. Nagaraja, G. T. Yadava, Mohamed Anees","doi":"10.1007/s10772-023-10078-1","DOIUrl":"https://doi.org/10.1007/s10772-023-10078-1","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"1 1","pages":""},"PeriodicalIF":0.0,"publicationDate":"2024-01-06","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"139380910","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Scene text visual question answering by using YOLO and STN","authors":"Kimiya Nourali, Elham Dolkhani","doi":"10.1007/s10772-023-10081-6","DOIUrl":"https://doi.org/10.1007/s10772-023-10081-6","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":"8 8","pages":""},"PeriodicalIF":0.0,"publicationDate":"2024-01-03","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"139389461","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"An optimized convolutional neural network for speech enhancement","authors":"A. Karthik, J. L. Mazher Iqbal","doi":"10.1007/s10772-023-10073-6","DOIUrl":"https://doi.org/10.1007/s10772-023-10073-6","url":null,"abstract":"","PeriodicalId":14305,"journal":{"name":"International Journal of Speech Technology","volume":" 46","pages":""},"PeriodicalIF":0.0,"publicationDate":"2023-12-29","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"139144647","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":0,"RegionCategory":"","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}