Alba Bonet-Jover, Robiert Sepúlveda-Torres, E. Saquete, P. Martínez-Barco, Mario Nieto-Pérez
{"title":"RUN-AS: a novel approach to annotate news reliability for disinformation detection","authors":"Alba Bonet-Jover, Robiert Sepúlveda-Torres, E. Saquete, P. Martínez-Barco, Mario Nieto-Pérez","doi":"10.1007/s10579-023-09678-9","DOIUrl":"https://doi.org/10.1007/s10579-023-09678-9","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-08-06","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"44243946","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Aaron Maladry, Els Lefever, Cynthia Van Hee, Veronique Hoste
{"title":"The limitations of irony detection in Dutch social media","authors":"Aaron Maladry, Els Lefever, Cynthia Van Hee, Veronique Hoste","doi":"10.1007/s10579-023-09656-1","DOIUrl":"https://doi.org/10.1007/s10579-023-09656-1","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-23","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"46825933","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Fine-tuning language models to recognize semantic relations","authors":"D. Roussinov, S. Sharoff, Nadezhda Puchnina","doi":"10.1007/s10579-023-09677-w","DOIUrl":"https://doi.org/10.1007/s10579-023-09677-w","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-23","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"48753056","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
L. Bischetti, C. Pompei, Biagio Scalingi, F. Frau, M. Bosia, G. Arcara, V. Bambini
{"title":"Assessment of pragmatic abilities and cognitive substrates (APACS) brief remote: a novel tool for the rapid and tele-evaluation of pragmatic skills in Italian","authors":"L. Bischetti, C. Pompei, Biagio Scalingi, F. Frau, M. Bosia, G. Arcara, V. Bambini","doi":"10.1007/s10579-023-09667-y","DOIUrl":"https://doi.org/10.1007/s10579-023-09667-y","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-23","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"48627622","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Ismael Garrido-Muñoz, F. Martínez-Santiago, Arturo Montejo-Ráez
{"title":"MarIA and BETO are sexist: evaluating gender bias in large language models for Spanish","authors":"Ismael Garrido-Muñoz, F. Martínez-Santiago, Arturo Montejo-Ráez","doi":"10.1007/s10579-023-09670-3","DOIUrl":"https://doi.org/10.1007/s10579-023-09670-3","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-23","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"48048666","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"FullStop: punctuation and segmentation prediction for Dutch with transformers","authors":"Vincent Vandeghinste, Oliver Guhr","doi":"10.1007/s10579-023-09676-x","DOIUrl":"https://doi.org/10.1007/s10579-023-09676-x","url":null,"abstract":"<p>When applying automated speech recognition (ASR) for Belgian Dutch, the output consists of an unsegmented stream of words, without any punctuation. A next step is to perform segmentation and insert punctuation, making the ASR output more readable and easy to manually correct. We present the first (as far as we know) publicly available punctuation insertion system for Dutch that functions at a usable level and that is publicly available. The model we present here is an extension of the approach of Guhr et al. (In: Swiss Text Analytics Conference. Shared task on Sentence End and Punctuation Prediction in NLG Text, 2021) for Dutch: we finetuned the Dutch language model RobBERT on a punctuation prediction sequence classification task. The model was finetuned on two datasets: the Dutch side of Europarl and the SoNaR corpus. For every word in the input sequence, the model predicts a punctuation marker that follows the word. In cases where the language is unknown or where code switching applies, we have extended an existing multilingual model with Dutch. Previous work showed that such a multilingual model, based on “xlm-roberta-base” performs on par or sometimes even better than the monolingual cases. The system was evaluated on in-domain data as a classifier and on out-of-domain data as a sentence segmentation system through full stop prediction. The evaluations on sentence segmentation on out of domain data show that models finetuned on SoNaR show the best results, which can be attributed to SoNaR being a reference corpus containing different language registers. The multilingual models show an even better precision (at the cost of a lower recall) compared to the monolingual models.</p>","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":"3 1","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-14","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"138513877","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"adaptNMT: an open-source, language-agnostic development environment for neural machine translation","authors":"Séamus Lankford, Haithem Afli, Andy Way","doi":"10.1007/s10579-023-09671-2","DOIUrl":"https://doi.org/10.1007/s10579-023-09671-2","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-14","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"46494310","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Neil Cohn, Bruno Cardoso, Bien Klomberg, Irmak Hacımusaoğlu
{"title":"The Visual Language Research Corpus (VLRC): an annotated corpus of comics from Asia, Europe, and the United States","authors":"Neil Cohn, Bruno Cardoso, Bien Klomberg, Irmak Hacımusaoğlu","doi":"10.1007/s10579-023-09673-0","DOIUrl":"https://doi.org/10.1007/s10579-023-09673-0","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-14","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"43778264","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
{"title":"Evaluation of a rule-based approach to automatic factual question generation using syntactic and semantic analysis","authors":"A. Gašpar, Ani Grubišić, Ines Šarić-Grgić","doi":"10.1007/s10579-023-09672-1","DOIUrl":"https://doi.org/10.1007/s10579-023-09672-1","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-07-10","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"42592713","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}
Daniela Vianna, Fernando Carneiro, Jonnathan Carvalho, Alexandre Plastino, A. Paes
{"title":"Sentiment analysis in Portuguese tweets: an evaluation of diverse word representation models","authors":"Daniela Vianna, Fernando Carneiro, Jonnathan Carvalho, Alexandre Plastino, A. Paes","doi":"10.1007/s10579-023-09661-4","DOIUrl":"https://doi.org/10.1007/s10579-023-09661-4","url":null,"abstract":"","PeriodicalId":49927,"journal":{"name":"Language Resources and Evaluation","volume":" ","pages":""},"PeriodicalIF":2.7,"publicationDate":"2023-06-28","publicationTypes":"Journal Article","fieldsOfStudy":null,"isOpenAccess":false,"openAccessPdf":"","citationCount":null,"resultStr":null,"platform":"Semanticscholar","paperid":"47640741","PeriodicalName":null,"FirstCategoryId":null,"ListUrlMain":null,"RegionNum":3,"RegionCategory":"计算机科学","ArticlePicture":[],"TitleCN":null,"AbstractTextCN":null,"PMCID":"","EPubDate":null,"PubModel":null,"JCR":null,"JCRName":null,"Score":null,"Total":0}