% Journal articles on sentiment analysis and argumentation mining.
% Layout: one field per line; bibliographic fields first, identifiers next,
% abstract last. Text outside the entries is ignored by BibTeX and Biber.

% PyFin-sentiment: market-sentiment model for finance-related tweets.
% "PyFin" is braced so sentence-casing styles keep its capitalisation.
% NOTE(review): pages = {10} looks like a page count rather than a page range
% or article number -- TODO confirm against the publisher record.
@article{WilkschAbramova2023,
  author    = {Wilksch, Moritz and Abramova, Olga},
  title     = {{PyFin}-sentiment},
  journal   = {International Journal of Information Management Data Insights},
  volume    = {3},
  number    = {1},
  pages     = {10},
  year      = {2023},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {2667-0968},
  doi       = {10.1016/j.jjimei.2023.100171},
  language  = {en},
  abstract  = {Responding to the poor performance of generic automated sentiment analysis solutions on domain-specific texts, we collect a dataset of 10,000 tweets discussing the topics of finance and investing.
               We manually assign each tweet its market sentiment, i.e., the investor's anticipation of a stock's future return.
               Using this data, we show that all existing sentiment models trained on adjacent domains struggle with accurate market sentiment analysis due to the task's specialized vocabulary.
               Consequently, we design, train, and deploy our own sentiment model.
               It outperforms all previous models (VADER, NTUSD-Fin, FinBERT, TwitterRoBERTa) when evaluated on Twitter posts.
               On posts from a different platform, our model performs on par with BERT-based large language models.
               We achieve this result at a fraction of the training and inference costs due to the model's simple design.
               We publish the artifact as a python library to facilitate its use by future researchers and practitioners.},
}

% Comparison of dictionary, word-embedding and semi-automated approaches
% against human-coded sentiment of German literature reviews.
% The "A" opening the second sentence of the title is braced because BibTeX
% sentence casing only preserves a capital after a colon, not after a period.
% NOTE(review): pages = {16} looks like a page count rather than a page range
% or article number -- TODO confirm against the publisher record.
@article{MunnesHarschKnoblochetal.2022,
  author    = {Munnes, Stefan and Harsch, Corinna and Knobloch, Marcel and Vogel, Johannes S. and Hipp, Lena and Schilling, Erik},
  title     = {Examining Sentiment in Complex Texts. {A} Comparison of Different Computational Approaches},
  journal   = {Frontiers in Big Data},
  volume    = {5},
  pages     = {16},
  year      = {2022},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {2624-909X},
  doi       = {10.3389/fdata.2022.886362},
  language  = {en},
  abstract  = {Can we rely on computational methods to accurately analyze complex texts?
               To answer this question, we compared different dictionary and scaling methods used in predicting the sentiment of German literature reviews to the ``gold standard'' of human-coded sentiments.
               Literature reviews constitute a challenging text corpus for computational analysis as they not only contain different text levels---for example, a summary of the work and the reviewer's appraisal---but are also characterized by subtle and ambiguous language elements.
               To take the nuanced sentiments of literature reviews into account, we worked with a metric rather than a dichotomous scale for sentiment analysis.
               The results of our analyses show that the predicted sentiments of prefabricated dictionaries, which are computationally efficient and require minimal adaption, have a low to medium correlation with the human-coded sentiments (r between 0.32 and 0.39).
               The accuracy of self-created dictionaries using word embeddings (both pre-trained and self-trained) was considerably lower (r between 0.10 and 0.28).
               Given the high coding intensity and contingency on seed selection as well as the degree of data pre-processing of word embeddings that we found with our data, we would not recommend them for complex texts without further adaptation.
               While fully automated approaches appear not to work in accurately predicting text sentiments with complex texts such as ours, we found relatively high correlations with a semiautomated approach (r of around 0.6)---which, however, requires intensive human coding efforts for the training dataset.
               In addition to illustrating the benefits and limits of computational approaches in analyzing complex text corpora and the potential of metric rather than binary scales of text sentiment, we also provide a practical guide for researchers to select an appropriate method and degree of pre-processing when working with complex texts.},
}

% Introductory survey of argumentation mining and its relation to sentiment
% analysis and stance detection. The page range uses a bare double hyphen.
@article{Stede2020,
  author    = {Stede, Manfred},
  title     = {Automatic argumentation mining and the role of stance and sentiment},
  journal   = {Journal of Argumentation in Context},
  volume    = {9},
  number    = {1},
  pages     = {19--41},
  year      = {2020},
  publisher = {John Benjamins Publishing Co.},
  address   = {Amsterdam},
  issn      = {2211-4742},
  doi       = {10.1075/jaic.00006.ste},
  language  = {en},
  abstract  = {Argumentation mining is a subfield of Computational Linguistics that aims (primarily) at automatically finding arguments and their structural components in natural language text.
               We provide a short introduction to this field, intended for an audience with a limited computational background.
               After explaining the subtasks involved in this problem of deriving the structure of arguments, we describe two other applications that are popular in computational linguistics: sentiment analysis and stance detection.
               From the linguistic viewpoint, they concern the semantics of evaluation in language.
               In the final part of the paper, we briefly examine the roles that these two tasks play in argumentation mining, both in current practice, and in possible future systems.},
}