% Bibliography entries for publications by Risch and Krestel (repository export, cleaned up).
% Conventions: one field per line, page ranges with a double hyphen and no spaces,
% container names stored once (journal or booktitle), citation keys left unchanged.

% Journal article in Data Technologies and Applications, volume 53, issue 1.
% The abstract is kept verbatim from the export, including the fused section headings.
@article{RischKrestel2019,
  author    = {Risch, Julian and Krestel, Ralf},
  title     = {Domain-specific word embeddings for patent classification},
  journal   = {Data Technologies and Applications},
  volume    = {53},
  number    = {1},
  pages     = {108--122},
  year      = {2019},
  publisher = {Emerald Group Publishing Limited},
  address   = {Bingley},
  issn      = {2514-9288},
  doi       = {10.1108/DTA-01-2019-0002},
  language  = {en},
  abstract  = {Purpose Patent offices and other stakeholders in the patent domain need to classify patent applications according to a standardized classification scheme. The purpose of this paper is to examine the novelty of an application it can then be compared to previously granted patents in the same class. Automatic classification would be highly beneficial, because of the large volume of patents and the domain-specific knowledge needed to accomplish this costly manual task. However, a challenge for the automation is patent-specific language use, such as special vocabulary and phrases. Design/methodology/approach To account for this language use, the authors present domain-specific pre-trained word embeddings for the patent domain. The authors train the model on a very large data set of more than 5m patents and evaluate it at the task of patent classification. To this end, the authors propose a deep learning approach based on gated recurrent units for automatic patent classification built on the trained word embeddings. Findings Experiments on a standardized evaluation data set show that the approach increases average precision for patent classification by 17 percent compared to state-of-the-art approaches. In this paper, the authors further investigate the model's strengths and weaknesses. An extensive error analysis reveals that the learned embeddings indeed mirror patent-specific language use. The imbalanced training data and underrepresented classes are the most difficult remaining challenge. Originality/value The proposed approach fulfills the need for domain-specific word embeddings for downstream tasks in the patent domain, such as patent classification or patent analysis.},
}

% Chapter in an edited book (editors, ISBN and a chapter-level DOI are present),
% so it is typed as incollection and the book title lives in booktitle.
% NOTE(review): the ISSN presumably identifies the book series; the series title is
% not present in the export, so no series field is given here. Confirm and add it.
@incollection{RischKrestel2020,
  author    = {Risch, Julian and Krestel, Ralf},
  title     = {Toxic comment detection in online discussions},
  booktitle = {Deep Learning-Based Approaches for Sentiment Analysis},
  editor    = {Agarwal, Basant and Nayak, Richi and Mittal, Namita and Patnaik, Srikanta},
  publisher = {Springer},
  address   = {Singapore},
  year      = {2020},
  pages     = {85--109},
  isbn      = {978-981-15-1216-2},
  issn      = {2524-7565},
  doi       = {10.1007/978-981-15-1216-2_4},
  language  = {en},
  abstract  = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. In contrast to other online posts, news discussions are related to particular news articles, comments refer to each other, and individual conversations emerge. However, the misuse by spammers, haters, and trolls makes costly content moderation necessary. Sentiment analysis can not only support moderation but also help to understand the dynamics of online discussions. A subtask of content moderation is the identification of toxic comments. To this end, we describe the concept of toxicity and characterize its subclasses. Further, we present various deep learning approaches, including datasets and architectures, tailored to sentiment analysis in online discussions. One way to make these approaches more comprehensible and trustworthy is fine-grained instead of binary comment classification. On the downside, more classes require more training data. Therefore, we propose to augment training data by using transfer learning. We discuss real-world applications, such as semi-automated comment moderation and troll detection. Finally, we outline future challenges and current limitations in light of most recent research publications.},
}

% Journal article published under an article number rather than a page range.
% NOTE(review): the export carried pages = 13, which looks like a page count; it is
% kept in pagetotal. The article number 102035 is taken from the DOI suffix. Confirm.
@article{KrestelChikkamathHeweletal.2021,
  author    = {Krestel, Ralf and Chikkamath, Renukswamy and Hewel, Christoph and Risch, Julian},
  title     = {A survey on deep learning for patent analysis},
  journal   = {World Patent Information},
  volume    = {65},
  pages     = {102035},
  pagetotal = {13},
  year      = {2021},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {0172-2190},
  doi       = {10.1016/j.wpi.2021.102035},
  language  = {en},
  abstract  = {Patent document collections are an immense source of knowledge for research and innovation communities worldwide. The rapid growth of the number of patent documents poses an enormous challenge for retrieving and analyzing information from this source in an effective manner. Based on deep learning methods for natural language processing, novel approaches have been developed in the field of patent analysis. The goal of these approaches is to reduce costs by automating tasks that previously only domain experts could solve. In this article, we provide a comprehensive survey of the application of deep learning for patent analysis. We summarize the state-of-the-art techniques and describe how they are applied to various tasks in the patent domain. In a detailed discussion, we categorize 40 papers based on the dataset, the representation, and the deep learning architecture that were used, as well as the patent analysis task that was targeted. With our survey, we aim to foster future research at the intersection of patent analysis and deep learning and we conclude by listing promising paths for future work.},
}