% PhD thesis (school: Universitaet Potsdam, 2022) on representing and curating
% knowledge graphs with embeddings. The abstract is stored one sentence per
% line; whitespace inside a field is not significant to BibTeX.
@phdthesis{Jain2022,
  author   = {Jain, Nitisha},
  title    = {Representation and curation of knowledge graphs with embeddings},
  school   = {Universit{\"a}t Potsdam},
  year     = {2022},
  pages    = {ii, 104},
  doi      = {10.25932/publishup-61224},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-612240},
  language = {en},
  abstract = {Knowledge graphs are structured repositories of knowledge that store facts about the general world or a particular domain in terms of entities and their relationships.
    Owing to the heterogeneity of use cases that are served by them, there arises a need for the automated construction of domain-specific knowledge graphs from texts.
    While there have been many research efforts towards open information extraction for automated knowledge graph construction, these techniques do not perform well in domain-specific settings.
    Furthermore, regardless of whether they are constructed automatically from specific texts or based on real-world facts that are constantly evolving, all knowledge graphs inherently suffer from incompleteness as well as errors in the information they hold.
    This thesis investigates the challenges encountered during knowledge graph construction and proposes techniques for their curation (a.k.a. refinement) including the correction of semantic ambiguities and the completion of missing facts.
    Firstly, we leverage existing approaches for the automatic construction of a knowledge graph in the art domain with open information extraction techniques and analyse their limitations.
    In particular, we focus on the challenging task of named entity recognition for artwork titles and show empirical evidence of performance improvement with our proposed solution for the generation of annotated training data.
    Towards the curation of existing knowledge graphs, we identify the issue of polysemous relations that represent different semantics based on the context.
    Having concrete semantics for relations is important for downstream applications (e.g. question answering) that are supported by knowledge graphs.
    Therefore, we define the novel task of finding fine-grained relation semantics in knowledge graphs and propose FineGReS, a data-driven technique that discovers potential sub-relations with fine-grained meaning from existing polysemous relations.
    We leverage knowledge representation learning methods that generate low-dimensional vectors (or embeddings) for knowledge graphs to capture their semantics and structure.
    The efficacy and utility of the proposed technique are demonstrated by comparing it with several baselines on the entity classification use case.
    Further, we explore the semantic representations in knowledge graph embedding models.
    In the past decade, these models have shown state-of-the-art results for the task of link prediction in the context of knowledge graph completion.
    In view of the popularity and widespread application of the embedding techniques not only for link prediction but also for different semantic tasks, this thesis presents a critical analysis of the embeddings by quantitatively measuring their semantic capabilities.
    We investigate and discuss the reasons for the shortcomings of embeddings in terms of the characteristics of the underlying knowledge graph datasets and the training techniques used by popular models.
    Following up on this, we propose ReasonKGE, a novel method for generating semantically enriched knowledge graph embeddings by taking into account the semantics of the facts that are encapsulated by an ontology accompanying the knowledge graph.
    With a targeted, reasoning-based method for generating negative samples during the training of the models, ReasonKGE is able to not only enhance the link prediction performance, but also reduce the number of semantically inconsistent predictions made by the resultant embeddings, thus improving the quality of knowledge graphs.},
}

% PhD thesis (school: Universitaet Potsdam, 2022) on background context
% consistency in neural conversation models.
% NOTE(review): the abstract cites "Vasvani et al., 2017" -- presumably
% "Vaswani"; kept as found, confirm against the published abstract.
@phdthesis{Galetzka2022,
  author   = {Galetzka, Fabian},
  title    = {Investigating and improving background context consistency in neural conversation models},
  school   = {Universit{\"a}t Potsdam},
  year     = {2022},
  pages    = {viii, 173},
  doi      = {10.25932/publishup-58463},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-584637},
  language = {en},
  abstract = {Neural conversation models aim to predict appropriate contributions to a (given) conversation by using neural networks trained on dialogue data.
    A specific strand focuses on non-goal driven dialogues, first proposed by Ritter et al. (2011): They investigated the task of transforming an utterance into an appropriate reply.
    Then, this strand evolved into dialogue system approaches using long dialogue histories and additional background context.
    Contributing meaningful and appropriate to a conversation is a complex task, and therefore research in this area has been very diverse: Serban et al. (2016), for example, looked into utilizing variable length dialogue histories, Zhang et al. (2018) added additional context to the dialogue history, Wolf et al. (2019) proposed a model based on pre-trained Self-Attention neural networks (Vasvani et al., 2017), and Dinan et al. (2021) investigated safety issues of these approaches.
    This trend can be seen as a transformation from trying to somehow carry on a conversation to generating appropriate replies in a controlled and reliable way.
    In this thesis, we first elaborate the meaning of appropriateness in the context of neural conversation models by drawing inspiration from the Cooperative Principle (Grice, 1975).
    We first define what an appropriate contribution has to be by operationalizing these maxims as demands on conversation models: being fluent, informative, consistent towards given context, coherent and following a social norm.
    Then, we identify different targets (or intervention points) to achieve the conversational appropriateness by investigating recent research in that field.
    In this thesis, we investigate the aspect of consistency towards context in greater detail, being one aspect of our interpretation of appropriateness.
    During the research, we developed a new context-based dialogue dataset (KOMODIS) that combines factual and opinionated context to dialogues.
    The KOMODIS dataset is publicly available and we use the data in this thesis to gather new insights in context-augmented dialogue generation.
    We further introduced a new way of encoding context within Self-Attention based neural networks.
    For that, we elaborate the issue of space complexity from knowledge graphs, and propose a concise encoding strategy for structured context inspired from graph neural networks (Gilmer et al., 2017) to reduce the space complexity of the additional context.
    We discuss limitations of context-augmentation for neural conversation models, explore the characteristics of knowledge graphs, and explain how we create and augment knowledge graphs for our experiments.
    Lastly, we analyzed the potential of reinforcement and transfer learning to improve context-consistency for neural conversation models.
    We find that current reward functions need to be more precise to enable the potential of reinforcement learning, and that sequential transfer learning can improve the subjective quality of generated dialogues.},
}