@phdthesis{Sakizloglou2023, author = {Sakizloglou, Lucas}, title = {Evaluating temporal queries over history-aware architectural runtime models}, doi = {10.25932/publishup-60439}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-604396}, school = {Universit{\"a}t Potsdam}, pages = {v, 168}, year = {2023}, abstract = {In model-driven engineering, the adaptation of large software systems with dynamic structure is enabled by architectural runtime models. Such a model represents an abstract state of the system as a graph of interacting components. Every relevant change in the system is mirrored in the model and triggers an evaluation of model queries, which search the model for structural patterns that should be adapted. This thesis focuses on a type of runtime models where the expressiveness of the model and model queries is extended to capture past changes and their timing. These history-aware models and temporal queries enable more informed decision-making during adaptation, as they support the formulation of requirements on the evolution of the pattern that should be adapted. However, evaluating temporal queries during adaptation poses significant challenges. First, it implies the capability to specify and evaluate requirements on the structure, as well as the ordering and timing in which structural changes occur. Then, query answers have to reflect that the history-aware model represents the architecture of a system whose execution may be ongoing, and thus answers may depend on future changes. Finally, query evaluation needs to be adequately fast and memory-efficient despite the increasing size of the history---especially for models that are altered by numerous, rapid changes. The thesis presents a query language and a querying approach for the specification and evaluation of temporal queries. These contributions aim to cope with the challenges of evaluating temporal queries at runtime, a prerequisite for history-aware architectural monitoring and adaptation which has not been systematically treated by prior model-based solutions. The distinguishing features of our contributions are: the specification of queries based on a temporal logic which encodes structural patterns as graphs; the provision of formally precise query answers which account for timing constraints and ongoing executions; the incremental evaluation which avoids the re-computation of query answers after each change; and the option to discard history that is no longer relevant to queries. The query evaluation searches the model for occurrences of a pattern whose evolution satisfies a temporal logic formula. Therefore, besides model-driven engineering, another related research community is runtime verification. The approach differs from prior logic-based runtime verification solutions by supporting the representation and querying of structure via graphs and graph queries, respectively, which is more efficient for queries with complex patterns. We present a prototypical implementation of the approach and measure its speed and memory consumption in monitoring and adaptation scenarios from two application domains, with executions of an increasing size. We assess scalability by a comparison to the state-of-the-art from both related research communities. The implementation yields promising results, which pave the way for sophisticated history-aware self-adaptation solutions and indicate that the approach constitutes a highly effective technique for runtime monitoring on an architectural level.}, language = {en} } @misc{SahlmannSchefflerSchnor2018, author = {Sahlmann, Kristina and Scheffler, Thomas and Schnor, Bettina}, title = {Ontology-driven Device Descriptions for IoT Network Management}, series = {2018 Global Internet of Things Summit (GIoTS)}, journal = {2018 Global Internet of Things Summit (GIoTS)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-6451-3}, doi = {10.1109/GIOTS.2018.8534569}, pages = {295 -- 300}, year = {2018}, abstract = {One particular challenge in the Internet of Things is the management of many heterogeneous things. The things are typically constrained devices with limited memory, power, network and processing capacity. Configuring every device manually is a tedious task. We propose an interoperable way to configure an IoT network automatically using existing standards. The proposed NETCONF-MQTT bridge intermediates between the constrained devices (speaking MQTT) and the network management standard NETCONF. The NETCONF-MQTT bridge generates dynamically YANG data models from the semantic description of the device capabilities based on the oneM2M ontology. We evaluate the approach for two use cases, i.e. describing an actuator and a sensor scenario.}, language = {en} } @article{RuetherSebodeLohseetal.2021, author = {R{\"u}ther, Ferenc Darius and Sebode, Marcial and Lohse, Ansgar W. and Wernicke, Sarah and B{\"o}ttinger, Erwin and Casar, Christian and Braun, Felix and Schramm, Christoph}, title = {Mobile app requirements for patients with rare liver diseases}, series = {Clinics and research in hepatology and gastroenterology}, volume = {45}, journal = {Clinics and research in hepatology and gastroenterology}, number = {6}, publisher = {Elsevier Masson}, address = {Amsterdam}, issn = {2210-7401}, doi = {10.1016/j.clinre.2021.101760}, pages = {10}, year = {2021}, abstract = {Background: More patient data are needed to improve research on rare liver diseases. Mobile health apps enable an exhaustive data collection. Therefore, the European Reference Network on Hepatological diseases (ERN RARE-LIVER) intends to implement an app for patients with rare liver diseases communicating with a patient registry, but little is known about which features patients and their healthcare providers regard as being useful. Aims: This study aimed to investigate how an app for rare liver diseases would be accepted, and to find out which features are considered useful. Methods: An anonymous survey was conducted on adult patients with rare liver diseases at a single academic, tertiary care outpatient-service. Additionally, medical experts of the ERN working group on autoimmune hepatitis were invited to participate in an online survey. Results: In total, the responses from 100 patients with autoimmune (n = 90) or other rare (n = 10) liver diseases and 32 experts were analyzed. Patients were convinced to use a disease specific app (80\%) and expected some benefit to their health (78\%) but responses differed signifi-cantly between younger and older patients (93\% vs. 62\%, p < 0.001; 88\% vs. 64\%, p < 0.01). Comparing patients' and experts' feedback, patients more often expected a simplified healthcare pathway (e.g. 89\% vs. 59\% (p < 0.001) wanted access to one's own medical records), while healthcare providers saw the benefit mainly in improving compliance and treatment outcome (e.g. 93\% vs. 31\% (p < 0.001) and 70\% vs. 21\% (p < 0.001) expected the app to reduce mistakes in taking medication and improve quality of life, respectively).}, language = {en} } @article{RuiperezValienteStaubitzJenneretal.2022, author = {Ruip{\´e}rez-Valiente, Jos{\´e} A. and Staubitz, Thomas and Jenner, Matt and Halawa, Sherif and Zhang, Jiayin and Despujol, Ignacio and Maldonado-Mahauad, Jorge and Montoro, German and Peffer, Melanie and Rohloff, Tobias and Lane, Jenny and Turro, Carlos and Li, Xitong and P{\´e}rez-Sanagust{\´i}n, Mar and Reich, Justin}, title = {Large scale analytics of global and regional MOOC providers: Differences in learners' demographics, preferences, and perceptions}, series = {Computers \& education}, volume = {180}, journal = {Computers \& education}, publisher = {Elsevier}, address = {Oxford}, issn = {0360-1315}, doi = {10.1016/j.compedu.2021.104426}, pages = {17}, year = {2022}, abstract = {Massive Open Online Courses (MOOCs) remarkably attracted global media attention, but the spotlight has been concentrated on a handful of English-language providers. While Coursera, edX, Udacity, and FutureLearn received most of the attention and scrutiny, an entirely new ecosystem of local MOOC providers was growing in parallel. This ecosystem is harder to study than the major players: they are spread around the world, have less staff devoted to maintaining research data, and operate in multiple languages with university and corporate regional partners. To better understand how online learning opportunities are expanding through this regional MOOC ecosystem, we created a research partnership among 15 different MOOC providers from nine countries. We gathered data from over eight million learners in six thousand MOOCs, and we conducted a large-scale survey with more than 10 thousand participants. From our analysis, we argue that these regional providers may be better positioned to meet the goals of expanding access to higher education in their regions than the better-known global providers. To make this claim we highlight three trends: first, regional providers attract a larger local population with more inclusive demographic profiles; second, students predominantly choose their courses based on topical interest, and regional providers do a better job at catering to those needs; and third, many students feel more at ease learning from institutions they already know and have references from. Our work raises the importance of local education in the global MOOC ecosystem, while calling for additional research and conversations across the diversity of MOOC providers.}, language = {en} } @phdthesis{Roumen2023, author = {Roumen, Thijs}, title = {Portable models for laser cutting}, doi = {10.25932/publishup-57814}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-578141}, school = {Universit{\"a}t Potsdam}, pages = {xx, 170}, year = {2023}, abstract = {Laser cutting is a fast and precise fabrication process. This makes laser cutting a powerful process in custom industrial production. Since the patents on the original technology started to expire, a growing community of tech-enthusiasts embraced the technology and started sharing the models they fabricate online. Surprisingly, the shared models appear to largely be one-offs (e.g., they proudly showcase what a single person can make in one afternoon). For laser cutting to become a relevant mainstream phenomenon (as opposed to the current tech enthusiasts and industry users), it is crucial to enable users to reproduce models made by more experienced modelers, and to build on the work of others instead of creating one-offs. We create a technological basis that allows users to build on the work of others—a progression that is currently held back by the use of exchange formats that disregard mechanical differences between machines and therefore overlook implications with respect to how well parts fit together mechanically (aka engineering fit). For the field to progress, we need a machine-independent sharing infrastructure. In this thesis, we outline three approaches that together get us closer to this: (1) 2D cutting plans that are tolerant to machine variations. Our initial take is a minimally invasive approach: replacing machine-specific elements in cutting plans with more tolerant elements using mechanical hacks like springs and wedges. The resulting models fabricate on any consumer laser cutter and in a range of materials. (2) sharing models in 3D. To allow building on the work of others, we build a 3D modeling environment for laser cutting (kyub). After users design a model, they export their 3D models to 2D cutting plans optimized for the machine and material at hand. We extend this volumetric environment with tools to edit individual plates, allowing users to leverage the efficiency of volumetric editing while having control over the most detailed elements in laser-cutting (plates) (3) converting legacy 2D cutting plans to 3D models. To handle legacy models, we build software to interactively reconstruct 3D models from 2D cutting plans. This allows users to reuse the models in more productive ways. We revisit this by automating the assembly process for a large subset of models. The above-mentioned software composes a larger system (kyub, 140,000 lines of code). This system integration enables the push towards actual use, which we demonstrate through a range of workshops where users build complex models such as fully functional guitars. By simplifying sharing and re-use and the resulting increase in model complexity, this line of work forms a small step to enable personal fabrication to scale past the maker phenomenon, towards a mainstream phenomenon—the same way that other fields, such as print (postscript) and ultimately computing itself (portable programming languages, etc.) reached mass adoption.}, language = {en} } @phdthesis{Rothenberger2022, author = {Rothenberger, Ralf}, title = {Satisfiability thresholds for non-uniform random k-SAT}, doi = {10.25932/publishup-54970}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-549702}, school = {Universit{\"a}t Potsdam}, pages = {x, 163}, year = {2022}, abstract = {Boolean Satisfiability (SAT) is one of the problems at the core of theoretical computer science. It was the first problem proven to be NP-complete by Cook and, independently, by Levin. Nowadays it is conjectured that SAT cannot be solved in sub-exponential time. Thus, it is generally assumed that SAT and its restricted version k-SAT are hard to solve. However, state-of-the-art SAT solvers can solve even huge practical instances of these problems in a reasonable amount of time. Why is SAT hard in theory, but easy in practice? One approach to answering this question is investigating the average runtime of SAT. In order to analyze this average runtime the random k-SAT model was introduced. The model generates all k-SAT instances with n variables and m clauses with uniform probability. Researching random k-SAT led to a multitude of insights and tools for analyzing random structures in general. One major observation was the emergence of the so-called satisfiability threshold: A phase transition point in the number of clauses at which the generated formulas go from asymptotically almost surely satisfiable to asymptotically almost surely unsatisfiable. Additionally, instances around the threshold seem to be particularly hard to solve. In this thesis we analyze a more general model of random k-SAT that we call non-uniform random k-SAT. In contrast to the classical model each of the n Boolean variables now has a distinct probability of being drawn. For each of the m clauses we draw k variables according to the variable distribution and choose their signs uniformly at random. Non-uniform random k-SAT gives us more control over the distribution of Boolean variables in the resulting formulas. This allows us to tailor distributions to the ones observed in practice. Notably, non-uniform random k-SAT contains the previously proposed models random k-SAT, power-law random k-SAT and geometric random k-SAT as special cases. We analyze the satisfiability threshold in non-uniform random k-SAT depending on the variable probability distribution. Our goal is to derive conditions on this distribution under which an equivalent of the satisfiability threshold conjecture holds. We start with the arguably simpler case of non-uniform random 2-SAT. For this model we show under which conditions a threshold exists, if it is sharp or coarse, and what the leading constant of the threshold function is. These are exactly the three ingredients one needs in order to prove or disprove the satisfiability threshold conjecture. For non-uniform random k-SAT with k=3 we only prove sufficient conditions under which a threshold exists. We also show some properties of the variable probabilities under which the threshold is sharp in this case. These are the first results on the threshold behavior of non-uniform random k-SAT.}, language = {en} } @article{RosinLaiMouldetal.2022, author = {Rosin, Paul L. and Lai, Yu-Kun and Mould, David and Yi, Ran and Berger, Itamar and Doyle, Lars and Lee, Seungyong and Li, Chuan and Liu, Yong-Jin and Semmo, Amir and Shamir, Ariel and Son, Minjung and Winnem{\"o}ller, Holger}, title = {NPRportrait 1.0: A three-level benchmark for non-photorealistic rendering of portraits}, series = {Computational visual media}, volume = {8}, journal = {Computational visual media}, number = {3}, publisher = {Springer Nature}, address = {London}, issn = {2096-0433}, doi = {10.1007/s41095-021-0255-3}, pages = {445 -- 465}, year = {2022}, abstract = {Recently, there has been an upsurge of activity in image-based non-photorealistic rendering (NPR), and in particular portrait image stylisation, due to the advent of neural style transfer (NST). However, the state of performance evaluation in this field is poor, especially compared to the norms in the computer vision and machine learning communities. Unfortunately, the task of evaluating image stylisation is thus far not well defined, since it involves subjective, perceptual, and aesthetic aspects. To make progress towards a solution, this paper proposes a new structured, three-level, benchmark dataset for the evaluation of stylised portrait images. Rigorous criteria were used for its construction, and its consistency was validated by user studies. Moreover, a new methodology has been developed for evaluating portrait stylisation algorithms, which makes use of the different benchmark levels as well as annotations provided by user studies regarding the characteristics of the faces. We perform evaluation for a wide variety of image stylisation methods (both portrait-specific and general purpose, and also both traditional NPR approaches and NST) using the new benchmark dataset.}, language = {en} } @phdthesis{Rohloff2021, author = {Rohloff, Tobias}, title = {Learning analytics at scale}, doi = {10.25932/publishup-52623}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-526235}, school = {Universit{\"a}t Potsdam}, pages = {xvii, 138, lxvii}, year = {2021}, abstract = {Digital technologies are paving the way for innovative educational approaches. The learning format of Massive Open Online Courses (MOOCs) provides a highly accessible path to lifelong learning while being more affordable and flexible than face-to-face courses. Thereby, thousands of learners can enroll in courses mostly without admission restrictions, but this also raises challenges. Individual supervision by teachers is barely feasible, and learning persistence and success depend on students' self-regulatory skills. Here, technology provides the means for support. The use of data for decision-making is already transforming many fields, whereas in education, it is still a young research discipline. Learning Analytics (LA) is defined as the measurement, collection, analysis, and reporting of data about learners and their learning contexts with the purpose of understanding and improving learning and learning environments. The vast amount of data that MOOCs produce on the learning behavior and success of thousands of students provides the opportunity to study human learning and develop approaches addressing the demands of learners and teachers. The overall purpose of this dissertation is to investigate the implementation of LA at the scale of MOOCs and to explore how data-driven technology can support learning and teaching in this context. To this end, several research prototypes have been iteratively developed for the HPI MOOC Platform. Hence, they were tested and evaluated in an authentic real-world learning environment. Most of the results can be applied on a conceptual level to other MOOC platforms as well. The research contribution of this thesis thus provides practical insights beyond what is theoretically possible. In total, four system components were developed and extended: (1) The Learning Analytics Architecture: A technical infrastructure to collect, process, and analyze event-driven learning data based on schema-agnostic pipelining in a service-oriented MOOC platform. (2) The Learning Analytics Dashboard for Learners: A tool for data-driven support of self-regulated learning, in particular to enable learners to evaluate and plan their learning activities, progress, and success by themselves. (3) Personalized Learning Objectives: A set of features to better connect learners' success to their personal intentions based on selected learning objectives to offer guidance and align the provided data-driven insights about their learning progress. (4) The Learning Analytics Dashboard for Teachers: A tool supporting teachers with data-driven insights to enable the monitoring of their courses with thousands of learners, identify potential issues, and take informed action. For all aspects examined in this dissertation, related research is presented, development processes and implementation concepts are explained, and evaluations are conducted in case studies. Among other findings, the usage of the learner dashboard in combination with personalized learning objectives demonstrated improved certification rates of 11.62\% to 12.63\%. Furthermore, it was observed that the teacher dashboard is a key tool and an integral part for teaching in MOOCs. In addition to the results and contributions, general limitations of the work are discussed—which altogether provide a solid foundation for practical implications and future research.}, language = {en} } @misc{RischKrestel2018, author = {Risch, Julian and Krestel, Ralf}, title = {My Approach = Your Apparatus?}, series = {Libraries}, journal = {Libraries}, publisher = {Association for Computing Machinery}, address = {New York}, isbn = {978-1-4503-5178-2}, issn = {2575-7865}, doi = {10.1145/3197026.3197038}, pages = {283 -- 292}, year = {2018}, abstract = {Comparative text mining extends from genre analysis and political bias detection to the revelation of cultural and geographic differences, through to the search for prior art across patents and scientific papers. These applications use cross-collection topic modeling for the exploration, clustering, and comparison of large sets of documents, such as digital libraries. However, topic modeling on documents from different collections is challenging because of domain-specific vocabulary. We present a cross-collection topic model combined with automatic domain term extraction and phrase segmentation. This model distinguishes collection-specific and collection-independent words based on information entropy and reveals commonalities and differences of multiple text collections. We evaluate our model on patents, scientific papers, newspaper articles, forum posts, and Wikipedia articles. In comparison to state-of-the-art cross-collection topic modeling, our model achieves up to 13\% higher topic coherence, up to 4\% lower perplexity, and up to 31\% higher document classification accuracy. More importantly, our approach is the first topic model that ensures disjunct general and specific word distributions, resulting in clear-cut topic representations.}, language = {en} } @article{RischKrestel2019, author = {Risch, Julian and Krestel, Ralf}, title = {Domain-specific word embeddings for patent classification}, series = {Data Technologies and Applications}, volume = {53}, journal = {Data Technologies and Applications}, number = {1}, publisher = {Emerald Group Publishing Limited}, address = {Bingley}, issn = {2514-9288}, doi = {10.1108/DTA-01-2019-0002}, pages = {108 -- 122}, year = {2019}, abstract = {Purpose Patent offices and other stakeholders in the patent domain need to classify patent applications according to a standardized classification scheme. The purpose of this paper is to examine the novelty of an application it can then be compared to previously granted patents in the same class. Automatic classification would be highly beneficial, because of the large volume of patents and the domain-specific knowledge needed to accomplish this costly manual task. However, a challenge for the automation is patent-specific language use, such as special vocabulary and phrases. Design/methodology/approach To account for this language use, the authors present domain-specific pre-trained word embeddings for the patent domain. The authors train the model on a very large data set of more than 5m patents and evaluate it at the task of patent classification. To this end, the authors propose a deep learning approach based on gated recurrent units for automatic patent classification built on the trained word embeddings. Findings Experiments on a standardized evaluation data set show that the approach increases average precision for patent classification by 17 percent compared to state-of-the-art approaches. In this paper, the authors further investigate the model's strengths and weaknesses. An extensive error analysis reveals that the learned embeddings indeed mirror patent-specific language use. The imbalanced training data and underrepresented classes are the most difficult remaining challenge. Originality/value The proposed approach fulfills the need for domain-specific word embeddings for downstream tasks in the patent domain, such as patent classification or patent analysis.}, language = {en} } @article{RischKrestel2020, author = {Risch, Julian and Krestel, Ralf}, title = {Toxic comment detection in online discussions}, series = {Deep learning-based approaches for sentiment analysis}, journal = {Deep learning-based approaches for sentiment analysis}, editor = {Agarwal, Basant and Nayak, Richi and Mittal, Namita and Patnaik, Srikanta}, publisher = {Springer}, address = {Singapore}, isbn = {978-981-15-1216-2}, issn = {2524-7565}, doi = {10.1007/978-981-15-1216-2_4}, pages = {85 -- 109}, year = {2020}, abstract = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. In contrast to other online posts, news discussions are related to particular news articles, comments refer to each other, and individual conversations emerge. However, the misuse by spammers, haters, and trolls makes costly content moderation necessary. Sentiment analysis can not only support moderation but also help to understand the dynamics of online discussions. A subtask of content moderation is the identification of toxic comments. To this end, we describe the concept of toxicity and characterize its subclasses. Further, we present various deep learning approaches, including datasets and architectures, tailored to sentiment analysis in online discussions. One way to make these approaches more comprehensible and trustworthy is fine-grained instead of binary comment classification. On the downside, more classes require more training data. Therefore, we propose to augment training data by using transfer learning. We discuss real-world applications, such as semi-automated comment moderation and troll detection. Finally, we outline future challenges and current limitations in light of most recent research publications.}, language = {en} } @phdthesis{Risch2020, author = {Risch, Julian}, title = {Reader comment analysis on online news platforms}, doi = {10.25932/publishup-48922}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-489222}, school = {Universit{\"a}t Potsdam}, pages = {xi, 135}, year = {2020}, abstract = {Comment sections of online news platforms are an essential space to express opinions and discuss political topics. However, the misuse by spammers, haters, and trolls raises doubts about whether the benefits justify the costs of the time-consuming content moderation. As a consequence, many platforms limited or even shut down comment sections completely. In this thesis, we present deep learning approaches for comment classification, recommendation, and prediction to foster respectful and engaging online discussions. The main focus is on two kinds of comments: toxic comments, which make readers leave a discussion, and engaging comments, which make readers join a discussion. First, we discourage and remove toxic comments, e.g., insults or threats. To this end, we present a semi-automatic comment moderation process, which is based on fine-grained text classification models and supports moderators. Our experiments demonstrate that data augmentation, transfer learning, and ensemble learning allow training robust classifiers even on small datasets. To establish trust in the machine-learned models, we reveal which input features are decisive for their output with attribution-based explanation methods. Second, we encourage and highlight engaging comments, e.g., serious questions or factual statements. We automatically identify the most engaging comments, so that readers need not scroll through thousands of comments to find them. The model training process builds on upvotes and replies as a measure of reader engagement. We also identify comments that address the article authors or are otherwise relevant to them to support interactions between journalists and their readership. Taking into account the readers' interests, we further provide personalized recommendations of discussions that align with their favored topics or involve frequent co-commenters. Our models outperform multiple baselines and recent related work in experiments on comment datasets from different platforms.}, language = {en} } @article{RingEisenmannKandiletal.2022, author = {Ring, Raphaela M. and Eisenmann, Clemens and Kandil, Farid and Steckhan, Nico and Demmrich, Sarah and Klatte, Caroline and Kessler, Christian S. and Jeitler, Michael and Boschmann, Michael and Michalsen, Andreas and Blakeslee, Sarah B. and St{\"o}ckigt, Barbara and Stritter, Wiebke and Koppold-Liebscher, Daniela A.}, title = {Mental and behavioural responses to Bah{\´a}'{\´i} fasting: Looking behind the scenes of a religiously motivated intermittent fast using a mixed methods approach}, series = {Nutrients}, volume = {14}, journal = {Nutrients}, number = {5}, publisher = {MDPI}, address = {Basel}, issn = {2072-6643}, doi = {10.3390/nu14051038}, pages = {23}, year = {2022}, abstract = {Background/Objective: Historically, fasting has been practiced not only for medical but also for religious reasons. Baha'is follow an annual religious intermittent dry fast of 19 days. We inquired into motivation behind and subjective health impacts of Baha'i fasting. Methods: A convergent parallel mixed methods design was embedded in a clinical single arm observational study. Semi-structured individual interviews were conducted before (n = 7), during (n = 8), and after fasting (n = 8). Three months after the fasting period, two focus group interviews were conducted (n = 5/n = 3). A total of 146 Baha'i volunteers answered an online survey at five time points before, during, and after fasting. Results: Fasting was found to play a central role for the religiosity of interviewees, implying changes in daily structures, spending time alone, engaging in religious practices, and experiencing social belonging. Results show an increase in mindfulness and well-being, which were accompanied by behavioural changes and experiences of self-efficacy and inner freedom. Survey scores point to an increase in mindfulness and well-being during fasting, while stress, anxiety, and fatigue decreased. Mindfulness remained elevated even three months after the fast. Conclusion: Baha'i fasting seems to enhance participants' mindfulness and well-being, lowering stress levels and reducing fatigue. Some of these effects lasted more than three months after fasting.}, language = {en} } @phdthesis{Richter2018, author = {Richter, Rico}, title = {Concepts and techniques for processing and rendering of massive 3D point clouds}, doi = {10.25932/publishup-42330}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-423304}, school = {Universit{\"a}t Potsdam}, pages = {v, 131}, year = {2018}, abstract = {Remote sensing technology, such as airborne, mobile, or terrestrial laser scanning, and photogrammetric techniques, are fundamental approaches for efficient, automatic creation of digital representations of spatial environments. For example, they allow us to generate 3D point clouds of landscapes, cities, infrastructure networks, and sites. As essential and universal category of geodata, 3D point clouds are used and processed by a growing number of applications, services, and systems such as in the domains of urban planning, landscape architecture, environmental monitoring, disaster management, virtual geographic environments as well as for spatial analysis and simulation. While the acquisition processes for 3D point clouds become more and more reliable and widely-used, applications and systems are faced with more and more 3D point cloud data. In addition, 3D point clouds, by their very nature, are raw data, i.e., they do not contain any structural or semantics information. Many processing strategies common to GIS such as deriving polygon-based 3D models generally do not scale for billions of points. GIS typically reduce data density and precision of 3D point clouds to cope with the sheer amount of data, but that results in a significant loss of valuable information at the same time. This thesis proposes concepts and techniques designed to efficiently store and process massive 3D point clouds. To this end, object-class segmentation approaches are presented to attribute semantics to 3D point clouds, used, for example, to identify building, vegetation, and ground structures and, thus, to enable processing, analyzing, and visualizing 3D point clouds in a more effective and efficient way. Similarly, change detection and updating strategies for 3D point clouds are introduced that allow for reducing storage requirements and incrementally updating 3D point cloud databases. In addition, this thesis presents out-of-core, real-time rendering techniques used to interactively explore 3D point clouds and related analysis results. All techniques have been implemented based on specialized spatial data structures, out-of-core algorithms, and GPU-based processing schemas to cope with massive 3D point clouds having billions of points. All proposed techniques have been evaluated and demonstrated their applicability to the field of geospatial applications and systems, in particular for tasks such as classification, processing, and visualization. Case studies for 3D point clouds of entire cities with up to 80 billion points show that the presented approaches open up new ways to manage and apply large-scale, dense, and time-variant 3D point clouds as required by a rapidly growing number of applications and systems.}, language = {en} } @article{RichlySchlosserBoissier2022, author = {Richly, Keven and Schlosser, Rainer and Boissier, Martin}, title = {Budget-conscious fine-grained configuration optimization for spatio-temporal applications}, series = {Proceedings of the VLDB Endowment}, volume = {15}, journal = {Proceedings of the VLDB Endowment}, number = {13}, publisher = {Association for Computing Machinery (ACM)}, address = {[New York]}, issn = {2150-8097}, doi = {10.14778/3565838.3565858}, pages = {4079 -- 4092}, year = {2022}, abstract = {Based on the performance requirements of modern spatio-temporal data mining applications, in-memory database systems are often used to store and process the data. To efficiently utilize the scarce DRAM capacities, modern database systems support various tuning possibilities to reduce the memory footprint (e.g., data compression) or increase performance (e.g., additional indexes). However, the selection of cost and performance balancing configurations is challenging due to the vast number of possible setups consisting of mutually dependent individual decisions. In this paper, we introduce a novel approach to jointly optimize the compression, sorting, indexing, and tiering configuration for spatio-temporal workloads. Further, we consider horizontal data partitioning, which enables the independent application of different tuning options on a fine-grained level. We propose different linear programming (LP) models addressing cost dependencies at different levels of accuracy to compute optimized tuning configurations for a given workload and memory budgets. To yield maintainable and robust configurations, we extend our LP-based approach to incorporate reconfiguration costs as well as a worst-case optimization for potential workload scenarios. Further, we demonstrate on a real-world dataset that our models allow to significantly reduce the memory footprint with equal performance or increase the performance with equal memory size compared to existing tuning heuristics.}, language = {en} } @misc{RichlyBrauerSchlosser2020, author = {Richly, Keven and Brauer, Janos and Schlosser, Rainer}, title = {Predicting location probabilities of drivers to improved dispatch decisions of transportation network companies based on trajectory data}, series = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, journal = {Postprints der Universit{\"a}t Potsdam : Reihe der Digital Engineering Fakult{\"a}t}, number = {9}, doi = {10.25932/publishup-52404}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-524040}, pages = {14}, year = {2020}, abstract = {The demand for peer-to-peer ridesharing services increased over the last years rapidly. To cost-efficiently dispatch orders and communicate accurate pick-up times is challenging as the current location of each available driver is not exactly known since observed locations can be outdated for several seconds. The developed trajectory visualization tool enables transportation network companies to analyze dispatch processes and determine the causes of unexpected delays. As dispatching algorithms are based on the accuracy of arrival time predictions, we account for factors like noise, sample rate, technical and economic limitations as well as the duration of the entire process as they have an impact on the accuracy of spatio-temporal data. To improve dispatching strategies, we propose a prediction approach that provides a probability distribution for a driver's future locations based on patterns observed in past trajectories. We demonstrate the capabilities of our prediction results to ( i) avoid critical delays, (ii) to estimate waiting times with higher confidence, and (iii) to enable risk considerations in dispatching strategies.}, language = {en} } @article{RichlyBrauerSchlosser2020, author = {Richly, Keven and Brauer, Janos and Schlosser, Rainer}, title = {Predicting location probabilities of drivers to improved dispatch decisions of transportation network companies based on trajectory data}, series = {Proceedings of the 9th International Conference on Operations Research and Enterprise Systems - ICORES}, journal = {Proceedings of the 9th International Conference on Operations Research and Enterprise Systems - ICORES}, publisher = {Springer}, address = {Berlin}, pages = {12}, year = {2020}, abstract = {The demand for peer-to-peer ridesharing services increased over the last years rapidly. To cost-efficiently dispatch orders and communicate accurate pick-up times is challenging as the current location of each available driver is not exactly known since observed locations can be outdated for several seconds. The developed trajectory visualization tool enables transportation network companies to analyze dispatch processes and determine the causes of unexpected delays. As dispatching algorithms are based on the accuracy of arrival time predictions, we account for factors like noise, sample rate, technical and economic limitations as well as the duration of the entire process as they have an impact on the accuracy of spatio-temporal data. To improve dispatching strategies, we propose a prediction approach that provides a probability distribution for a driver's future locations based on patterns observed in past trajectories. We demonstrate the capabilities of our prediction results to ( i) avoid critical delays, (ii) to estimate waiting times with higher confidence, and (iii) to enable risk considerations in dispatching strategies.}, language = {en} } @misc{Richly2019, author = {Richly, Keven}, title = {Leveraging spatio-temporal soccer data to define a graphical query language for game recordings}, series = {IEEE International Conference on Big Data (Big Data)}, journal = {IEEE International Conference on Big Data (Big Data)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-5035-6}, issn = {2639-1589}, doi = {10.1109/BigData.2018.8622159}, pages = {3456 -- 3463}, year = {2019}, abstract = {For professional soccer clubs, performance and video analysis are an integral part of the preparation and post-processing of games. Coaches, scouts, and video analysts extract information about strengths and weaknesses of their team as well as opponents by manually analyzing video recordings of past games. Since video recordings are an unstructured data source, it is a complex and time-intensive task to find specific game situations and identify similar patterns. In this paper, we present a novel approach to detect patterns and situations (e.g., playmaking and ball passing of midfielders) based on trajectory data. The application uses the metaphor of a tactic board to offer a graphical query language. With this interactive tactic board, the user can model a game situation or mark a specific situation in the video recording for which all matching occurrences in various games are immediately displayed, and the user can directly jump to the corresponding game scene. Through the additional visualization of key performance indicators (e.g.,the physical load of the players), the user can get a better overall assessment of situations. With the capabilities to find specific game situations and complex patterns in video recordings, the interactive tactic board serves as a useful tool to improve the video analysis process of professional sports teams.}, language = {en} } @misc{Richly2019, author = {Richly, Keven}, title = {A survey on trajectory data management for hybrid transactional and analytical workloads}, series = {IEEE International Conference on Big Data (Big Data)}, journal = {IEEE International Conference on Big Data (Big Data)}, publisher = {IEEE}, address = {New York}, isbn = {978-1-5386-5035-6}, issn = {2639-1589}, doi = {10.1109/BigData.2018.8622394}, pages = {562 -- 569}, year = {2019}, abstract = {Rapid advances in location-acquisition technologies have led to large amounts of trajectory data. This data is the foundation for a broad spectrum of services driven and improved by trajectory data mining. However, for hybrid transactional and analytical workloads, the storing and processing of rapidly accumulated trajectory data is a non-trivial task. In this paper, we present a detailed survey about state-of-the-art trajectory data management systems. To determine the relevant aspects and requirements for such systems, we developed a trajectory data mining framework, which summarizes the different steps in the trajectory data mining process. Based on the derived requirements, we analyze different concepts to store, compress, index, and process spatio-temporal data. There are various trajectory management systems, which are optimized for scalability, data footprint reduction, elasticity, or query performance. To get a comprehensive overview, we describe and compare different exciting systems. Additionally, the observed similarities in the general structure of different systems are consolidated in a general blueprint of trajectory management systems.}, language = {en} } @phdthesis{Richly2024, author = {Richly, Keven}, title = {Memory-efficient data management for spatio-temporal applications}, doi = {10.25932/publishup-63547}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-635473}, school = {Universit{\"a}t Potsdam}, pages = {xii, 181}, year = {2024}, abstract = {The wide distribution of location-acquisition technologies means that large volumes of spatio-temporal data are continuously being accumulated. Positioning systems such as GPS enable the tracking of various moving objects' trajectories, which are usually represented by a chronologically ordered sequence of observed locations. The analysis of movement patterns based on detailed positional information creates opportunities for applications that can improve business decisions and processes in a broad spectrum of industries (e.g., transportation, traffic control, or medicine). Due to the large data volumes generated in these applications, the cost-efficient storage of spatio-temporal data is desirable, especially when in-memory database systems are used to achieve interactive performance requirements. To efficiently utilize the available DRAM capacities, modern database systems support various tuning possibilities to reduce the memory footprint (e.g., data compression) or increase performance (e.g., additional indexes structures). By considering horizontal data partitioning, we can independently apply different tuning options on a fine-grained level. However, the selection of cost and performance-balancing configurations is challenging, due to the vast number of possible setups consisting of mutually dependent individual decisions. In this thesis, we introduce multiple approaches to improve spatio-temporal data management by automatically optimizing diverse tuning options for the application-specific access patterns and data characteristics. Our contributions are as follows: (1) We introduce a novel approach to determine fine-grained table configurations for spatio-temporal workloads. Our linear programming (LP) approach jointly optimizes the (i) data compression, (ii) ordering, (iii) indexing, and (iv) tiering. We propose different models which address cost dependencies at different levels of accuracy to compute optimized tuning configurations for a given workload, memory budgets, and data characteristics. To yield maintainable and robust configurations, we further extend our LP-based approach to incorporate reconfiguration costs as well as optimizations for multiple potential workload scenarios. (2) To optimize the storage layout of timestamps in columnar databases, we present a heuristic approach for the workload-driven combined selection of a data layout and compression scheme. By considering attribute decomposition strategies, we are able to apply application-specific optimizations that reduce the memory footprint and improve performance. (3) We introduce an approach that leverages past trajectory data to improve the dispatch processes of transportation network companies. Based on location probabilities, we developed risk-averse dispatch strategies that reduce critical delays. (4) Finally, we used the use case of a transportation network company to evaluate our database optimizations on a real-world dataset. We demonstrate that workload-driven fine-grained optimizations allow us to reduce the memory footprint (up to 71\% by equal performance) or increase the performance (up to 90\% by equal memory size) compared to established rule-based heuristics. Individually, our contributions provide novel approaches to the current challenges in spatio-temporal data mining and database research. Combining them allows in-memory databases to store and process spatio-temporal data more cost-efficiently.}, language = {en} }