@article{ChromikKirstenHerdicketal.2022, author = {Chromik, Jonas and Kirsten, Kristina and Herdick, Arne and Kappattanavar, Arpita Mallikarjuna and Arnrich, Bert}, title = {SensorHub}, series = {Sensors}, volume = {22}, journal = {Sensors}, number = {1}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s22010408}, pages = {18}, year = {2022}, abstract = {Observational studies are an important tool for determining whether the findings from controlled experiments can be transferred into scenarios that are closer to subjects' real-life circumstances. A rigorous approach to observational studies involves collecting data from different sensors to comprehensively capture the situation of the subject. However, this leads to technical difficulties especially if the sensors are from different manufacturers, as multiple data collection tools have to run simultaneously. We present SensorHub, a system that can collect data from various wearable devices from different manufacturers, such as inertial measurement units, portable electrocardiographs, portable electroencephalographs, portable photoplethysmographs, and sensors for electrodermal activity. Additionally, our tool offers the possibility to include ecological momentary assessments (EMAs) in studies. Hence, SensorHub enables multimodal sensor data collection under real-world conditions and allows direct user feedback to be collected through questionnaires, enabling studies at home. In a first study with 11 participants, we successfully used SensorHub to record multiple signals with different devices and collected additional information with the help of EMAs. In addition, we evaluated SensorHub's technical capabilities in several trials with up to 21 participants recording simultaneously using multiple sensors with sampling frequencies as high as 1000 Hz. 
We could show that although there is a theoretical limitation to the transmissible data rate, in practice this limitation is not an issue and data loss is rare. We conclude that with modern communication protocols and with the increasingly powerful smartphones and wearables, a system like our SensorHub establishes an interoperability framework to adequately combine consumer-grade sensing hardware which enables observational studies in real life.}, language = {en} } @article{KastiusSchlosser2022, author = {Kastius, Alexander and Schlosser, Rainer}, title = {Dynamic pricing under competition using reinforcement learning}, series = {Journal of revenue and pricing management}, volume = {21}, journal = {Journal of revenue and pricing management}, number = {1}, publisher = {Springer Nature Switzerland AG}, address = {Cham}, issn = {1476-6930}, doi = {10.1057/s41272-021-00285-3}, pages = {50 -- 63}, year = {2022}, abstract = {Dynamic pricing is considered a possibility to gain an advantage over competitors in modern online markets. The past advancements in Reinforcement Learning (RL) provided more capable algorithms that can be used to solve pricing problems. In this paper, we study the performance of Deep Q-Networks (DQN) and Soft Actor Critic (SAC) in different market models. We consider tractable duopoly settings, where optimal solutions derived by dynamic programming techniques can be used for verification, as well as oligopoly settings, which are usually intractable due to the curse of dimensionality. We find that both algorithms provide reasonable results, while SAC performs better than DQN. 
Moreover, we show that under certain conditions, RL algorithms can be forced into collusion by their competitors without direct communication.}, language = {en} } @article{MattisBeckmannReinetal.2022, author = {Mattis, Toni and Beckmann, Tom and Rein, Patrick and Hirschfeld, Robert}, title = {First-class concepts}, series = {Journal of object technology : JOT / ETH Z{\"u}rich, Department of Computer Science}, volume = {21}, journal = {Journal of object technology : JOT / ETH Z{\"u}rich, Department of Computer Science}, number = {2}, publisher = {ETH Z{\"u}rich, Department of Computer Science}, address = {Z{\"u}rich}, issn = {1660-1769}, doi = {10.5381/jot.2022.21.2.a6}, pages = {1 -- 15}, year = {2022}, abstract = {Ideally, programs are partitioned into independently maintainable and understandable modules. As a system grows, its architecture gradually loses the capability to accommodate new concepts in a modular way. While refactoring is expensive and not always possible, and the programming language might lack dedicated primary language constructs to express certain cross-cutting concerns, programmers are still able to explain and delineate convoluted concepts through secondary means: code comments, use of whitespace and arrangement of code, documentation, or communicating tacit knowledge.
Secondary constructs are easy to change and provide high flexibility in communicating cross-cutting concerns and other concepts among programmers. However, such secondary constructs usually have no reified representation that can be explored and manipulated as first-class entities through the programming environment.
In this exploratory work, we discuss novel ways to express a wide range of concepts, including cross-cutting concerns, patterns, and lifecycle artifacts independently of the dominant decomposition imposed by an existing architecture. We propose the representation of concepts as first-class objects inside the programming environment that retain the capability to change as easily as code comments. We explore new tools that allow programmers to view, navigate, and change programs based on conceptual perspectives. In a small case study, we demonstrate how such views can be created and how the programming experience changes from draining programmers' attention by stretching it across multiple modules toward focusing it on cohesively presented concepts. Our designs are geared toward facilitating multiple secondary perspectives on a system to co-exist in symbiosis with the original architecture, hence making it easier to explore, understand, and explain complex contexts and narratives that are hard or impossible to express using primary modularity constructs.}, language = {en} } @article{SchmidlPapenbrock2022, author = {Schmidl, Sebastian and Papenbrock, Thorsten}, title = {Efficient distributed discovery of bidirectional order dependencies}, series = {The VLDB journal}, volume = {31}, journal = {The VLDB journal}, number = {1}, publisher = {Springer}, address = {Berlin ; Heidelberg ; New York}, issn = {1066-8888}, doi = {10.1007/s00778-021-00683-4}, pages = {49 -- 74}, year = {2022}, abstract = {Bidirectional order dependencies (bODs) capture order relationships between lists of attributes in a relational table. They can express that, for example, sorting books by publication date in ascending order also sorts them by age in descending order. The knowledge about order relationships is useful for many data management tasks, such as query optimization, data cleaning, or consistency checking. 
Because the bODs of a specific dataset are usually not explicitly given, they need to be discovered. The discovery of all minimal bODs (in set-based canonical form) is a task with exponential complexity in the number of attributes, though, which is why existing bOD discovery algorithms cannot process datasets of practically relevant size in a reasonable time. In this paper, we propose the distributed bOD discovery algorithm DISTOD, whose execution time scales with the available hardware. DISTOD is a scalable, robust, and elastic bOD discovery approach that combines efficient pruning techniques for bOD candidates in set-based canonical form with a novel, reactive, and distributed search strategy. Our evaluation on various datasets shows that DISTOD outperforms both single-threaded and distributed state-of-the-art bOD discovery algorithms by up to orders of magnitude; it can, in particular, process much larger datasets.}, language = {en} } @article{Schlosser2022, author = {Schlosser, Rainer}, title = {Heuristic mean-variance optimization in Markov decision processes using state-dependent risk aversion}, series = {IMA journal of management mathematics / Institute of Mathematics and Its Applications}, volume = {33}, journal = {IMA journal of management mathematics / Institute of Mathematics and Its Applications}, number = {2}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1471-678X}, doi = {10.1093/imaman/dpab009}, pages = {181 -- 199}, year = {2022}, abstract = {In dynamic decision problems, it is challenging to find the right balance between maximizing expected rewards and minimizing risks. In this paper, we consider NP-hard mean-variance (MV) optimization problems in Markov decision processes with a finite time horizon. We present a heuristic approach to solve MV problems, which is based on state-dependent risk aversion and efficient dynamic programming techniques. 
Our approach can also be applied to mean-semivariance (MSV) problems, which particularly focus on the downside risk. We demonstrate the applicability and the effectiveness of our heuristic for dynamic pricing applications. Using reproducible examples, we show that our approach outperforms existing state-of-the-art benchmark models for MV and MSV problems while also providing competitive runtimes. Further, compared to models based on constant risk levels, we find that state-dependent risk aversion allows to more effectively intervene in case sales processes deviate from their planned paths. Our concepts are domain independent, easy to implement and of low computational complexity.}, language = {en} } @article{AndreeIhdeWeskeetal.2022, author = {Andree, Kerstin and Ihde, Sven and Weske, Mathias and Pufahl, Luise}, title = {An exception handling framework for case management}, series = {Software and Systems Modeling}, volume = {21}, journal = {Software and Systems Modeling}, number = {3}, publisher = {Springer}, address = {Heidelberg}, issn = {1619-1366}, doi = {10.1007/s10270-022-00993-3}, pages = {939 -- 962}, year = {2022}, abstract = {In order to achieve their business goals, organizations heavily rely on the operational excellence of their business processes. In traditional scenarios, business processes are usually well-structured, clearly specifying when and how certain tasks have to be executed. Flexible and knowledge-intensive processes are gathering momentum, where a knowledge worker drives the execution of a process case and determines the exact process path at runtime. In the case of an exception, the knowledge worker decides on an appropriate handling. While there is initial work on exception handling in well-structured business processes, exceptions in case management have not been sufficiently researched. 
This paper proposes an exception handling framework for stage-oriented case management languages, namely Guard Stage Milestone Model, Case Management Model and Notation, and Fragment-based Case Management. The effectiveness of the framework is evaluated with two real-world use cases showing that it covers all relevant exceptions and proposed handling strategies.}, language = {en} } @article{BanoMichaelRumpeetal.2022, author = {Bano, Dorina and Michael, Judith and Rumpe, Bernhard and Varga, Simon and Weske, Mathias}, title = {Process-aware digital twin cockpit synthesis from event logs}, series = {Journal of computer languages}, volume = {70}, journal = {Journal of computer languages}, publisher = {Elsevier}, address = {Amsterdam [u.a.]}, issn = {2590-1184}, doi = {10.1016/j.cola.2022.101121}, pages = {19}, year = {2022}, abstract = {The engineering of digital twins and their user interaction parts with explicated processes, namely process-aware digital twin cockpits (PADTCs), is challenging due to the complexity of the systems and the need for information from different disciplines within the engineering process. Therefore, it is interesting to investigate how to facilitate their engineering by using already existing data, namely event logs, and reducing the number of manual steps for their engineering. Current research lacks systematic, automated approaches to derive process-aware digital twin cockpits even though some helpful techniques already exist in the areas of process mining and software engineering. Within this paper, we present a low-code development approach that reduces the amount of hand-written code needed and uses process mining techniques to derive PADTCs. We describe what models could be derived from event log data, which generative steps are needed for the engineering of PADTCs, and how process mining could be incorporated into the resulting application. 
This process is evaluated using the MIMIC III dataset for the creation of a PADTC prototype for an automated hospital transportation system. This approach can be used for early prototyping of PADTCs as it needs no hand-written code in the first place, but it still allows for the iterative evolvement of the application. This empowers domain experts to create their PADTC prototypes.}, language = {en} } @article{ReimannBuchheimSemmoetal.2022, author = {Reimann, Max and Buchheim, Benito and Semmo, Amir and D{\"o}llner, J{\"u}rgen and Trapp, Matthias}, title = {Controlling strokes in fast neural style transfer using content transforms}, series = {The Visual Computer}, volume = {38}, journal = {The Visual Computer}, number = {12}, publisher = {Springer}, address = {New York}, issn = {0178-2789}, doi = {10.1007/s00371-022-02518-x}, pages = {4019 -- 4033}, year = {2022}, abstract = {Fast style transfer methods have recently gained popularity in art-related applications as they make a generalized real-time stylization of images practicable. However, they are mostly limited to one-shot stylizations concerning the interactive adjustment of style elements. In particular, the expressive control over stroke sizes or stroke orientations remains an open challenge. To this end, we propose a novel stroke-adjustable fast style transfer network that enables simultaneous control over the stroke size and intensity, and allows a wider range of expressive editing than current approaches by utilizing the scale-variance of convolutional neural networks. Furthermore, we introduce a network-agnostic approach for style-element editing by applying reversible input transformations that can adjust strokes in the stylized output. At this, stroke orientations can be adjusted, and warping-based effects can be applied to stylistic elements, such as swirls or waves. 
To demonstrate the real-world applicability of our approach, we present StyleTune, a mobile app for interactive editing of neural style transfers at multiple levels of control. Our app allows stroke adjustments on a global and local level. It furthermore implements an on-device patch-based upsampling step that enables users to achieve results with high output fidelity and resolutions of more than 20 megapixels. Our approach allows users to art-direct their creations and achieve results that are not possible with current style transfer applications.}, language = {en} } @article{BlaesiusFreibergerFriedrichetal.2022, author = {Bl{\"a}sius, Thomas and Freiberger, Cedric and Friedrich, Tobias and Katzmann, Maximilian and Montenegro-Retana, Felix and Thieffry, Marianne}, title = {Efficient Shortest Paths in Scale-Free Networks with Underlying Hyperbolic Geometry}, series = {ACM Transactions on Algorithms}, volume = {18}, journal = {ACM Transactions on Algorithms}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1549-6325}, doi = {10.1145/3516483}, pages = {1 -- 32}, year = {2022}, abstract = {A standard approach to accelerating shortest path algorithms on networks is the bidirectional search, which explores the graph from the start and the destination, simultaneously. In practice this strategy performs particularly well on scale-free real-world networks. Such networks typically have a heterogeneous degree distribution (e.g., a power-law distribution) and high clustering (i.e., vertices with a common neighbor are likely to be connected themselves). These two properties can be obtained by assuming an underlying hyperbolic geometry.
To explain the observed behavior of the bidirectional search, we analyze its running time on hyperbolic random graphs and prove that it is $\tilde{O}(n^{2-1/\alpha} + n^{1/(2\alpha)} + \delta_{\max})$ with high probability, where $\alpha \in (1/2, 1)$ controls the power-law exponent of the degree distribution, and $\delta_{\max}$ is the maximum degree. This bound is sublinear, improving the obvious worst-case linear bound. Although our analysis depends on the underlying geometry, the algorithm itself is oblivious to it.}, language = {en} } @article{BonifatiMiorNaumannetal.2022, author = {Bonifati, Angela and Mior, Michael J. and Naumann, Felix and Noack, Nele Sina}, title = {How inclusive are we?}, series = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, volume = {50}, journal = {SIGMOD record / Association for Computing Machinery, Special Interest Group on Management of Data}, number = {4}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0163-5808}, doi = {10.1145/3516431.3516438}, pages = {30 -- 35}, year = {2022}, abstract = {ACM SIGMOD, VLDB and other database organizations have committed to fostering an inclusive and diverse community, as do many other scientific organizations. Recently, different measures have been taken to advance these goals, especially for underrepresented groups. One possible measure is double-blind reviewing, which aims to hide gender, ethnicity, and other properties of the authors.
We report the preliminary results of a gender diversity analysis of publications of the database community across several peer-reviewed venues, and also compare women's authorship percentages in both single-blind and double-blind venues along the years. We also obtained a cross comparison of the obtained results in data management with other relevant areas in Computer Science.}, language = {en} } @article{VerweijNeyThompson2022, author = {Verweij, Marco and Ney, Steven and Thompson, Michael}, title = {Cultural Theory's contributions to climate science}, series = {European journal for philosophy of science}, volume = {12}, journal = {European journal for philosophy of science}, number = {2}, publisher = {Springer}, address = {Dordrecht}, issn = {1879-4912}, doi = {10.1007/s13194-022-00464-y}, pages = {13}, year = {2022}, abstract = {In his article, 'Social constructionism and climate science denial', Hansson claims to present empirical evidence that the cultural theory developed by Dame Mary Douglas, Aaron Wildavsky and ourselves (among others) leads to (climate) science denial. In this reply, we show that there is no validity to these claims. First, we show that Hansson's empirical evidence that cultural theory has led to climate science denial falls apart under closer inspection. Contrary to Hansson's claims, cultural theory has made significant contributions to understanding and addressing climate change. Second, we discuss various features of Douglas' cultural theory that differentiate it from other constructivist approaches and make it compatible with the scientific method. 
Thus, we also demonstrate that cultural theory cannot be accused of epistemic relativism.}, language = {en} } @article{DoerrKoetzing2022, author = {Doerr, Benjamin and K{\"o}tzing, Timo}, title = {Lower bounds from fitness levels made easy}, series = {Algorithmica}, journal = {Algorithmica}, publisher = {Springer}, address = {New York}, issn = {0178-4617}, doi = {10.1007/s00453-022-00952-w}, pages = {29}, year = {2022}, abstract = {One of the first and easy to use techniques for proving run time bounds for evolutionary algorithms is the so-called method of fitness levels by Wegener. It uses a partition of the search space into a sequence of levels which are traversed by the algorithm in increasing order, possibly skipping levels. An easy, but often strong upper bound for the run time can then be derived by adding the reciprocals of the probabilities to leave the levels (or upper bounds for these). Unfortunately, a similarly effective method for proving lower bounds has not yet been established. The strongest such method, proposed by Sudholt (2013), requires a careful choice of the viscosity parameters $\gamma_{i,j}$, $0 \le i < j \le n$. In this paper we present two new variants of the method, one for upper and one for lower bounds. Besides the level leaving probabilities, they only rely on the probabilities that levels are visited at all. We show that these can be computed or estimated without greater difficulties and apply our method to reprove the following known results in an easy and natural way. (i) The precise run time of the (1+1) EA on LEADINGONES. (ii) A lower bound for the run time of the (1+1) EA on ONEMAX, tight apart from an O(n) term. (iii) A lower bound for the run time of the (1+1) EA on long k-paths (which differs slightly from the previous result due to a small error in the latter). 
We also prove a tighter lower bound for the run time of the (1+1) EA on jump functions by showing that, regardless of the jump size, only with probability $O(2^{-n})$ the algorithm can avoid to jump over the valley of low fitness.}, language = {en} } @article{GevayRablBressetal.2022, author = {Gevay, Gabor E. and Rabl, Tilmann and Bress, Sebastian and Maclai-Tahy, Lorand and Quiane-Ruiz, Jorge-Arnulfo and Markl, Volker}, title = {Imperative or Functional Control Flow Handling: Why not the Best of Both Worlds?}, series = {SIGMOD record}, volume = {51}, journal = {SIGMOD record}, number = {1}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0163-5808}, doi = {10.1109/ICDE51399.2021.00127}, pages = {60 -- 67}, year = {2022}, abstract = {Modern data analysis tasks often involve control flow statements, such as the iterations in PageRank and K-means. To achieve scalability, developers usually implement these tasks in distributed dataflow systems, such as Spark and Flink. Designers of such systems have to choose between providing imperative or functional control flow constructs to users. Imperative constructs are easier to use, but functional constructs are easier to compile to an efficient dataflow job. We propose Mitos, a system where control flow is both easy to use and efficient. Mitos relies on an intermediate representation based on the static single assignment form. This allows us to abstract away from specific control flow constructs and treat any imperative control flow uniformly both when building the dataflow job and when coordinating the distributed execution.}, language = {en} } @article{LewkowiczWohlbrandtBoettinger2022, author = {Lewkowicz, Daniel and Wohlbrandt, Attila M. 
and B{\"o}ttinger, Erwin}, title = {Digital therapeutic care apps with decision-support interventions for people with low back pain in Germany}, series = {JMIR mhealth and uhealth}, volume = {10}, journal = {JMIR mhealth and uhealth}, number = {2}, publisher = {JMIR Publications}, address = {Toronto}, issn = {2291-5222}, doi = {10.2196/35042}, pages = {17}, year = {2022}, abstract = {Background: Digital therapeutic care apps provide a new effective and scalable approach for people with nonspecific low back pain (LBP). Digital therapeutic care apps are also driven by personalized decision-support interventions that support the user in self-managing LBP, and may induce prolonged behavior change to reduce the frequency and intensity of pain episodes. However, these therapeutic apps are associated with high attrition rates, and the initial prescription cost is higher than that of face-to-face physiotherapy. In Germany, digital therapeutic care apps are now being reimbursed by statutory health insurance; however, price targets and cost-driving factors for the formation of the reimbursement rate remain unexplored. Objective: The aim of this study was to evaluate the cost-effectiveness of a digital therapeutic care app compared to treatment as usual (TAU) in Germany. We further aimed to explore under which circumstances the reimbursement rate could be modified to consider value-based pricing. Methods: We developed a state-transition Markov model based on a best-practice analysis of prior LBP-related decision-analytic models, and evaluated the cost utility of a digital therapeutic care app compared to TAU in Germany. Based on a 3-year time horizon, we simulated the incremental cost and quality-adjusted life years (QALYs) for people with nonacute LBP from the societal perspective. In the deterministic sensitivity and scenario analyses, we focused on diverging attrition rates and app cost to assess our model's robustness and conditions for changing the reimbursement rate. 
All costs are reported in Euro ({\texteuro}1=US \$1.12). Results: Our base case results indicated that the digital therapeutic care strategy led to an incremental cost of {\texteuro}121.59, but also generated 0.0221 additional QALYs compared to the TAU strategy, with an estimated incremental cost-effectiveness ratio (ICER) of {\texteuro}5486 per QALY. The sensitivity analysis revealed that the reimbursement rate and the capability of digital therapeutic care to prevent reoccurring LBP episodes have a significant impact on the ICER. At the same time, the other parameters remained unaffected and thus supported the robustness of our model. In the scenario analysis, the different model time horizons and attrition rates strongly influenced the economic outcome. Reducing the cost of the app to {\texteuro}99 per 3 months or decreasing the app's attrition rate resulted in digital therapeutic care being significantly less costly with more generated QALYs, and is thus considered to be the dominant strategy over TAU. Conclusions: The current reimbursement rate for a digital therapeutic care app in the statutory health insurance can be considered a cost-effective measure compared to TAU. The app's attrition rate and effect on the patient's prolonged behavior change essentially influence the settlement of an appropriate reimbursement rate. 
Future value-based pricing targets should focus on additional outcome parameters besides pain intensity and functional disability by including attrition rates and the app's long-term effect on quality of life.}, language = {en} } @article{RoostapourNeumannNeumannetal.2022, author = {Roostapour, Vahid and Neumann, Aneta and Neumann, Frank and Friedrich, Tobias}, title = {Pareto optimization for subset selection with dynamic cost constraints}, series = {Artificial intelligence}, volume = {302}, journal = {Artificial intelligence}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0004-3702}, doi = {10.1016/j.artint.2021.103597}, pages = {17}, year = {2022}, abstract = {We consider the subset selection problem for function f with constraint bound B that changes over time. Within the area of submodular optimization, various greedy approaches are commonly used. For dynamic environments we observe that the adaptive variants of these greedy approaches are not able to maintain their approximation quality. Investigating the recently introduced POMC Pareto optimization approach, we show that this algorithm efficiently computes a $\phi = (\alpha_f/2)(1 - 1/e^{\alpha_f})$-approximation, where $\alpha_f$ is the submodularity ratio of f, for each possible constraint bound $b \le B$. Furthermore, we show that POMC is able to adapt its set of solutions quickly in the case that B increases. Our experimental investigations for the influence maximization in social networks show the advantage of POMC over generalized greedy algorithms. We also consider EAMC, a new evolutionary algorithm with polynomial expected time guarantee to maintain $\phi$ approximation ratio, and NSGA-II with two different population sizes as advanced multi-objective optimization algorithm, to demonstrate their challenges in optimizing the maximum coverage problem. 
Our empirical analysis shows that, within the same number of evaluations, POMC is able to perform as good as NSGA-II under linear constraint, while EAMC performs significantly worse than all considered algorithms in most cases.}, language = {en} } @article{LangenhanJaegerBaumetal.2022, author = {Langenhan, Jennifer and Jaeger, Carsten and Baum, Katharina and Simon, Mareike and Lisec, Jan}, title = {A flexible tool to correct superimposed mass isotopologue distributions in GC-APCI-MS flux experiments}, series = {Metabolites}, volume = {12}, journal = {Metabolites}, number = {5}, publisher = {MDPI}, address = {Basel}, issn = {2218-1989}, doi = {10.3390/metabo12050408}, pages = {10}, year = {2022}, abstract = {The investigation of metabolic fluxes and metabolite distributions within cells by means of tracer molecules is a valuable tool to unravel the complexity of biological systems. Technological advances in mass spectrometry (MS) technology such as atmospheric pressure chemical ionization (APCI) coupled with high resolution (HR), not only allows for highly sensitive analyses but also broadens the usefulness of tracer-based experiments, as interesting signals can be annotated de novo when not yet present in a compound library. However, several effects in the APCI ion source, i.e., fragmentation and rearrangement, lead to superimposed mass isotopologue distributions (MID) within the mass spectra, which need to be corrected during data evaluation as they will impair enrichment calculation otherwise. Here, we present and evaluate a novel software tool to automatically perform such corrections. We discuss the different effects, explain the implemented algorithm, and show its application on several experimental datasets. This adjustable tool is available as an R package from CRAN.}, language = {en} } @article{SinnGieseStuiveretal.2022, author = {Sinn, Ludwig R. 
and Giese, Sven Hans-Joachim and Stuiver, Marchel and Rappsilber, Juri}, title = {Leveraging parameter dependencies in high-field asymmetric waveform ion-mobility spectrometry and size exclusion chromatography for proteome-wide cross-linking mass spectrometry}, series = {Analytical chemistry : the authoritative voice of the analytical community}, volume = {94}, journal = {Analytical chemistry : the authoritative voice of the analytical community}, number = {11}, publisher = {American Chemical Society}, address = {Columbus, Ohio}, issn = {0003-2700}, doi = {10.1021/acs.analchem.1c04373}, pages = {4627 -- 4634}, year = {2022}, abstract = {Ion-mobility spectrometry shows great promise to tackle analytically challenging research questions by adding another separation dimension to liquid chromatography-mass spectrometry. The understanding of how analyte properties influence ion mobility has increased through recent studies, but no clear rationale for the design of customized experimental settings has emerged. Here, we leverage machine learning to deepen our understanding of field asymmetric waveform ion-mobility spectrometry for the analysis of cross-linked peptides. Knowing that predominantly m/z and then the size and charge state of an analyte influence the separation, we found ideal compensation voltages correlating with the size exclusion chromatography fraction number. The effect of this relationship on the analytical depth can be substantial as exploiting it allowed us to almost double unique residue pair detections in a proteome-wide cross-linking experiment. Other applications involving liquid- and gas-phase separation may also benefit from considering such parameter dependencies.}, language = {en} } @article{KonigorskiWernickeSlosareketal.2022, author = {Konigorski, Stefan and Wernicke, Sarah and Slosarek, Tamara and Zenner, Alexander M. and Strelow, Nils and Ruether, Darius F. 
and Henschel, Florian and Manaswini, Manisha and Pottb{\"a}cker, Fabian and Edelman, Jonathan A. and Owoyele, Babajide and Danieletto, Matteo and Golden, Eddye and Zweig, Micol and Nadkarni, Girish N. and B{\"o}ttinger, Erwin}, title = {StudyU: a platform for designing and conducting innovative digital N-of-1 trials}, series = {Journal of medical internet research}, volume = {24}, journal = {Journal of medical internet research}, number = {7}, publisher = {Healthcare World}, address = {Richmond, Va.}, issn = {1439-4456}, doi = {10.2196/35884}, pages = {12}, year = {2022}, abstract = {N-of-1 trials are the gold standard study design to evaluate individual treatment effects and derive personalized treatment strategies. Digital tools have the potential to initiate a new era of N-of-1 trials in terms of scale and scope, but fully functional platforms are not yet available. Here, we present the open source StudyU platform, which includes the StudyU Designer and StudyU app. With the StudyU Designer, scientists are given a collaborative web application to digitally specify, publish, and conduct N-of-1 trials. The StudyU app is a smartphone app with innovative user-centric elements for participants to partake in trials published through the StudyU Designer to assess the effects of different interventions on their health. Thereby, the StudyU platform allows clinicians and researchers worldwide to easily design and conduct digital N-of-1 trials in a safe manner. 
We envision that StudyU can change the landscape of personalized treatments both for patients and healthy individuals, democratize and personalize evidence generation for self-optimization and medicine, and can be integrated in clinical practice.}, language = {en} } @article{WeinsteinCehMeineletal.2022, author = {Weinstein, Theresa Julia and Ceh, Simon Majed and Meinel, Christoph and Benedek, Mathias}, title = {What's creative about sentences?}, series = {Creativity Research Journal}, volume = {34}, journal = {Creativity Research Journal}, number = {4}, publisher = {Routledge, Taylor \& Francis Group}, address = {Abingdon}, issn = {1040-0419}, doi = {10.1080/10400419.2022.2124777}, pages = {419 -- 430}, year = {2022}, abstract = {Evaluating creativity of verbal responses or texts is a challenging task due to psychometric issues associated with subjective ratings and the peculiarities of textual data. We explore an approach to objectively assess the creativity of responses in a sentence generation task to 1) better understand what language-related aspects are valued by human raters and 2) further advance the developments toward automating creativity evaluations. Over the course of two prior studies, participants generated 989 four-word sentences based on a four-letter prompt with the instruction to be creative. We developed an algorithm that scores each sentence on eight different metrics including 1) general word infrequency, 2) word combination infrequency, 3) context-specific word uniqueness, 4) syntax uniqueness, 5) rhyme, 6) phonetic similarity, and similarity of 7) sequence spelling and 8) semantic meaning to the cue. The text metrics were then used to explain the averaged creativity ratings of eight human raters. We found six metrics to be significantly correlated with the human ratings, explaining a total of 16\% of their variance. 
We conclude that the creative impression of sentences is partly driven by different aspects of novelty in word choice and syntax, as well as rhythm and sound, which are amenable to objective assessment.}, language = {en} } @article{SchneiderWenigPapenbrock2021, author = {Schneider, Johannes and Wenig, Phillip and Papenbrock, Thorsten}, title = {Distributed detection of sequential anomalies in univariate time series}, series = {The VLDB journal : the international journal on very large data bases}, volume = {30}, journal = {The VLDB journal : the international journal on very large data bases}, number = {4}, publisher = {Springer}, address = {Berlin}, issn = {1066-8888}, doi = {10.1007/s00778-021-00657-6}, pages = {579 -- 602}, year = {2021}, abstract = {The automated detection of sequential anomalies in time series is an essential task for many applications, such as the monitoring of technical systems, fraud detection in high-frequency trading, or the early detection of disease symptoms. All these applications require the detection to find all sequential anomalies possibly fast on potentially very large time series. In other words, the detection needs to be effective, efficient and scalable w.r.t. the input size. Series2Graph is an effective solution based on graph embeddings that are robust against re-occurring anomalies and can discover sequential anomalies of arbitrary length and works without training data. Yet, Series2Graph is not scalable due to its single-threaded approach; it cannot, in particular, process arbitrarily large sequences due to the memory constraints of a single machine. In this paper, we propose our distributed anomaly detection system, short DADS, which is an efficient and scalable adaptation of Series2Graph. Based on the actor programming model, DADS distributes the input time sequence, intermediate state and the computation to all processors of a cluster in a way that minimizes communication costs and synchronization barriers. 
Our evaluation shows that DADS is orders of magnitude faster than S2G, scales almost linearly with the number of processors in the cluster and can process much larger input sequences due to its scale-out property.}, language = {en} } @article{CopeBaukmannKlingeretal.2021, author = {Cope, Justin L. and Baukmann, Hannes A. and Klinger, J{\"o}rn E. and Ravarani, Charles N. J. and B{\"o}ttinger, Erwin and Konigorski, Stefan and Schmidt, Marco F.}, title = {Interaction-based feature selection algorithm outperforms polygenic risk score in predicting Parkinson's Disease status}, series = {Frontiers in genetics}, volume = {12}, journal = {Frontiers in genetics}, publisher = {Frontiers Media}, address = {Lausanne}, issn = {1664-8021}, doi = {10.3389/fgene.2021.744557}, pages = {9}, year = {2021}, abstract = {Polygenic risk scores (PRS) aggregating results from genome-wide association studies are the state of the art in the prediction of susceptibility to complex traits or diseases, yet their predictive performance is limited for various reasons, not least of which is their failure to incorporate the effects of gene-gene interactions. Novel machine learning algorithms that use large amounts of data promise to find gene-gene interactions in order to build models with better predictive performance than PRS. Here, we present a data preprocessing step by using data-mining of contextual information to reduce the number of features, enabling machine learning algorithms to identify gene-gene interactions. We applied our approach to the Parkinson's Progression Markers Initiative (PPMI) dataset, an observational clinical study of 471 genotyped subjects (368 cases and 152 controls). With an AUC of 0.85 (95\% CI = [0.72; 0.96]), the interaction-based prediction model outperforms the PRS (AUC of 0.58 (95\% CI = [0.42; 0.81])). Furthermore, feature importance analysis of the model provided insights into the mechanism of Parkinson's disease. 
For instance, the model revealed an interaction of previously described drug target candidate genes TMEM175 and GAPDHP25. These results demonstrate that interaction-based machine learning models can improve genetic prediction models and might provide an answer to the missing heritability problem.}, language = {en} } @article{Konigorski2021, author = {Konigorski, Stefan}, title = {Causal inference in developmental medicine and neurology}, series = {Developmental medicine and child neurology}, volume = {63}, journal = {Developmental medicine and child neurology}, number = {5}, publisher = {Wiley-Blackwell}, address = {Oxford}, issn = {0012-1622}, doi = {10.1111/dmcn.14813}, pages = {498 -- 498}, year = {2021}, language = {en} } @article{ChromikPirlBeilharzetal.2021, author = {Chromik, Jonas and Pirl, Lukas and Beilharz, Jossekin Jakob and Arnrich, Bert and Polze, Andreas}, title = {Certainty in QRS detection with artificial neural networks}, series = {Biomedical signal processing and control}, volume = {68}, journal = {Biomedical signal processing and control}, publisher = {Elsevier}, address = {Oxford}, issn = {1746-8094}, doi = {10.1016/j.bspc.2021.102628}, pages = {12}, year = {2021}, abstract = {Detection of the QRS complex is a long-standing topic in the context of electrocardiography and many algorithms build upon the knowledge of the QRS positions. Although the first solutions to this problem were proposed in the 1970s and 1980s, there is still potential for improvements. Advancements in neural network technology made in recent years also lead to the emergence of enhanced QRS detectors based on artificial neural networks. In this work, we propose a method for assessing the certainty that is in each of the detected QRS complexes, i.e. how confident the QRS detector is that there is, in fact, a QRS complex in the position where it was detected. 
We further show how this metric can be utilised to distinguish correctly detected QRS complexes from false detections.}, language = {en} } @article{PfitznerSteckhanArnrich2021, author = {Pfitzner, Bjarne and Steckhan, Nico and Arnrich, Bert}, title = {Federated learning in a medical context}, series = {ACM transactions on internet technology : TOIT / Association for Computing}, volume = {21}, journal = {ACM transactions on internet technology : TOIT / Association for Computing}, number = {2}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1533-5399}, doi = {10.1145/3412357}, pages = {1 -- 31}, year = {2021}, abstract = {Data privacy is a very important issue. Especially in fields like medicine, it is paramount to abide by the existing privacy regulations to preserve patients' anonymity. However, data is required for research and training machine learning models that could help gain insight into complex correlations or personalised treatments that may otherwise stay undiscovered. Those models generally scale with the amount of data available, but the current situation often prohibits building large databases across sites. So it would be beneficial to be able to combine similar or related data from different sites all over the world while still preserving data privacy. Federated learning has been proposed as a solution for this, because it relies on the sharing of machine learning models, instead of the raw data itself. That means private data never leaves the site or device it was collected on. Federated learning is an emerging research area, and many domains have been identified for the application of those methods. 
This systematic literature review provides an extensive look at the concept of and research into federated learning and its applicability for confidential healthcare datasets.}, language = {en} } @article{LambersOrejas2021, author = {Lambers, Leen and Orejas, Fernando}, title = {Transformation rules with nested application conditions}, series = {Theoretical computer science}, volume = {884}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2021.07.023}, pages = {44 -- 67}, year = {2021}, abstract = {Recently, initial conflicts were introduced in the framework of M-adhesive categories as an important optimization of critical pairs. In particular, they represent a proper subset such that each conflict is represented in a minimal context by a unique initial one. The theory of critical pairs has been extended in the framework of M-adhesive categories to rules with nested application conditions (ACs), restricting the applicability of a rule and generalizing the well-known negative application conditions. A notion of initial conflicts for rules with ACs does not exist yet. In this paper, on the one hand, we extend the theory of initial conflicts in the framework of M-adhesive categories to transformation rules with ACs. They represent a proper subset again of critical pairs for rules with ACs, and represent each conflict in a minimal context uniquely. They are moreover symbolic because we can show that in general no finite and complete set of conflicts for rules with ACs exists. On the other hand, we show that critical pairs are minimally M-complete, whereas initial conflicts are minimally complete. 
Finally, we introduce important special cases of rules with ACs for which we can obtain finite, minimally (M-)complete sets of conflicts.}, language = {en} } @article{Henkenjohann2021, author = {Henkenjohann, Richard}, title = {Role of individual motivations and privacy concerns in the adoption of German electronic patient record apps}, series = {International journal of environmental research and public health : IJERPH / Molecular Diversity Preservation International}, volume = {18}, journal = {International journal of environmental research and public health : IJERPH / Molecular Diversity Preservation International}, number = {18}, publisher = {MDPI}, address = {Basel}, issn = {1660-4601}, doi = {10.3390/ijerph18189553}, pages = {31}, year = {2021}, abstract = {Germany's electronic patient record ("ePA") launched in 2021 with several attempts and years of delay. The development of such a large-scale project is a complex task, and so is its adoption. Individual attitudes towards an electronic health record are crucial, as individuals can reject opting-in to it and making any national efforts unachievable. Although the integration of an electronic health record serves potential benefits, it also constitutes risks for an individual's privacy. With a mixed-methods study design, this work provides evidence that different types of motivations and contextual privacy antecedents affect usage intentions towards the ePA. 
Most significantly, individual motivations stemming from feelings of volition or external mandates positively affect ePA adoption, although internal incentives are more powerful.}, language = {en} } @article{CaruccioDeufemiaNaumannetal.2021, author = {Caruccio, Loredana and Deufemia, Vincenzo and Naumann, Felix and Polese, Giuseppe}, title = {Discovering relaxed functional dependencies based on multi-attribute dominance}, series = {IEEE transactions on knowledge and data engineering}, volume = {33}, journal = {IEEE transactions on knowledge and data engineering}, number = {9}, publisher = {Institute of Electrical and Electronics Engineers}, address = {New York, NY}, issn = {1041-4347}, doi = {10.1109/TKDE.2020.2967722}, pages = {3212 -- 3228}, year = {2021}, abstract = {With the advent of big data and data lakes, data are often integrated from multiple sources. Such integrated data are often of poor quality, due to inconsistencies, errors, and so forth. One way to check the quality of data is to infer functional dependencies (fds). However, in many modern applications it might be necessary to extract properties and relationships that are not captured through fds, due to the necessity to admit exceptions, or to consider similarity rather than equality of data values. Relaxed fds (rfds) have been introduced to meet these needs, but their discovery from data adds further complexity to an already complex problem, also due to the necessity of specifying similarity and validity thresholds. We propose Domino, a new discovery algorithm for rfds that exploits the concept of dominance in order to derive similarity thresholds of attribute values while inferring rfds. 
An experimental evaluation on real datasets demonstrates the discovery performance and the effectiveness of the proposed algorithm.}, language = {en} } @article{KossmannPapenbrockNaumann2021, author = {Ko{\ss}mann, Jan and Papenbrock, Thorsten and Naumann, Felix}, title = {Data dependencies for query optimization}, series = {The VLDB journal : the international journal on very large data bases / publ. on behalf of the VLDB Endowment}, volume = {31}, journal = {The VLDB journal : the international journal on very large data bases / publ. on behalf of the VLDB Endowment}, number = {1}, publisher = {Springer}, address = {Berlin ; Heidelberg ; New York}, issn = {1066-8888}, doi = {10.1007/s00778-021-00676-3}, pages = {1 -- 22}, year = {2021}, abstract = {Effective query optimization is a core feature of any database management system. While most query optimization techniques make use of simple metadata, such as cardinalities and other basic statistics, other optimization techniques are based on more advanced metadata including data dependencies, such as functional, uniqueness, order, or inclusion dependencies. This survey provides an overview, intuitive descriptions, and classifications of query optimization and execution strategies that are enabled by data dependencies. We consider the most popular types of data dependencies and focus on optimization strategies that target the optimization of relational database queries. 
The survey supports database vendors to identify optimization opportunities as well as DBMS researchers to find related work and open research questions.}, language = {en} } @article{QuinzanGoebelWagneretal.2021, author = {Quinzan, Francesco and G{\"o}bel, Andreas and Wagner, Markus and Friedrich, Tobias}, title = {Evolutionary algorithms and submodular functions}, series = {Natural computing : an innovative journal bridging biosciences and computer sciences ; an international journal}, volume = {20}, journal = {Natural computing : an innovative journal bridging biosciences and computer sciences ; an international journal}, number = {3}, publisher = {Springer Science + Business Media B.V.}, address = {Dordrecht}, issn = {1572-9796}, doi = {10.1007/s11047-021-09841-7}, pages = {561 -- 575}, year = {2021}, abstract = {A core operator of evolutionary algorithms (EAs) is the mutation. Recently, much attention has been devoted to the study of mutation operators with dynamic and non-uniform mutation rates. Following up on this area of work, we propose a new mutation operator and analyze its performance on the (1 + 1) Evolutionary Algorithm (EA). Our analyses show that this mutation operator competes with pre-existing ones, when used by the (1 + 1) EA on classes of problems for which results on the other mutation operators are available. We show that the (1 + 1) EA using our mutation operator finds a (1/3)-approximation ratio on any non-negative submodular function in polynomial time. We also consider the problem of maximizing a symmetric submodular function under a single matroid constraint and show that the (1 + 1) EA using our operator finds a (1/3)-approximation within polynomial time. This performance matches that of combinatorial local search algorithms specifically designed to solve these problems and outperforms them with constant probability. 
Finally, we evaluate the performance of the (1 + 1) EA using our operator experimentally by considering two applications: (a) the maximum directed cut problem on real-world graphs of different origins, with up to 6.6 million vertices and 56 million edges and (b) the symmetric mutual information problem using a four month period air pollution data set. In comparison with uniform mutation and a recently proposed dynamic scheme, our operator comes out on top on these instances.}, language = {en} } @article{OosthoekDoerr2021, author = {Oosthoek, Kris and D{\"o}rr, Christian}, title = {Cyber security threats to bitcoin exchanges}, series = {IEEE transactions on network and service management : a publication of the IEEE}, volume = {18}, journal = {IEEE transactions on network and service management : a publication of the IEEE}, number = {2}, publisher = {IEEE}, address = {New York}, issn = {1932-4537}, doi = {10.1109/TNSM.2020.3046145}, pages = {1616 -- 1628}, year = {2021}, abstract = {Bitcoin is gaining traction as an alternative store of value. Its market capitalization transcends all other cryptocurrencies in the market. But its high monetary value also makes it an attractive target to cyber criminal actors. Hacking campaigns usually target an ecosystem's weakest points. In Bitcoin, the exchange platforms are one of them. Each exchange breach is a threat not only to direct victims, but to the credibility of Bitcoin's entire ecosystem. Based on an extensive analysis of 36 breaches of Bitcoin exchanges, we show the attack patterns used to exploit Bitcoin exchange platforms using an industry standard for reporting intelligence on cyber security breaches. Based on this we are able to provide an overview of the most common attack vectors, showing that all except three hacks were possible due to relatively lax security. 
We show that while the security regimen of Bitcoin exchanges is subpar compared to other financial service providers, the use of stolen credentials, which does not require any hacking, is decreasing. We also show that the amount of BTC taken during a breach is decreasing, as well as the exchanges that terminate after being breached. Furthermore we show that overall security posture has improved, but still has major flaws. To discover adversarial methods post-breach, we have analyzed two cases of BTC laundering. Through this analysis we provide insight into how exchange platforms with lax cyber security even further increase the intermediary risk introduced by them into the Bitcoin ecosystem.}, language = {en} } @article{SchneiderLambersOrejas2021, author = {Schneider, Sven and Lambers, Leen and Orejas, Fernando}, title = {A logic-based incremental approach to graph repair featuring delta preservation}, series = {International journal on software tools for technology transfer : STTT}, volume = {23}, journal = {International journal on software tools for technology transfer : STTT}, number = {3}, publisher = {Springer}, address = {Berlin ; Heidelberg}, issn = {1433-2779}, doi = {10.1007/s10009-020-00584-x}, pages = {369 -- 410}, year = {2021}, abstract = {We introduce a logic-based incremental approach to graph repair, generating a sound and complete (upon termination) overview of least-changing graph repairs from which a user may select a graph repair based on non-formalized further requirements. This incremental approach features delta preservation as it allows to restrict the generation of graph repairs to delta-preserving graph repairs, which do not revert the additions and deletions of the most recent consistency-violating graph update. We specify consistency of graphs using the logic of nested graph conditions, which is equivalent to first-order logic on graphs. 
Technically, the incremental approach encodes if and how the graph under repair satisfies a graph condition using the novel data structure of satisfaction trees, which are adapted incrementally according to the graph updates applied. In addition to the incremental approach, we also present two state-based graph repair algorithms, which restore consistency of a graph independent of the most recent graph update and which generate additional graph repairs using a global perspective on the graph under repair. We evaluate the developed algorithms using our prototypical implementation in the tool AutoGraph and illustrate our incremental approach using a case study from the graph database domain.}, language = {en} } @article{GoebelLagodzinskiSeidel2021, author = {G{\"o}bel, Andreas and Lagodzinski, Gregor J. A. and Seidel, Karen}, title = {Counting homomorphisms to trees modulo a prime}, series = {ACM transactions on computation theory : TOCT / Association for Computing Machinery}, volume = {13}, journal = {ACM transactions on computation theory : TOCT / Association for Computing Machinery}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1942-3454}, doi = {10.1145/3460958}, pages = {1 -- 33}, year = {2021}, abstract = {Many important graph-theoretic notions can be encoded as counting graph homomorphism problems, such as partition functions in statistical physics, in particular independent sets and colourings. In this article, we study the complexity of \#pHOMSTOH, the problem of counting graph homomorphisms from an input graph to a graph H modulo a prime number p. Dyer and Greenhill proved a dichotomy stating that the tractability of non-modular counting graph homomorphisms depends on the structure of the target graph. Many intractable cases in non-modular counting become tractable in modular counting due to the common phenomenon of cancellation. 
In subsequent studies on counting modulo 2, however, the influence of the structure of H on the tractability was shown to persist, which yields similar dichotomies.
Our main result states that for every tree H and every prime p the problem \#pHOMSTOH is either polynomial time computable or \#P-p-complete. This relates to the conjecture of Faben and Jerrum stating that this dichotomy holds for every graph H when counting modulo 2. In contrast to previous results on modular counting, the tractable cases of \#pHOMSTOH are essentially the same for all values of the modulo when H is a tree. To prove this result, we study the structural properties of a homomorphism. As an important interim result, our study yields a dichotomy for the problem of counting weighted independent sets in a bipartite graph modulo some prime p. These results are the first suggesting that such dichotomies hold not only for the modulo 2 case but also for the modular counting functions of all primes p.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Integrative biomarker detection on high-dimensional gene expression data sets}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {3}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbaa151}, pages = {18}, year = {2021}, abstract = {Gene expression data provide the expression levels of tens of thousands of genes from several hundred samples. These data are analyzed to detect biomarkers that can be of prognostic or diagnostic use. Traditionally, biomarker detection for gene expression data is the task of gene selection. The vast number of genes is reduced to a few relevant ones that achieve the best performance for the respective use case. Traditional approaches select genes based on their statistical significance in the data set. This results in issues of robustness, redundancy and true biological relevance of the selected genes. Integrative analyses typically address these shortcomings by integrating multiple data artifacts from the same objects, e.g. 
gene expression and methylation data. When only gene expression data are available, integrative analyses instead use curated information on biological processes from public knowledge bases. With knowledge bases providing an ever-increasing amount of curated biological knowledge, such prior knowledge approaches become more powerful. This paper provides a thorough overview on the status quo of biomarker detection on gene expression data with prior biological knowledge. We discuss current shortcomings of traditional approaches, review recent external knowledge bases, provide a classification and qualitative comparison of existing prior knowledge approaches and discuss open challenges for this kind of gene selection.}, language = {en} } @article{LosterKoumarelasNaumann2021, author = {Loster, Michael and Koumarelas, Ioannis and Naumann, Felix}, title = {Knowledge transfer for entity resolution with siamese neural networks}, series = {ACM journal of data and information quality}, volume = {13}, journal = {ACM journal of data and information quality}, number = {1}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {1936-1955}, doi = {10.1145/3410157}, pages = {25}, year = {2021}, abstract = {The integration of multiple data sources is a common problem in a large variety of applications. Traditionally, handcrafted similarity measures are used to discover, merge, and integrate multiple representations of the same entity---duplicates---into a large homogeneous collection of data. Often, these similarity measures do not cope well with the heterogeneity of the underlying dataset. In addition, domain experts are needed to manually design and configure such measures, which is both time-consuming and requires extensive domain expertise.
We propose a deep Siamese neural network, capable of learning a similarity measure that is tailored to the characteristics of a particular dataset. With the properties of deep learning methods, we are able to eliminate the manual feature engineering process and thus considerably reduce the effort required for model construction. In addition, we show that it is possible to transfer knowledge acquired during the deduplication of one dataset to another, and thus significantly reduce the amount of data required to train a similarity measure. We evaluated our method on multiple datasets and compare our approach to state-of-the-art deduplication methods. Our approach outperforms competitors by up to +26 percent F-measure, depending on task and dataset. In addition, we show that knowledge transfer is not only feasible, but in our experiments led to an improvement in F-measure of up to +4.7 percent.}, language = {en} } @article{BjoerkHoelzleBoer2021, author = {Bj{\"o}rk, Jennie and H{\"o}lzle, Katharina and Boer, Harry}, title = {'What will we learn from the current crisis?'}, series = {Creativity and innovation management}, volume = {30}, journal = {Creativity and innovation management}, number = {2}, publisher = {Wiley-Blackwell}, address = {Oxford [u.a.]}, issn = {0963-1690}, doi = {10.1111/caim.12442}, pages = {231 -- 232}, year = {2021}, language = {en} } @article{BlaesiusFriedrichSchirneck2021, author = {Bl{\"a}sius, Thomas and Friedrich, Tobias and Schirneck, Friedrich Martin}, title = {The complexity of dependency detection and discovery in relational databases}, series = {Theoretical computer science}, volume = {900}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2021.11.020}, pages = {79 -- 96}, year = {2021}, abstract = {Multi-column dependencies in relational databases come associated with two different computational tasks. 
The detection problem is to decide whether a dependency of a certain type and size holds in a given database, the discovery problem asks to enumerate all valid dependencies of that type. We settle the complexity of both of these problems for unique column combinations (UCCs), functional dependencies (FDs), and inclusion dependencies (INDs). We show that the detection of UCCs and FDs is W[2]-complete when parameterized by the solution size. The discovery of inclusion-wise minimal UCCs is proven to be equivalent under parsimonious reductions to the transversal hypergraph problem of enumerating the minimal hitting sets of a hypergraph. The discovery of FDs is equivalent to the simultaneous enumeration of the hitting sets of multiple input hypergraphs. We further identify the detection of INDs as one of the first natural W[3]-complete problems. The discovery of maximal INDs is shown to be equivalent to enumerating the maximal satisfying assignments of antimonotone, 3-normalized Boolean formulas.}, language = {en} } @article{DoerrKrejca2021, author = {Doerr, Benjamin and Krejca, Martin Stefan}, title = {A simplified run time analysis of the univariate marginal distribution algorithm on LeadingOnes}, series = {Theoretical computer science}, volume = {851}, journal = {Theoretical computer science}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0304-3975}, doi = {10.1016/j.tcs.2020.11.028}, pages = {121 -- 128}, year = {2021}, abstract = {With elementary means, we prove a stronger run time guarantee for the univariate marginal distribution algorithm (UMDA) optimizing the LEADINGONES benchmark function in the desirable regime with low genetic drift. If the population size is at least quasilinear, then, with high probability, the UMDA samples the optimum in a number of iterations that is linear in the problem size divided by the logarithm of the UMDA's selection rate. 
This improves over the previous guarantee, obtained by Dang and Lehre (2015) via the deep level-based population method, both in terms of the run time and by demonstrating further run time gains from small selection rates. Under similar assumptions, we prove a lower bound that matches our upper bound up to constant factors.}, language = {en} } @article{HaarmannHolfterPufahletal.2021, author = {Haarmann, Stephan and Holfter, Adrian and Pufahl, Luise and Weske, Mathias}, title = {Formal framework for checking compliance of data-driven case management}, series = {Journal on data semantics : JoDS}, volume = {10}, journal = {Journal on data semantics : JoDS}, number = {1-2}, publisher = {Springer}, address = {Heidelberg}, issn = {1861-2032}, doi = {10.1007/s13740-021-00120-3}, pages = {143 -- 163}, year = {2021}, abstract = {Business processes are often specified in descriptive or normative models. Both types of models should adhere to internal and external regulations, such as company guidelines or laws. Employing compliance checking techniques, it is possible to verify process models against rules. While traditionally compliance checking focuses on well-structured processes, we address case management scenarios. In case management, knowledge workers drive multi-variant and adaptive processes. Our contribution is based on the fragment-based case management approach, which splits a process into a set of fragments. The fragments are synchronized through shared data but can, otherwise, be dynamically instantiated and executed. We formalize case models using Petri nets. We demonstrate the formalization for design-time and run-time compliance checking and present a proof-of-concept implementation. The application of the implemented compliance checking approach to a use case exemplifies its effectiveness while designing a case model. 
The empirical evaluation on a set of case models for measuring the performance of the approach shows that rules can often be checked in less than a second.}, language = {en} } @article{DattaSachsFreitasdaCruzetal.2021, author = {Datta, Suparno and Sachs, Jan Philipp and Freitas da Cruz, Harry and Martensen, Tom and Bode, Philipp and Morassi Sasso, Ariane and Glicksberg, Benjamin S. and B{\"o}ttinger, Erwin}, title = {FIBER}, series = {JAMIA open}, volume = {4}, journal = {JAMIA open}, number = {3}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {2574-2531}, doi = {10.1093/jamiaopen/ooab048}, pages = {10}, year = {2021}, abstract = {Objectives: The development of clinical predictive models hinges upon the availability of comprehensive clinical data. Tapping into such resources requires considerable effort from clinicians, data scientists, and engineers. Specifically, these efforts are focused on data extraction and preprocessing steps required prior to modeling, including complex database queries. A handful of software libraries exist that can reduce this complexity by building upon data standards. However, a gap remains concerning electronic health records (EHRs) stored in star schema clinical data warehouses, an approach often adopted in practice. In this article, we introduce the FlexIBle EHR Retrieval (FIBER) tool: a Python library built on top of a star schema (i2b2) clinical data warehouse that enables flexible generation of modeling-ready cohorts as data frames. Materials and Methods: FIBER was developed on top of a large-scale star schema EHR database which contains data from 8 million patients and over 120 million encounters. To illustrate FIBER's capabilities, we present its application by building a heart surgery patient cohort with subsequent prediction of acute kidney injury (AKI) with various machine learning models. 
Results: Using FIBER, we were able to build the heart surgery cohort (n = 12 061), identify the patients that developed AKI (n = 1005), and automatically extract relevant features (n = 774). Finally, we trained machine learning models that achieved area under the curve values of up to 0.77 for this exemplary use case. Conclusion: FIBER is an open-source Python library developed for extracting information from star schema clinical data warehouses and reduces time-to-modeling, helping to streamline the clinical modeling process.}, language = {en} } @article{OliveiraCiabatiLouresdosSantosHsiouSchmaltzetal.2021, author = {Oliveira-Ciabati, Livia and Loures dos Santos, Luciane and Hsiou Schmaltz, Annie and Sasso, Ariane Morassi and Castro, Margaret de and Souza, Jo{\~a}o Paulo}, title = {Scientific sexism}, series = {Revista de sa{\'u}de p{\'u}blica : publication of the Faculdade de Sa{\'u}de P{\'u}blica da Universidade de S{\~a}o Paulo = Journal of public health}, volume = {55}, journal = {Revista de sa{\'u}de p{\'u}blica : publication of the Faculdade de Sa{\'u}de P{\'u}blica da Universidade de S{\~a}o Paulo = Journal of public health}, publisher = {Faculdade de Sa{\'u}de P{\'u}blica da Universidade de S{\~a}o Paulo}, address = {S{\~a}o Paulo}, issn = {1518-8787}, doi = {10.11606/s1518-8787.2021055002939}, pages = {12}, year = {2021}, abstract = {OBJECTIVE: To investigate gender inequity in the scientific production of the University of Sao Paulo. METHODS: Members of the University of Sao Paulo faculty are the study population. The Web of Science repository was the source of the publication metrics. We selected the measures: total publications and citations, average of citations per year and item, H-index, and history of citations between 1950 and 2019. We used the name of the faculty member as a proxy to the gender identity. We use descriptive statistics to characterize the metrics. We evaluated the scissors effect by selecting faculty members with a high H-index. 
The historical series of citations was projected until 2100. We carry out analyses for the general population and working time subgroups: less than 10 years, 10 to 20 years, and 20 years or more. RESULTS: Of the 8,325 faculty members, we included 3,067 (36.8\%). Among those included, 1,893 (61.7\%) were male and 1,174 (38.28\%) female. The male gender presented higher values in the publication metrics (average of articles: M = 67.0 versus F = 49.7; average of citations/year: M = 53.9 versus F = 35.9), and H-index (M = 14.5 versus F = 12.4). Among the 100 individuals with the highest H-index (>= 37), 83\% are male. The male curve grows faster in the historical series of citations, opening a difference between the groups whose separation is confirmed by the projection. DISCUSSION: Scientific production at the Universidade de Sao Paulo is subject to a gender bias. Two-thirds of the faculty are male, and hiring over the past few decades perpetuates this pattern. The large majority of high impact faculty members are male. CONCLUSION: Our analysis suggests that the Universidade de Sao Paulo will not overcome gender inequality in scientific production without substantive affirmative action. 
Development does not happen by chance but through choices that are affirmative, decisive, and long-term oriented.}, language = {en} } @article{ShekharReimannMayeretal.2021, author = {Shekhar, Sumit and Reimann, Max and Mayer, Maximilian and Semmo, Amir and Pasewaldt, Sebastian and D{\"o}llner, J{\"u}rgen and Trapp, Matthias}, title = {Interactive photo editing on smartphones via intrinsic decomposition}, series = {Computer graphics forum : journal of the European Association for Computer Graphics}, volume = {40}, journal = {Computer graphics forum : journal of the European Association for Computer Graphics}, publisher = {Blackwell}, address = {Oxford}, issn = {0167-7055}, doi = {10.1111/cgf.142650}, pages = {497 -- 510}, year = {2021}, abstract = {Intrinsic decomposition refers to the problem of estimating scene characteristics, such as albedo and shading, when one view or multiple views of a scene are provided. The inverse problem setting, where multiple unknowns are solved given a single known pixel-value, is highly under-constrained. When provided with correlating image and depth data, intrinsic scene decomposition can be facilitated using depth-based priors, which nowadays is easy to acquire with high-end smartphones by utilizing their depth sensors. In this work, we present a system for intrinsic decomposition of RGB-D images on smartphones and the algorithmic as well as design choices therein. Unlike state-of-the-art methods that assume only diffuse reflectance, we consider both diffuse and specular pixels. For this purpose, we present a novel specularity extraction algorithm based on a multi-scale intensity decomposition and chroma inpainting. At this, the diffuse component is further decomposed into albedo and shading components. We use an inertial proximal algorithm for non-convex optimization (iPiano) to ensure albedo sparsity. Our GPU-based visual processing is implemented on iOS via the Metal API and enables interactive performance on an iPhone 11 Pro. 
Further, a qualitative evaluation shows that we are able to obtain high-quality outputs. Furthermore, our proposed approach for specularity removal outperforms state-of-the-art approaches for real-world images, while our albedo and shading layer decomposition is faster than the prior work at a comparable output quality. Manifold applications such as recoloring, retexturing, relighting, appearance editing, and stylization are shown, each using the intrinsic layers obtained with our method and/or the corresponding depth data.}, language = {en} } @article{ChanJaladankiSomanietal.2021, author = {Chan, Lili and Jaladanki, Suraj K. and Somani, Sulaiman and Paranjpe, Ishan and Kumar, Arvind and Zhao, Shan and Kaufman, Lewis and Leisman, Staci and Sharma, Shuchita and He, John Cijiang and Murphy, Barbara and Fayad, Zahi A. and Levin, Matthew A. and B{\"o}ttinger, Erwin and Charney, Alexander W. and Glicksberg, Benjamin and Coca, Steven G. and Nadkarni, Girish N.}, title = {Outcomes of patients on maintenance dialysis hospitalized with COVID-19}, series = {Clinical journal of the American Society of Nephrology : CJASN}, volume = {16}, journal = {Clinical journal of the American Society of Nephrology : CJASN}, number = {3}, publisher = {American Society of Nephrology}, address = {Washington}, organization = {Mount Sinai Covid I}, issn = {1555-9041}, doi = {10.2215/CJN.12360720}, pages = {452 -- 455}, year = {2021}, language = {en} } @article{PrillWalterKrolikowskaetal.2021, author = {Prill, Robert and Walter, Marina and Kr{\'o}likowska, Aleksandra and Becker, Roland}, title = {A systematic review of diagnostic accuracy and clinical applications of wearable movement sensors for knee joint rehabilitation}, series = {Sensors}, volume = {21}, journal = {Sensors}, number = {24}, publisher = {MDPI}, address = {Basel}, issn = {1424-8220}, doi = {10.3390/s21248221}, pages = {14}, year = {2021}, abstract = {In clinical practice, only a few reliable measurement instruments are 
available for monitoring knee joint rehabilitation. Advances to replace motion capturing with sensor data measurement have been made in the last years. Thus, a systematic review of the literature was performed, focusing on the implementation, diagnostic accuracy, and facilitators and barriers of integrating wearable sensor technology in clinical practices based on a Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) statement. For critical appraisal, the COSMIN Risk of Bias tool for reliability and measurement of error was used. PUBMED, Prospero, Cochrane database, and EMBASE were searched for eligible studies. Six studies reporting reliability aspects in using wearable sensor technology at any point after knee surgery in humans were included. All studies reported excellent results with high reliability coefficients, high limits of agreement, or a few detectable errors. They used different or partly inappropriate methods for estimating reliability or missed reporting essential information. Therefore, a moderate risk of bias must be considered. Further quality criterion studies in clinical settings are needed to synthesize the evidence for providing transparent recommendations for the clinical use of wearable movement sensors in knee joint rehabilitation.}, language = {en} } @article{Boissier2021, author = {Boissier, Martin}, title = {Robust and budget-constrained encoding configurations for in-memory database systems}, series = {Proceedings of the VLDB Endowment}, volume = {15}, journal = {Proceedings of the VLDB Endowment}, number = {4}, publisher = {Association for Computing Machinery (ACM)}, address = {[New York]}, issn = {2150-8097}, doi = {10.14778/3503585.3503588}, pages = {780 -- 793}, year = {2021}, abstract = {Data encoding has been applied to database systems for decades as it mitigates bandwidth bottlenecks and reduces storage requirements. 
But even in the presence of these advantages, most in-memory database systems use data encoding only conservatively as the negative impact on runtime performance can be severe. Real-world systems with large parts being infrequently accessed and cost efficiency constraints in cloud environments require solutions that automatically and efficiently select encoding techniques, including heavy-weight compression. In this paper, we introduce workload-driven approaches to automatically determine memory budget-constrained encoding configurations using greedy heuristics and linear programming. We show for TPC-H, TPC-DS, and the Join Order Benchmark that optimized encoding configurations can reduce the main memory footprint significantly without a loss in runtime performance over state-of-the-art dictionary encoding. To yield robust selections, we extend the linear programming-based approach to incorporate query runtime constraints and mitigate unexpected performance regressions.}, language = {en} } @article{VitaglianoJiangNaumann2021, author = {Vitagliano, Gerardo and Jiang, Lan and Naumann, Felix}, title = {Detecting layout templates in complex multiregion files}, series = {Proceedings of the VLDB Endowment}, volume = {15}, journal = {Proceedings of the VLDB Endowment}, number = {3}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {2150-8097}, doi = {10.14778/3494124.3494145}, pages = {646 -- 658}, year = {2021}, abstract = {Spreadsheets are among the most commonly used file formats for data management, distribution, and analysis. Their widespread employment makes it easy to gather large collections of data, but their flexible canvas-based structure makes automated analysis difficult without heavy preparation. One of the common problems that practitioners face is the presence of multiple, independent regions in a single spreadsheet, possibly separated by repeated empty cells. We define such files as "multiregion" files. 
In collections of various spreadsheets, we can observe that some share the same layout. We present the Mondrian approach to automatically identify layout templates across multiple files and systematically extract the corresponding regions. Our approach is composed of three phases: first, each file is rendered as an image and inspected for elements that could form regions; then, using a clustering algorithm, the identified elements are grouped to form regions; finally, every file layout is represented as a graph and compared with others to find layout templates. We compare our method to state-of-the-art table recognition algorithms on two corpora of real-world enterprise spreadsheets. Our approach shows the best performances in detecting reliable region boundaries within each file and can correctly identify recurring layouts across files.}, language = {en} } @article{VaidChanChaudharyetal.2021, author = {Vaid, Akhil and Chan, Lili and Chaudhary, Kumardeep and Jaladanki, Suraj K. and Paranjpe, Ishan and Russak, Adam J. and Kia, Arash and Timsina, Prem and Levin, Matthew A. and He, John Cijiang and B{\"o}ttinger, Erwin and Charney, Alexander W. and Fayad, Zahi A. and Coca, Steven G. and Glicksberg, Benjamin S. and Nadkarni, Girish N.}, title = {Predictive approaches for acute dialysis requirement and death in COVID-19}, series = {Clinical journal of the American Society of Nephrology : CJASN}, volume = {16}, journal = {Clinical journal of the American Society of Nephrology : CJASN}, number = {8}, publisher = {American Society of Nephrology}, address = {Washington}, organization = {MSCIC}, issn = {1555-9041}, doi = {10.2215/CJN.17311120}, pages = {1158 -- 1168}, year = {2021}, abstract = {Background and objectives AKI treated with dialysis initiation is a common complication of coronavirus disease 2019 (COVID-19) among hospitalized patients. However, dialysis supplies and personnel are often limited. 
Design, setting, participants, \& measurements Using data from adult patients hospitalized with COVID-19 from five hospitals from the Mount Sinai Health System who were admitted between March 10 and December 26, 2020, we developed and validated several models (logistic regression, Least Absolute Shrinkage and Selection Operator (LASSO), random forest, and eXtreme Gradient Boosting [XGBoost; with and without imputation]) for predicting treatment with dialysis or death at various time horizons (1, 3, 5, and 7 days) after hospital admission. Patients admitted to the Mount Sinai Hospital were used for internal validation, whereas the other hospitals formed part of the external validation cohort. Features included demographics, comorbidities, and laboratory and vital signs within 12 hours of hospital admission. Results A total of 6093 patients (2442 in training and 3651 in external validation) were included in the final cohort. Of the different modeling approaches used, XGBoost without imputation had the highest area under the receiver operating characteristic (AUROC) curve on internal validation (range of 0.93-0.98) and area under the precision-recall curve (AUPRC; range of 0.78-0.82) for all time points. XGBoost without imputation also had the highest test parameters on external validation (AUROC range of 0.85-0.87, and AUPRC range of 0.27-0.54) across all time windows. XGBoost without imputation outperformed all models with higher precision and recall (mean difference in AUROC of 0.04; mean difference in AUPRC of 0.15). Features of creatinine, BUN, and red cell distribution width were major drivers of the model's prediction. 
Conclusions An XGBoost model without imputation for prediction of a composite outcome of either death or dialysis in patients positive for COVID-19 had the best performance, as compared with standard and other machine learning models.}, language = {en} } @article{BorchertMockTomczaketal.2021, author = {Borchert, Florian and Mock, Andreas and Tomczak, Aurelie and H{\"u}gel, Jonas and Alkarkoukly, Samer and Knurr, Alexander and Volckmar, Anna-Lena and Stenzinger, Albrecht and Schirmacher, Peter and Debus, J{\"u}rgen and J{\"a}ger, Dirk and Longerich, Thomas and Fr{\"o}hling, Stefan and Eils, Roland and Bougatf, Nina and Sax, Ulrich and Schapranow, Matthieu-Patrick}, title = {Knowledge bases and software support for variant interpretation in precision oncology}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {6}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbab134}, pages = {17}, year = {2021}, abstract = {Precision oncology is a rapidly evolving interdisciplinary medical specialty. Comprehensive cancer panels are becoming increasingly available at pathology departments worldwide, creating the urgent need for scalable cancer variant annotation and molecularly informed treatment recommendations. A wealth of mainly academia-driven knowledge bases calls for software tools supporting the multi-step diagnostic process. We derive a comprehensive list of knowledge bases relevant for variant interpretation by a review of existing literature followed by a survey among medical experts from university hospitals in Germany. In addition, we review cancer variant interpretation tools, which integrate multiple knowledge bases. We categorize the knowledge bases along the diagnostic process in precision oncology and analyze programmatic access options as well as the integration of knowledge bases into software tools. 
The most commonly used knowledge bases provide good programmatic access options and have been integrated into a range of software tools. For the wider set of knowledge bases, access options vary across different parts of the diagnostic process. Programmatic access is limited for information regarding clinical classifications of variants and for therapy recommendations. The main issue for databases used for biological classification of pathogenic variants and pathway context information is the lack of standardized interfaces. There is no single cancer variant interpretation tool that integrates all identified knowledge bases. Specialized tools are available and need to be further developed for different steps in the diagnostic process.}, language = {en} } @article{DellepianeVaidJaladankietal.2021, author = {Dellepiane, Sergio and Vaid, Akhil and Jaladanki, Suraj K. and Coca, Steven and Fayad, Zahi A. and Charney, Alexander W. and B{\"o}ttinger, Erwin and He, John Cijiang and Glicksberg, Benjamin S. and Chan, Lili and Nadkarni, Girish}, title = {Acute kidney injury in patients hospitalized with COVID-19 in New York City}, series = {Kidney medicine}, volume = {3}, journal = {Kidney medicine}, number = {5}, publisher = {Elsevier}, address = {Amsterdam}, issn = {2590-0595}, doi = {10.1016/j.xkme.2021.06.008}, pages = {877 -- 879}, year = {2021}, language = {en} } @article{ChanChaudharySahaetal.2021, author = {Chan, Lili and Chaudhary, Kumardeep and Saha, Aparna and Chauhan, Kinsuk and Vaid, Akhil and Zhao, Shan and Paranjpe, Ishan and Somani, Sulaiman and Richter, Felix and Miotto, Riccardo and Lala, Anuradha and Kia, Arash and Timsina, Prem and Li, Li and Freeman, Robert and Chen, Rong and Narula, Jagat and Just, Allan C. and Horowitz, Carol and Fayad, Zahi and Cordon-Cardo, Carlos and Schadt, Eric and Levin, Matthew A. and Reich, David L. and Fuster, Valentin and Murphy, Barbara and He, John C. and Charney, Alexander W. 
and B{\"o}ttinger, Erwin and Glicksberg, Benjamin and Coca, Steven G. and Nadkarni, Girish N.}, title = {AKI in hospitalized patients with COVID-19}, series = {Journal of the American Society of Nephrology : JASN}, volume = {32}, journal = {Journal of the American Society of Nephrology : JASN}, number = {1}, publisher = {American Society of Nephrology}, address = {Washington}, organization = {Mt Sinai COVID Informatics Ct}, issn = {1046-6673}, doi = {10.1681/ASN.2020050615}, pages = {151 -- 160}, year = {2021}, abstract = {Background: Early reports indicate that AKI is common among patients with coronavirus disease 2019 (COVID-19) and associated with worse outcomes. However, AKI among hospitalized patients with COVID-19 in the United States is not well described. Methods: This retrospective, observational study involved a review of data from electronic health records of patients aged >= 18 years with laboratory-confirmed COVID-19 admitted to the Mount Sinai Health System from February 27 to May 30, 2020. We describe the frequency of AKI and dialysis requirement, AKI recovery, and adjusted odds ratios (aORs) with mortality. Results: Of 3993 hospitalized patients with COVID-19, AKI occurred in 1835 (46\%) patients; 347 (19\%) of the patients with AKI required dialysis. The proportions with stages 1, 2, or 3 AKI were 39\%, 19\%, and 42\%, respectively. A total of 976 (24\%) patients were admitted to intensive care, and 745 (76\%) experienced AKI. Of the 435 patients with AKI and urine studies, 84\% had proteinuria, 81\% had hematuria, and 60\% had leukocyturia. Independent predictors of severe AKI were CKD, men, and higher serum potassium at admission. In-hospital mortality was 50\% among patients with AKI versus 8\% among those without AKI (aOR, 9.2; 95\% confidence interval, 7.5 to 11.3). Of survivors with AKI who were discharged, 35\% had not recovered to baseline kidney function by the time of discharge. 
An additional 28 of 77 (36\%) patients who had not recovered kidney function at discharge did so on posthospital follow-up. Conclusions: AKI is common among patients hospitalized with COVID-19 and is associated with high mortality. Of all patients with AKI, only 30\% survived with recovery of kidney function by the time of discharge.}, language = {en} } @article{KrausMathewStephenSchapranow2021, author = {Kraus, Sara Milena and Mathew-Stephen, Mariet and Schapranow, Matthieu-Patrick}, title = {Eatomics}, series = {Journal of proteome research}, volume = {20}, journal = {Journal of proteome research}, number = {1}, publisher = {American Chemical Society}, address = {Washington}, issn = {1535-3893}, doi = {10.1021/acs.jproteome.0c00398}, pages = {1070 -- 1078}, year = {2021}, abstract = {Quantitative proteomics data are becoming increasingly more available, and as a consequence are being analyzed and interpreted by a larger group of users. However, many of these users have less programming experience. Furthermore, experimental designs and setups are getting more complicated, especially when tissue biopsies are analyzed. Luckily, the proteomics community has already established some best practices on how to conduct quality control, differential abundance analysis and enrichment analysis. However, an easy-to-use application that wraps together all steps for the exploration and flexible analysis of quantitative proteomics data is not yet available. For Eatomics, we utilize the R Shiny framework to implement carefully chosen parts of established analysis workflows to (i) make them accessible in a user-friendly way, (ii) add a multitude of interactive exploration possibilities, and (iii) develop a unique experimental design setup module, which interactively translates a given research hypothesis into a differential abundance and enrichment analysis formula. In this, we aim to fulfill the needs of a growing group of inexperienced quantitative proteomics data analysts. 
Eatomics may be tested with demo data directly online via https://we.analyzegenomes.com/now/eatomics/ or with the user's own data by installation from the Github repository at https://github.com/Millchmaedchen/Eatomics.}, language = {en} }