@article{FelgentreffPerscheidHirschfeld2017, author = {Felgentreff, Tim and Perscheid, Michael and Hirschfeld, Robert}, title = {Implementing record and refinement for debugging timing-dependent communication}, series = {Science of computer programming}, volume = {134}, journal = {Science of computer programming}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0167-6423}, doi = {10.1016/j.scico.2015.11.006}, pages = {4 -- 18}, year = {2017}, abstract = {Distributed applications are hard to debug because timing-dependent network communication is a source of non-deterministic behavior. Current approaches to debug non deterministic failures include post-mortem debugging as well as record and replay. However, the first impairs system performance to gather data, whereas the latter requires developers to understand the timing-dependent communication at a lower level of abstraction than they develop at. Furthermore, both approaches require intrusive core library modifications to gather data from live systems. In this paper, we present the Peek-At-Talk debugger for investigating non-deterministic failures with low overhead in a systematic, top-down method, with a particular focus on tool-building issues in the following areas: First, we show how our debugging framework Path Tools guides developers from failures to their root causes and gathers run-time data with low overhead. Second, we present Peek-At-Talk, an extension to our Path Tools framework to record non-deterministic communication and refine behavioral data that connects source code with network events. Finally, we scope changes to the core library to record network communication without impacting other network applications.}, language = {en} }