% Conference and workshop papers. Text outside entries is ignored by BibTeX,
% so these percent-prefixed lines serve as comments.
%
% Conventions used below:
%   - inproceedings entry type, with the proceedings title in booktitle
%   - page ranges written with a bare double hyphen (no surrounding spaces)
%   - acronyms brace-protected so sentence-casing styles keep their capitals
%   - abstracts are reproduced verbatim from the original records

% Short paper in the e-Energy '19 proceedings.
@inproceedings{HerzogHoenigSchroederPreikschatetal.2019,
  author    = {Herzog, Benedict and H{\"o}nig, Timo and Schr{\"o}der-Preikschat, Wolfgang and Plauth, Max and K{\"o}hler, Sven and Polze, Andreas},
  title     = {Bridging the Gap},
  booktitle = {{e-Energy} '19: Proceedings of the Tenth {ACM} International Conference on Future Energy Systems},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  isbn      = {978-1-4503-6671-7},
  doi       = {10.1145/3307772.3330176},
  pages     = {428--430},
  year      = {2019},
  abstract  = {The recent restructuring of the electricity grid (i.e., smart grid) introduces a number of challenges for today's large-scale computing systems. To operate reliable and efficient, computing systems must adhere not only to technical limits (i.e., thermal constraints) but they must also reduce operating costs, for example, by increasing their energy efficiency. Efforts to improve the energy efficiency, however, are often hampered by inflexible software components that hardly adapt to underlying hardware characteristics. In this paper, we propose an approach to bridge the gap between inflexible software and heterogeneous hardware architectures. Our proposal introduces adaptive software components that dynamically adapt to heterogeneous processing units (i.e., accelerators) during runtime to improve the energy efficiency of computing systems.},
  language  = {en},
}

% Workshop paper in the CANDARW 2018 proceedings.
@inproceedings{PlauthPolze2018,
  author    = {Plauth, Max and Polze, Andreas},
  title     = {Towards improving data transfer efficiency for accelerators using hardware compression},
  booktitle = {Sixth International Symposium on Computing and Networking Workshops ({CANDARW})},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {New York},
  isbn      = {978-1-5386-9184-7},
  doi       = {10.1109/CANDARW.2018.00031},
  pages     = {125--131},
  year      = {2018},
  abstract  = {The overhead of moving data is the major limiting factor in todays hardware, especially in heterogeneous systems where data needs to be transferred frequently between host and accelerator memory. With the increasing availability of hardware-based compression facilities in modern computer architectures, this paper investigates the potential of hardware-accelerated I/O Link Compression as a promising approach to reduce data volumes and transfer time, thus improving the overall efficiency of accelerators in heterogeneous systems. Our considerations are focused on On-the-Fly compression in both Single-Node and Scale-Out deployments. Based on a theoretical analysis, this paper demonstrates the feasibility of hardware-accelerated On-the-Fly I/O Link Compression for many workloads in a Scale-Out scenario, and for some even in a Single-Node scenario. These findings are confirmed in a preliminary evaluation using software-and hardware-based implementations of the 842 compression algorithm.},
  language  = {en},
}

% Workshop paper in the IPDPSW 2017 proceedings.
@inproceedings{PlauthSterzEberhardtetal.2017,
  author    = {Plauth, Max and Sterz, Christoph and Eberhardt, Felix and Feinbube, Frank and Polze, Andreas},
  title     = {Assessing {NUMA} performance based on hardware event counters},
  booktitle = {{IEEE} International Parallel and Distributed Processing Symposium Workshops ({IPDPSW})},
  publisher = {Institute of Electrical and Electronics Engineers},
  address   = {New York},
  isbn      = {978-0-7695-6149-3},
  issn      = {2164-7062},
  doi       = {10.1109/IPDPSW.2017.51},
  pages     = {904--913},
  year      = {2017},
  abstract  = {Cost models play an important role for the efficient implementation of software systems. These models can be embedded in operating systems and execution environments to optimize execution at run time. Even though non-uniform memory access (NUMA) architectures are dominating today's server landscape, there is still a lack of parallel cost models that represent NUMA system sufficiently. Therefore, the existing NUMA models are analyzed, and a two-step performance assessment strategy is proposed that incorporates low-level hardware counters as performance indicators. To support the two-step strategy, multiple tools are developed, all accumulating and enriching specific hardware event counter information, to explore, measure, and visualize these low-overhead performance indicators. The tools are showcased and discussed alongside specific experiments in the realm of performance assessment.},
  language  = {en},
}