@article{PapeBolzHirschfeld2017, author = {Pape, Tobias and Bolz, Carl Friedrich and Hirschfeld, Robert}, title = {Adaptive just-in-time value class optimization for lowering memory consumption and improving execution time performance}, series = {Science of computer programming}, volume = {140}, journal = {Science of computer programming}, publisher = {Elsevier}, address = {Amsterdam}, issn = {0167-6423}, doi = {10.1016/j.scico.2016.08.003}, pages = {17 -- 29}, year = {2017}, abstract = {The performance of value classes is highly dependent on how they are represented in the virtual machine. Value class instances are immutable, have no identity, and can only refer to other value objects or primitive values and since they should be very lightweight and fast, it is important to optimize them carefully. In this paper we present a technique to detect and compress common patterns of value class usage to improve memory usage and performance. The technique identifies patterns of frequent value object references and introduces abbreviated forms for them. This allows to store multiple inter-referenced value objects in an inlined memory representation, reducing the overhead stemming from meta data and object references. Applied to a small prototype and an implementation of the Racket language, we found improvements in memory usage and execution time for several micro-benchmarks. (C) 2016 Elsevier B.V. All rights reserved.}, language = {en} } @article{PapeFelgentreffHirschfeldetal.2016, author = {Pape, Tobias and Felgentreff, Tim and Hirschfeld, Robert and Gulenko, Anton and Bolz, Carl Friedrich}, title = {Language-independent Storage Strategies for Tracing-JIT-based Virtual Machines}, series = {ACM SIGPLAN notices}, volume = {51}, journal = {ACM SIGPLAN notices}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0362-1340}, doi = {10.1145/2816707.2816716}, pages = {104 -- 113}, year = {2016}, abstract = {Storage strategies have been proposed as a run-time optimization for the PyPy Python implementation and have shown promising results for optimizing execution speed and memory requirements. However, it remained unclear whether the approach works equally well in other dynamic languages. Furthermore, while PyPy is based on RPython, a language to write VMs with reusable components such as a tracing just-in-time compiler and garbage collection, the strategies design itself was not generalized to be reusable across languages implemented using that same toolchain. In this paper, we present a general design and implementation for storage strategies and show how they can be reused across different RPython-based languages. We evaluate the performance of our implementation for RSqueak, an RPython-based VM for Squeak/Smalltalk and show that storage strategies may indeed off er performance benefits for certain workloads in other dynamic programming languages. We furthermore evaluate the generality of our implementation by applying it to Topaz, a Ruby VM, and Pycket, a Racket implementation.}, language = {en} } @article{BaumanBolzHirschfeldetal.2015, author = {Bauman, Spenser and Bolz, Carl Friedrich and Hirschfeld, Robert and Kirilichev, Vasily and Pape, Tobias and Siek, Jeremy G. and Tobin-Hochstadt, Sam}, title = {Pycket: A Tracing JIT for a Functional Language}, series = {ACM SIGPLAN notices}, volume = {50}, journal = {ACM SIGPLAN notices}, number = {9}, publisher = {Association for Computing Machinery}, address = {New York}, issn = {0362-1340}, doi = {10.1145/2784731.2784740}, pages = {22 -- 34}, year = {2015}, abstract = {We present Pycket, a high-performance tracing JIT compiler for Racket. Pycket supports a wide variety of the sophisticated features in Racket such as contracts, continuations, classes, structures, dynamic binding, and more. On average, over a standard suite of benchmarks, Pycket outperforms existing compilers, both Racket's JIT and other highly-optimizing Scheme compilers. Further, Pycket provides much better performance for Racket proxies than existing systems, dramatically reducing the overhead of contracts and gradual typing. We validate this claim with performance evaluation on multiple existing benchmark suites. The Pycket implementation is of independent interest as an application of the RPython meta-tracing framework (originally created for PyPy), which automatically generates tracing JIT compilers from interpreters. Prior work on meta-tracing focuses on bytecode interpreters, whereas Pycket is a high-level interpreter based on the CEK abstract machine and operates directly on abstract syntax trees. Pycket supports proper tail calls and first-class continuations. In the setting of a functional language, where recursion and higher-order functions are more prevalent than explicit loops, the most significant performance challenge for a tracing JIT is identifying which control flows constitute a loop-we discuss two strategies for identifying loops and measure their impact.}, language = {en} } @book{WassermannFelgentreffPapeetal.2016, author = {Wassermann, Lars and Felgentreff, Tim and Pape, Tobias and Bolz, Carl Friedrich and Hirschfeld, Robert}, title = {Tracing Algorithmic Primitives in RSqueak/VM}, number = {104}, publisher = {Universit{\"a}tsverlag Potsdam}, address = {Potsdam}, isbn = {978-3-86956-355-8}, issn = {1613-5652}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-91277}, publisher = {Universit{\"a}t Potsdam}, pages = {45}, year = {2016}, abstract = {When realizing a programming language as VM, implementing behavior as part of the VM, as primitive, usually results in reduced execution times. But supporting and developing primitive functions requires more effort than maintaining and using code in the hosted language since debugging is harder, and the turn-around times for VM parts are higher. Furthermore, source artifacts of primitive functions are seldom reused in new implementations of the same language. And if they are reused, the existing API usually is emulated, reducing the performance gains. Because of recent results in tracing dynamic compilation, the trade-off between performance and ease of implementation, reuse, and changeability might now be decided adversely. In this work, we investigate the trade-offs when creating primitives, and in particular how large a difference remains between primitive and hosted function run times in VMs with tracing just-in-time compiler. To that end, we implemented the algorithmic primitive BitBlt three times for RSqueak/VM. RSqueak/VM is a Smalltalk VM utilizing the PyPy RPython toolchain. We compare primitive implementations in C, RPython, and Smalltalk, showing that due to the tracing just-in-time compiler, the performance gap has lessened by one magnitude to one magnitude.}, language = {en} }