@phdthesis{Chen2023, author = {Chen, Junchao}, title = {A self-adaptive resilient method for implementing and managing the high-reliability processing system}, doi = {10.25932/publishup-58313}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-583139}, school = {Universit{\"a}t Potsdam}, pages = {XXIII, 167}, year = {2023}, abstract = {As a result of CMOS scaling, radiation-induced Single-Event Effects (SEEs) in electronic circuits became a critical reliability issue for modern Integrated Circuits (ICs) operating under harsh radiation conditions. SEEs can be triggered in combinational or sequential logic by the impact of high-energy particles, leading to destructive or non-destructive faults, resulting in data corruption or even system failure. Typically, the SEE mitigation methods are deployed statically in processing architectures based on the worst-case radiation conditions, which is most of the time unnecessary and results in a resource overhead. Moreover, the space radiation conditions are dynamically changing, especially during Solar Particle Events (SPEs). The intensity of space radiation can differ over five orders of magnitude within a few hours or days, resulting in several orders of magnitude fault probability variation in ICs during SPEs. This thesis introduces a comprehensive approach for designing a self-adaptive fault resilient multiprocessing system to overcome the static mitigation overhead issue. This work mainly addresses the following topics: (1) Design of on-chip radiation particle monitor for real-time radiation environment detection, (2) Investigation of space environment predictor, as support for solar particle events forecast, (3) Dynamic mode configuration in the resilient multiprocessing system. Therefore, according to detected and predicted in-flight space radiation conditions, the target system can be configured to use no mitigation or low-overhead mitigation during non-critical periods of time. The redundant resources can be used to improve system performance or save power. On the other hand, during increased radiation activity periods, such as SPEs, the mitigation methods can be dynamically configured appropriately depending on the real-time space radiation environment, resulting in higher system reliability. Thus, a dynamic trade-off in the target system between reliability, performance and power consumption in real-time can be achieved. All results of this work are evaluated in a highly reliable quad-core multiprocessing system that allows the self-adaptive setting of optimal radiation mitigation mechanisms during run-time. Proposed methods can serve as a basis for establishing a comprehensive self-adaptive resilient system design process. Successful implementation of the proposed design in the quad-core multiprocessor shows its application perspective also in the other designs.}, language = {en} } @article{ChenLangeAndjelkovicetal.2022, author = {Chen, Junchao and Lange, Thomas and Andjelkovic, Marko and Simevski, Aleksandar and Lu, Li and Krstić, Miloš}, title = {Solar particle event and single event upset prediction from SRAM-based monitor and supervised machine learning}, series = {IEEE transactions on emerging topics in computing / IEEE Computer Society, Institute of Electrical and Electronics Engineers}, volume = {10}, journal = {IEEE transactions on emerging topics in computing / IEEE Computer Society, Institute of Electrical and Electronics Engineers}, number = {2}, publisher = {Institute of Electrical and Electronics Engineers}, address = {[New York, NY]}, issn = {2168-6750}, doi = {10.1109/TETC.2022.3147376}, pages = {564 -- 580}, year = {2022}, abstract = {The intensity of cosmic radiation may differ over five orders of magnitude within a few hours or days during the Solar Particle Events (SPEs), thus increasing for several orders of magnitude the probability of Single Event Upsets (SEUs) in space-borne electronic systems. Therefore, it is vital to enable the early detection of the SEU rate changes in order to ensure timely activation of dynamic radiation hardening measures. In this paper, an embedded approach for the prediction of SPEs and SRAM SEU rate is presented. The proposed solution combines the real-time SRAM-based SEU monitor, the offline-trained machine learning model and online learning algorithm for the prediction. With respect to the state-of-the-art, our solution brings the following benefits: (1) Use of existing on-chip data storage SRAM as a particle detector, thus minimizing the hardware and power overhead, (2) Prediction of SRAM SEU rate one hour in advance, with the fine-grained hourly tracking of SEU variations during SPEs as well as under normal conditions, (3) Online optimization of the prediction model for enhancing the prediction accuracy during run-time, (4) Negligible cost of hardware accelerator design for the implementation of selected machine learning model and online learning algorithm. The proposed design is intended for a highly dependable and self-adaptive multiprocessing system employed in space applications, allowing to trigger the radiation mitigation mechanisms before the onset of high radiation levels.}, language = {en} }