% PhD thesis records, one entry per work, one field per line.
% Conventions used below:
%   - Acronyms and capitalized words in titles are braced so that
%     sentence-casing bibliography styles do not lowercase them.
%   - Abstracts use LaTeX-safe ASCII only (--- for em dashes, $\sim$ for
%     "approximately", ``...'' for quotation marks) and are wrapped one
%     sentence per line; BibTeX collapses the line breaks to single spaces.

% Shekhar 2023: English-language thesis; carries both a DOI and a URN resolver URL.
@phdthesis{Shekhar2023,
  author   = {Shekhar, Sumit},
  title    = {Image and video processing based on intrinsic attributes},
  doi      = {10.25932/publishup-62004},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-620049},
  school   = {Universit{\"a}t Potsdam},
  pages    = {xii, 143},
  year     = {2023},
  abstract = {Advancements in computer vision techniques driven by machine learning have facilitated robust and efficient estimation of attributes such as depth, optical flow, albedo, and shading.
    To encapsulate all such underlying properties associated with images and videos, we evolve the concept of intrinsic images towards intrinsic attributes.
    Further, rapid hardware growth in the form of high-quality smartphone cameras, readily available depth sensors, mobile GPUs, or dedicated neural processing units have made image and video processing pervasive.
    In this thesis, we explore the synergies between the above two advancements and propose novel image and video processing techniques and systems based on them.
    To begin with, we investigate intrinsic image decomposition approaches and analyze how they can be implemented on mobile devices.
    We propose an approach that considers not only diffuse reflection but also specular reflection; it allows us to decompose an image into specularity, albedo, and shading on a resource constrained system (e.g., smartphones or tablets) using the depth data provided by the built-in depth sensors.
    In addition, we explore how on-device depth data can further be used to add an immersive dimension to 2D photos, e.g., showcasing parallax effects via 3D photography.
    In this regard, we develop a novel system for interactive 3D photo generation and stylization on mobile devices.
    Further, we investigate how adaptive manipulation of baseline-albedo (i.e., chromaticity) can be used for efficient visual enhancement under low-lighting conditions.
    The proposed technique allows for interactive editing of enhancement settings while achieving improved quality and performance.
    We analyze the inherent optical flow and temporal noise as intrinsic properties of a video.
    We further propose two new techniques for applying the above intrinsic attributes for the purpose of consistent video filtering.
    To this end, we investigate how to remove temporal inconsistencies perceived as flickering artifacts.
    One of the techniques does not require costly optical flow estimation, while both provide interactive consistency control.
    Using intrinsic attributes for image and video processing enables new solutions for mobile devices - a pervasive visual computing device - and will facilitate novel applications for Augmented Reality (AR), 3D photography, and video stylization.
    The proposed low-light enhancement techniques can also improve the accuracy of high-level computer vision tasks (e.g., face detection) under low-light conditions.
    Finally, our approach for consistent video filtering can extend a wide range of image-based processing for videos.},
  language = {en}
}

% Semmo 2016: English-language thesis. {3D} is braced to survive title recasing;
% the abstract's em dashes are written as --- instead of raw Unicode.
@phdthesis{Semmo2016,
  author   = {Semmo, Amir},
  title    = {Design and implementation of non-photorealistic rendering techniques for {3D} geospatial data},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-99525},
  school   = {Universit{\"a}t Potsdam},
  pages    = {XVI, 155},
  year     = {2016},
  abstract = {Geospatial data has become a natural part of a growing number of information systems and services in the economy, society, and people's personal lives.
    In particular, virtual 3D city and landscape models constitute valuable information sources within a wide variety of applications such as urban planning, navigation, tourist information, and disaster management.
    Today, these models are often visualized in detail to provide realistic imagery.
    However, a photorealistic rendering does not automatically lead to high image quality, with respect to an effective information transfer, which requires important or prioritized information to be interactively highlighted in a context-dependent manner.
    Approaches in non-photorealistic renderings particularly consider a user's task and camera perspective when attempting optimal expression, recognition, and communication of important or prioritized information.
    However, the design and implementation of non-photorealistic rendering techniques for 3D geospatial data pose a number of challenges, especially when inherently complex geometry, appearance, and thematic data must be processed interactively.
    Hence, a promising technical foundation is established by the programmable and parallel computing architecture of graphics processing units.
    This thesis proposes non-photorealistic rendering techniques that enable both the computation and selection of the abstraction level of 3D geospatial model contents according to user interaction and dynamically changing thematic information.
    To achieve this goal, the techniques integrate with hardware-accelerated rendering pipelines using shader technologies of graphics processing units for real-time image synthesis.
    The techniques employ principles of artistic rendering, cartographic generalization, and 3D semiotics---unlike photorealistic rendering---to synthesize illustrative renditions of geospatial feature type entities such as water surfaces, buildings, and infrastructure networks.
    In addition, this thesis contributes a generic system that enables to integrate different graphic styles---photorealistic and non-photorealistic---and provide their seamless transition according to user tasks, camera view, and image resolution.
    Evaluations of the proposed techniques have demonstrated their significance to the field of geospatial information visualization including topics such as spatial perception, cognition, and mapping.
    In addition, the applications in illustrative and focus+context visualization have reflected their potential impact on optimizing the information transfer regarding factors such as cognitive load, integration of non-realistic information, visualization of uncertainty, and visualization on small displays.},
  language = {en}
}

% Muehlbauer 2011: German-language thesis. Capitalized words in the title are
% braced because sentence-casing styles would otherwise lowercase them.
% The quoted word in the abstract uses ``...'' rather than straight double
% quotes, which act as an umlaut shorthand under babel's German options.
@phdthesis{Muehlbauer2011,
  author   = {M{\"u}hlbauer, Felix},
  title    = {Entwurf, {Methoden} und {Werkzeuge} f{\"u}r komplexe {Bildverarbeitungssysteme} auf {Rekonfigurierbaren} {System-on-Chip-Architekturen}},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-59923},
  school   = {Universit{\"a}t Potsdam},
  year     = {2011},
  abstract = {Bildverarbeitungsanwendungen stellen besondere Anspr{\"u}che an das ausf{\"u}hrende Rechensystem.
    Einerseits ist eine hohe Rechenleistung erforderlich.
    Andererseits ist eine hohe Flexibilit{\"a}t von Vorteil, da die Entwicklung tendentiell ein experimenteller und interaktiver Prozess ist.
    F{\"u}r neue Anwendungen tendieren Entwickler dazu, eine Rechenarchitektur zu w{\"a}hlen, die sie gut kennen, anstatt eine Architektur einzusetzen, die am besten zur Anwendung passt.
    Bildverarbeitungsalgorithmen sind inh{\"a}rent parallel, doch herk{\"o}mmliche bildverarbeitende eingebettete Systeme basieren meist auf sequentiell arbeitenden Prozessoren.
    Im Gegensatz zu dieser ``Unstimmigkeit'' k{\"o}nnen hocheffiziente Systeme aus einer gezielten Synergie aus Software- und Hardwarekomponenten aufgebaut werden.
    Die Konstruktion solcher System ist jedoch komplex und viele L{\"o}sungen, wie zum Beispiel grobgranulare Architekturen oder anwendungsspezifische Programmiersprachen, sind oft zu akademisch f{\"u}r einen Einsatz in der Wirtschaft.
    Die vorliegende Arbeit soll ein Beitrag dazu leisten, die Komplexit{\"a}t von Hardware-Software-Systemen zu reduzieren und damit die Entwicklung hochperformanter on-Chip-Systeme im Bereich Bildverarbeitung zu vereinfachen und wirtschaftlicher zu machen.
    Dabei wurde Wert darauf gelegt, den Aufwand f{\"u}r Einarbeitung, Entwicklung als auch Erweiterungen gering zu halten.
    Es wurde ein Entwurfsfluss konzipiert und umgesetzt, welcher es dem Softwareentwickler erm{\"o}glicht, Berechnungen durch Hardwarekomponenten zu beschleunigen und das zu Grunde liegende eingebettete System komplett zu prototypisieren.
    Hierbei werden komplexe Bildverarbeitungsanwendungen betrachtet, welche ein Betriebssystem erfordern, wie zum Beispiel verteilte Kamerasensornetzwerke.
    Die eingesetzte Software basiert auf Linux und der Bildverarbeitungsbibliothek OpenCV.
    Die Verteilung der Berechnungen auf Software- und Hardwarekomponenten und die daraus resultierende Ablaufplanung und Generierung der Rechenarchitektur erfolgt automatisch.
    Mittels einer auf der Antwortmengenprogrammierung basierten Entwurfsraumexploration ergeben sich Vorteile bei der Modellierung und Erweiterung.
    Die Systemsoftware wird mit OpenEmbedded/Bitbake synthetisiert und die erzeugten on-Chip-Architekturen auf FPGAs realisiert.},
  language = {de}
}

% Boeniger 2010: English-language thesis. {3D} and {GPR} are braced to survive
% title recasing. "Approximately 5 m" in the abstract is written with $\sim$
% because a bare tilde is a non-breaking space in LaTeX and would not print.
@phdthesis{Boeniger2010,
  author   = {B{\"o}niger, Urs},
  title    = {Attributes and their potential to analyze and interpret {3D} {GPR} data},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-50124},
  school   = {Universit{\"a}t Potsdam},
  year     = {2010},
  abstract = {Based on technological advances made within the past decades, ground-penetrating radar (GPR) has become a well-established, non-destructive subsurface imaging technique.
    Catalyzed by recent demands for high-resolution, near-surface imaging (e.g., the detection of unexploded ordnances and subsurface utilities, or hydrological investigations), the quality of today's GPR-based, near-surface images has significantly matured.
    At the same time, the analysis of oil and gas related reflection seismic data sets has experienced significant advances.
    Considering the sensitivity of attribute analysis with respect to data positioning in general, and multi-trace attributes in particular, trace positioning accuracy is of major importance for the success of attribute-based analysis flows.
    Therefore, to study the feasibility of GPR-based attribute analyses, I first developed and evaluated a real-time GPR surveying setup based on a modern tracking total station (TTS).
    The combination of current GPR systems capability of fusing global positioning system (GPS) and geophysical data in real-time, the ability of modern TTS systems to generate a GPS-like positional output and wireless data transmission using radio modems results in a flexible and robust surveying setup.
    To elaborate the feasibility of this setup, I studied the major limitations of such an approach: system cross-talk and data delays known as latencies.
    Experimental studies have shown that when a minimal distance of $\sim$5~m between the GPR and the TTS system is considered, the signal-to-noise ratio of the acquired GPR data using radio communication equals the one without radio communication.
    To address the limitations imposed by system latencies, inherent to all real-time data fusion approaches, I developed a novel correction (calibration) strategy to assess the gross system latency and to correct for it.
    This resulted in the centimeter trace accuracy required by high-frequency and/or three-dimensional (3D) GPR surveys.
    Having introduced this flexible high-precision surveying setup, I successfully demonstrated the application of attribute-based processing to GPR specific problems, which may differ significantly from the geological ones typically addressed by the oil and gas industry using seismic data.
    In this thesis, I concentrated on archaeological and subsurface utility problems, as they represent typical near-surface geophysical targets.
    Enhancing 3D archaeological GPR data sets using a dip-steered filtering approach, followed by calculation of coherency and similarity, allowed me to conduct subsurface interpretations far beyond those obtained by classical time-slice analyses.
    I could show that the incorporation of additional data sets (magnetic and topographic) and attributes derived from these data sets can further improve the interpretation.
    In a case study, such an approach revealed the complementary nature of the individual data sets and, for example, allowed conclusions about the source location of magnetic anomalies by concurrently analyzing GPR time/depth slices to be made.
    In addition to archaeological targets, subsurface utility detection and characterization is a steadily growing field of application for GPR.
    I developed a novel attribute called depolarization.
    Incorporation of geometrical and physical feature characteristics into the depolarization attribute allowed me to display the observed polarization phenomena efficiently.
    Geometrical enhancement makes use of an improved symmetry extraction algorithm based on Laplacian high-boosting, followed by a phase-based symmetry calculation using a two-dimensional (2D) log-Gabor filterbank decomposition of the data volume.
    To extract the physical information from the dual-component data set, I employed a sliding-window principle component analysis.
    The combination of the geometrically derived feature angle and the physically derived polarization angle allowed me to enhance the polarization characteristics of subsurface features.
    Ground-truth information obtained by excavations confirmed this interpretation.
    In the future, inclusion of cross-polarized antennae configurations into the processing scheme may further improve the quality of the depolarization attribute.
    In addition to polarization phenomena, the time-dependent frequency evolution of GPR signals might hold further information on the subsurface architecture and/or material properties.
    High-resolution, sparsity promoting decomposition approaches have recently had a significant impact on the image and signal processing community.
    In this thesis, I introduced a modified tree-based matching pursuit approach.
    Based on different synthetic examples, I showed that the modified tree-based pursuit approach clearly outperforms other commonly used time-frequency decomposition approaches with respect to both time and frequency resolutions.
    Apart from the investigation of tuning effects in GPR data, I also demonstrated the potential of high-resolution sparse decompositions for advanced data processing.
    Frequency modulation of individual atoms themselves allows to efficiently correct frequency attenuation effects and improve resolution based on shifting the average frequency level.
    GPR-based attribute analysis is still in its infancy.
    Considering the growing widespread realization of 3D GPR studies there will certainly be an increasing demand towards improved subsurface interpretations in the future.
    Similar to the assessment of quantitative reservoir properties through the combination of 3D seismic attribute volumes with sparse well-log information, parameter estimation in a combined manner represents another step in emphasizing the potential of attribute-driven GPR data analyses.},
  language = {en}
}