@phdthesis{Shekhar2023, author = {Shekhar, Sumit}, title = {Image and video processing based on intrinsic attributes}, doi = {10.25932/publishup-62004}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-620049}, school = {Universit{\"a}t Potsdam}, pages = {xii, 143}, year = {2023}, abstract = {Advancements in computer vision techniques driven by machine learning have facilitated robust and efficient estimation of attributes such as depth, optical flow, albedo, and shading. To encapsulate all such underlying properties associated with images and videos, we evolve the concept of intrinsic images towards intrinsic attributes. Further, rapid hardware growth in the form of high-quality smartphone cameras, readily available depth sensors, mobile GPUs, or dedicated neural processing units have made image and video processing pervasive. In this thesis, we explore the synergies between the above two advancements and propose novel image and video processing techniques and systems based on them. To begin with, we investigate intrinsic image decomposition approaches and analyze how they can be implemented on mobile devices. We propose an approach that considers not only diffuse reflection but also specular reflection; it allows us to decompose an image into specularity, albedo, and shading on a resource constrained system (e.g., smartphones or tablets) using the depth data provided by the built-in depth sensors. In addition, we explore how on-device depth data can further be used to add an immersive dimension to 2D photos, e.g., showcasing parallax effects via 3D photography. In this regard, we develop a novel system for interactive 3D photo generation and stylization on mobile devices. Further, we investigate how adaptive manipulation of baseline-albedo (i.e., chromaticity) can be used for efficient visual enhancement under low-lighting conditions. The proposed technique allows for interactive editing of enhancement settings while achieving improved quality and performance. We analyze the inherent optical flow and temporal noise as intrinsic properties of a video. We further propose two new techniques for applying the above intrinsic attributes for the purpose of consistent video filtering. To this end, we investigate how to remove temporal inconsistencies perceived as flickering artifacts. One of the techniques does not require costly optical flow estimation, while both provide interactive consistency control. Using intrinsic attributes for image and video processing enables new solutions for mobile devices - a pervasive visual computing device - and will facilitate novel applications for Augmented Reality (AR), 3D photography, and video stylization. The proposed low-light enhancement techniques can also improve the accuracy of high-level computer vision tasks (e.g., face detection) under low-light conditions. Finally, our approach for consistent video filtering can extend a wide range of image-based processing for videos.}, language = {en} } @article{ShekharReimannMayeretal.2021, author = {Shekhar, Sumit and Reimann, Max and Mayer, Maximilian and Semmo, Amir and Pasewaldt, Sebastian and D{\"o}llner, J{\"u}rgen and Trapp, Matthias}, title = {Interactive photo editing on smartphones via intrinsic decomposition}, series = {Computer graphics forum : journal of the European Association for Computer Graphics}, volume = {40}, journal = {Computer graphics forum : journal of the European Association for Computer Graphics}, publisher = {Blackwell}, address = {Oxford}, issn = {0167-7055}, doi = {10.1111/cgf.142650}, pages = {497 -- 510}, year = {2021}, abstract = {Intrinsic decomposition refers to the problem of estimating scene characteristics, such as albedo and shading, when one view or multiple views of a scene are provided. The inverse problem setting, where multiple unknowns are solved given a single known pixel-value, is highly under-constrained. When provided with correlating image and depth data, intrinsic scene decomposition can be facilitated using depth-based priors, which nowadays is easy to acquire with high-end smartphones by utilizing their depth sensors. In this work, we present a system for intrinsic decomposition of RGB-D images on smartphones and the algorithmic as well as design choices therein. Unlike state-of-the-art methods that assume only diffuse reflectance, we consider both diffuse and specular pixels. For this purpose, we present a novel specularity extraction algorithm based on a multi-scale intensity decomposition and chroma inpainting. At this, the diffuse component is further decomposed into albedo and shading components. We use an inertial proximal algorithm for non-convex optimization (iPiano) to ensure albedo sparsity. Our GPU-based visual processing is implemented on iOS via the Metal API and enables interactive performance on an iPhone 11 Pro. Further, a qualitative evaluation shows that we are able to obtain high-quality outputs. Furthermore, our proposed approach for specularity removal outperforms state-of-the-art approaches for real-world images, while our albedo and shading layer decomposition is faster than the prior work at a comparable output quality. Manifold applications such as recoloring, retexturing, relighting, appearance editing, and stylization are shown, each using the intrinsic layers obtained with our method and/or the corresponding depth data.}, language = {en} }