@phdthesis{Zadorozhnyi2021,
  author   = {Zadorozhnyi, Oleksandr},
  title    = {Contributions to the theoretical analysis of the algorithms with adversarial and dependent data},
  school   = {Universit{\"a}t Potsdam},
  pages    = {144},
  year     = {2021},
  abstract = {In this work I present concentration inequalities of Bernstein's type for the norms of Banach-valued random sums under a general functional weak-dependence assumption (the so-called \$\mathcal{C}\$-mixing). The latter is then used to prove, in the asymptotic framework, excess risk upper bounds for regularised Hilbert-valued statistical learning rules under the τ-mixing assumption on the underlying training sample. These results from the batch statistical setting are then supplemented with a regret analysis, over classes of Sobolev balls, of a kernel ridge regression type algorithm in the setting of online nonparametric regression with arbitrary data sequences. Here, in particular, the question of robustness of the kernel-based forecaster is investigated. Afterwards, in the framework of sequential learning, the multi-armed bandit problem under a \$\mathcal{C}\$-mixing assumption on the arms' outputs is considered, and a complete regret analysis of a version of the Improved UCB algorithm is given. Lastly, the probabilistic inequalities of the first part are extended to deviation inequalities (both of Azuma-Hoeffding and of Burkholder type) for partial sums of real-valued weakly dependent random fields (under a projective-type dependence condition).},
  language = {en}
}

@phdthesis{Muecke2017,
  author   = {M{\"u}cke, Nicole},
  title    = {Direct and inverse problems in machine learning},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-403479},
  school   = {Universit{\"a}t Potsdam},
  pages    = {159},
  year     = {2017},
  abstract = {We analyze an inverse noisy regression model under random design with the aim of estimating the unknown target function based on a given set of data, drawn according to some unknown probability distribution. Our estimators are all constructed by kernel methods, which rely on a Reproducing Kernel Hilbert Space structure and use spectral regularization methods. A first main result establishes upper and lower bounds for the rate of convergence under a given source condition assumption, restricting the class of admissible distributions. Since kernel methods scale poorly when massive datasets are involved, we study one approach to saving computation time and memory requirements in more detail. We show that parallelizing spectral algorithms also leads to minimax optimal rates of convergence, provided the number of machines is chosen appropriately. We emphasize that so far all estimators depend on the assumed a-priori smoothness of the target function and on the eigenvalue decay of the kernel covariance operator, which are in general unknown. Obtaining good, purely data-driven estimators constitutes the problem of adaptivity, which we handle for the single-machine problem via a version of the Lepskii principle.},
  language = {en}
}

@phdthesis{Harmeling2004,
  author   = {Harmeling, Stefan},
  title    = {Independent component analysis and beyond},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-0001540},
  school   = {Universit{\"a}t Potsdam},
  year     = {2004},
  abstract = {Independent component analysis (ICA) is a tool for statistical data analysis and signal processing that can decompose multivariate signals into their source components.
Although the classical ICA model is very useful, many applications require extensions of ICA. In this dissertation we present new methods that extend the functionality of ICA: (1) reliability analysis and grouping of independent components by adding noise, (2) robust and overcomplete ('over-complete') ICA via outlier detection, and (3) nonlinear ICA with kernel methods.},
  language = {en}
}

@article{WormellReich2021,
  author    = {Wormell, Caroline L. and Reich, Sebastian},
  title     = {Spectral convergence of diffusion maps},
  series    = {SIAM journal on numerical analysis / Society for Industrial and Applied Mathematics},
  volume    = {59},
  journal   = {SIAM journal on numerical analysis / Society for Industrial and Applied Mathematics},
  number    = {3},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia},
  issn      = {0036-1429},
  doi       = {10.1137/20M1344093},
  pages     = {1687--1734},
  year      = {2021},
  abstract  = {Diffusion maps is a manifold learning algorithm widely used for dimensionality reduction. Using a sample from a distribution, it approximates the eigenvalues and eigenfunctions of associated Laplace-Beltrami operators. Theoretical bounds on the approximation error are, however, generally much weaker than the rates that are seen in practice. This paper uses new approaches to improve the error bounds in the model case where the distribution is supported on a hypertorus. For the data sampling (variance) component of the error we make spatially localized compact embedding estimates on certain Hardy spaces; we study the deterministic (bias) component as a perturbation of the Laplace-Beltrami operator's associated PDE and apply relevant spectral stability results. Using these approaches, we match long-standing pointwise error bounds for both the spectral data and the norm convergence of the operator discretization. We also introduce an alternative normalization for diffusion maps based on Sinkhorn weights. This normalization approximates a Langevin diffusion on the sample and yields a symmetric operator approximation. We prove that it has better convergence compared with the standard normalization on flat domains, and we present a highly efficient rigorous algorithm to compute the Sinkhorn weights.},
  language  = {en}
}