% Bibliography export, normalised to one field per line.
%
% Review notes (text outside entries is ignored by BibTeX):
%  * The redundant "series" copy of the journal name was dropped from the
%    article entries; biblatex would otherwise print it as a journal series.
%  * Page ranges use an unspaced double hyphen.
%  * MiethKloftRodriguezetal.2016: the former pages value (14) looked like a
%    page count, so it now lives in "pagetotal"; "pages" carries the article
%    number taken from the DOI suffix -- TODO confirm against the publisher.
%  * Journal / volume / booktitle values added to the 2006 entries were
%    derived from each entry's DOI and ISSN -- TODO confirm against the
%    publisher records and add page ranges where known.
%  * Acronyms and proper nouns in titles are brace-protected so that
%    sentence-casing styles keep their capitals.

@article{MiethKloftRodriguezetal.2016,
  author    = {Mieth, Bettina and Kloft, Marius and Rodriguez, Juan Antonio and Sonnenburg, Soren and Vobruba, Robin and Morcillo-Suarez, Carlos and Farre, Xavier and Marigorta, Urko M. and Fehr, Ernst and Dickhaus, Thorsten and Blanchard, Gilles and Schunk, Daniel and Navarro, Arcadi and M{\"u}ller, Klaus-Robert},
  title     = {Combining Multiple Hypothesis Testing with Machine Learning Increases the Statistical Power of Genome-wide Association Studies},
  journal   = {Scientific reports},
  volume    = {6},
  pages     = {36671},
  pagetotal = {14},
  year      = {2016},
  publisher = {Nature Publ. Group},
  address   = {London},
  issn      = {2045-2322},
  doi       = {10.1038/srep36671},
  abstract  = {The standard approach to the analysis of genome-wide association studies (GWAS) is based on testing each position in the genome individually for statistical significance of its association with the phenotype under investigation. To improve the analysis of GWAS, we propose a combination of machine learning and statistical testing that takes correlation structures within the set of SNPs under investigation in a mathematically well-controlled manner into account. The novel two-step algorithm, COMBI, first trains a support vector machine to determine a subset of candidate SNPs and then performs hypothesis tests for these SNPs together with an adequate threshold correction. Applying COMBI to data from a WTCCC study (2007) and measuring performance as replication by independent GWAS published within the 2008-2015 period, we show that our method outperforms ordinary raw p-value thresholding as well as other state-of-the-art methods. COMBI presents higher power and precision than the examined alternatives while yielding fewer false (i.e. non-replicated) and more true (i.e. replicated) discoveries when its results are validated on later GWAS studies. More than 80\% of the discoveries made by COMBI upon WTCCC data have been validated by independent studies. Implementations of the COMBI method are available as a part of the GWASpi toolbox 2.0.},
  language  = {en},
}

@article{MontavonBraunKruegeretal.2013,
  author    = {Montavon, Gregoire and Braun, Mikio L. and Kr{\"u}ger, Tammo and M{\"u}ller, Klaus-Robert},
  title     = {Analyzing Local Structure in Kernel-Based Learning},
  journal   = {IEEE signal processing magazine},
  volume    = {30},
  number    = {4},
  pages     = {62--74},
  year      = {2013},
  publisher = {Inst. of Electr. and Electronics Engineers},
  address   = {Piscataway},
  issn      = {1053-5888},
  doi       = {10.1109/MSP.2013.2249294},
  language  = {en},
}

@article{LaskovGehlKruegeretal.2006,
  author    = {Laskov, Pavel and Gehl, Christian and Kr{\"u}ger, Stefan and M{\"u}ller, Klaus-Robert},
  title     = {Incremental support vector learning: analysis, implementation and applications},
  journal   = {Journal of machine learning research},
  volume    = {7},
  pages     = {1909--1936},
  year      = {2006},
  publisher = {MIT Press},
  address   = {Cambridge, Mass.},
  issn      = {1532-4435},
  abstract  = {Incremental Support Vector Machines (SVM) are instrumental in practical applications of online learning. This work focuses on the design and analysis of efficient incremental SVM learning, with the aim of providing a fast, numerically stable and robust implementation. A detailed analysis of convergence and of algorithmic complexity of incremental SVM learning is carried out. Based on this analysis, a new design of storage and numerical operations is proposed, which speeds up the training of an incremental SVM by a factor of 5 to 20. The performance of the new algorithm is demonstrated in two scenarios: learning with limited resources and active learning. Various applications of the algorithm, such as in drug discovery, online monitoring of industrial devices and surveillance of network traffic, can be foreseen.},
  language  = {en},
}

@article{ShenoyKrauledatBlankertzetal.2006,
  author    = {Shenoy, Pradeep and Krauledat, Matthias and Blankertz, Benjamin and Rao, Rajesh P. N. and M{\"u}ller, Klaus-Robert},
  title     = {Towards adaptive classification for {BCI}},
  journal   = {Journal of Neural Engineering},
  volume    = {3},
  number    = {1},
  year      = {2006},
  doi       = {10.1088/1741-2560/3/1/R02},
  abstract  = {Non-stationarities are ubiquitous in EEG signals. They are especially apparent in the use of EEG-based brain-computer interfaces (BCIs): (a) in the differences between the initial calibration measurement and the online operation of a BCI, or (b) caused by changes in the subject's brain processes during an experiment (e.g. due to fatigue, change of task involvement, etc). In this paper, we quantify for the first time such systematic evidence of statistical differences in data recorded during offline and online sessions. Furthermore, we propose novel techniques of investigating and visualizing data distributions, which are particularly useful for the analysis of (non-)stationarities. Our study shows that the brain signals used for control can change substantially from the offline calibration sessions to online control, and also within a single session. In addition to this general characterization of the signals, we propose several adaptive classification schemes and study their performance on data recorded during online experiments. An encouraging result of our study is that surprisingly simple adaptive methods in combination with an offline feature selection scheme can significantly increase BCI performance.},
  language  = {en},
}

@article{BlanchardKawanabeSugiyamaetal.2006,
  author    = {Blanchard, Gilles and Kawanabe, Motoaki and Sugiyama, Masashi and Spokoiny, Vladimir G. and M{\"u}ller, Klaus-Robert},
  title     = {In search of non-{Gaussian} components of a high-dimensional distribution},
  journal   = {Journal of machine learning research},
  volume    = {7},
  year      = {2006},
  issn      = {1532-4435},
  abstract  = {Finding non-Gaussian components of high-dimensional data is an important preprocessing step for efficient information processing. This article proposes a new linear method to identify the ``non-Gaussian subspace'' within a very general semi-parametric framework. Our proposed method, called NGCA (non-Gaussian component analysis), is based on a linear operator which, to any arbitrary nonlinear (smooth) function, associates a vector belonging to the low dimensional non-Gaussian target subspace, up to an estimation error. By applying this operator to a family of different nonlinear functions, one obtains a family of different vectors lying in a vicinity of the target space. As a final step, the target space itself is estimated by applying PCA to this family of vectors. We show that this procedure is consistent in the sense that the estimation error tends to zero at a parametric rate, uniformly over the family. Numerical examples demonstrate the usefulness of our method.},
  language  = {en},
}

@article{BlankertzDornhegeKrauledatetal.2006,
  author    = {Blankertz, Benjamin and Dornhege, Guido and Krauledat, Matthias and M{\"u}ller, Klaus-Robert and Kunzmann, Volker and Losch, Florian and Curio, Gabriel},
  title     = {The {Berlin} brain-computer interface: {EEG}-based communication without subject training},
  journal   = {IEEE Transactions on Neural Systems and Rehabilitation Engineering},
  year      = {2006},
  issn      = {1534-4320},
  doi       = {10.1109/Tnsre.2006.875557},
  abstract  = {The Berlin Brain-Computer Interface (BBCI) project develops a noninvasive BCI system whose key features are 1) the use of well-established motor competences as control paradigms, 2) high-dimensional features from 128-channel electroencephalogram (EEG), and 3) advanced machine learning techniques. As reported earlier, our experiments demonstrate that very high information transfer rates can be achieved using the readiness potential (RP) when predicting the laterality of upcoming left-versus right-hand movements in healthy subjects. A more recent study showed that the RP similarly accompanies phantom movements in arm amputees, but the signal strength decreases with longer loss of the limb. In a complementary approach, oscillatory features are used to discriminate imagined movements (left hand versus right hand versus foot). In a recent feedback study with six healthy subjects with no or very little experience with BCI control, three subjects achieved an information transfer rate above 35 bits per minute (bpm), and further two subjects above 24 and 15 bpm, while one subject could not achieve any BCI control. These results are encouraging for an EEG-based BCI system in untrained subjects that is independent of peripheral nervous system activity and does not rely on evoked potentials even when compared to results with very well-trained subjects operating other BCI systems.},
  language  = {en},
}

@article{NolteMeineckeZieheetal.2006,
  author    = {Nolte, Guido and Meinecke, Frank C. and Ziehe, Andreas and M{\"u}ller, Klaus-Robert},
  title     = {Identifying interactions in mixed and noisy complex systems},
  journal   = {Physical Review E},
  volume    = {73},
  pages     = {051913},
  year      = {2006},
  doi       = {10.1103/Physreve.73.051913},
  abstract  = {We present a technique that identifies truly interacting subsystems of a complex system from multichannel data if the recordings are an unknown linear and instantaneous mixture of the true sources. The method is valid for arbitrary noise structure. For this, a blind source separation technique is proposed that diagonalizes antisymmetrized cross-correlation or cross-spectral matrices. The resulting decomposition finds truly interacting subsystems blindly and suppresses any spurious interaction stemming from the mixture. The usefulness of this interacting source analysis is demonstrated in simulations and for real electroencephalography data.},
  language  = {en},
}

@article{LemmCurioHlushchuketal.2006,
  author    = {Lemm, Steven and Curio, Gabriel and Hlushchuk, Yevhen and M{\"u}ller, Klaus-Robert},
  title     = {Enhancing the signal-to-noise ratio of {ICA}-based extracted {ERPs}},
  journal   = {IEEE Transactions on Biomedical Engineering},
  year      = {2006},
  issn      = {0018-9294},
  doi       = {10.1109/Tbme.2006.870258},
  abstract  = {When decomposing single trial electroencephalography it is a challenge to incorporate prior physiological knowledge. Here, we develop a method that uses prior information about the phase-locking property of event-related potentials in a regularization framework to bias a blind source separation algorithm toward an improved separation of single-trial phase-locked responses in terms of an increased signal-to-noise ratio. In particular, we suggest a transformation of the data, using weighted average of the single trial and trial-averaged response, that redirects the focus of source separation methods onto the subspace of event-related potentials. The practical benefit with respect to an improved separation of such components from ongoing background activity and extraneous noise is first illustrated on artificial data and finally verified in a real-world application of extracting single-trial somatosensory evoked potentials from multichannel EEG-recordings.},
  language  = {en},
}

@article{LaubRothBuhmannetal.2006,
  author    = {Laub, Julian and Roth, Volker and Buhmann, Joachim and M{\"u}ller, Klaus-Robert},
  title     = {On the information and representation of non-{Euclidean} pairwise data},
  journal   = {Pattern Recognition},
  year      = {2006},
  issn      = {0031-3203},
  doi       = {10.1016/j.patcog.2006.04.016},
  abstract  = {Two common data representations are mostly used in intelligent data analysis, namely the vectorial and the pairwise representation. Pairwise data which satisfy the restrictive conditions of Euclidean spaces can be faithfully translated into a Euclidean vectorial representation by embedding. Non-metric pairwise data with violations of symmetry, reflexivity or triangle inequality pose a substantial conceptual problem for pattern recognition since the amount of predictive structural information beyond what can be measured by embeddings is unclear. We show by systematic modeling of non-Euclidean pairwise data that there exists metric violations which can carry valuable problem specific information. Furthermore, Euclidean and non-metric data can be unified on the level of structural information contained in the data. Stable component analysis selects linear subspaces which are particularly insensitive to data fluctuations. Experimental results from different domains support our pattern recognition strategy.},
  language  = {en},
}

@inproceedings{KawanabeBlanchardSugiyamaetal.2006,
  author    = {Kawanabe, Motoaki and Blanchard, Gilles and Sugiyama, Masashi and Spokoiny, Vladimir G. and M{\"u}ller, Klaus-Robert},
  title     = {A novel dimension reduction procedure for searching non-{Gaussian} subspaces},
  booktitle = {Independent Component Analysis and Blind Signal Separation},
  series    = {Lecture Notes in Computer Science},
  volume    = {3889},
  publisher = {Springer},
  year      = {2006},
  issn      = {0302-9743},
  doi       = {10.1007/11679363_19},
  abstract  = {In this article, we consider high-dimensional data which contains a low-dimensional non-Gaussian structure contaminated with Gaussian noise and propose a new linear method to identify the non-Gaussian subspace. Our method NGCA (Non-Gaussian Component Analysis) is based on a very general semi-parametric framework and has a theoretical guarantee that the estimation error of finding the non-Gaussian components tends to zero at a parametric rate. NGCA can be used not only as preprocessing for ICA, but also for extracting and visualizing more general structures like clusters. A numerical study demonstrates the usefulness of our method.},
  language  = {en},
}