@article{PerscheidGrasnickUflacker2019, author = {Perscheid, Cindy and Grasnick, Bastien and Uflacker, Matthias}, title = {Integrative Gene Selection on Gene Expression Data}, series = {Journal of Integrative Bioinformatics}, volume = {16}, journal = {Journal of Integrative Bioinformatics}, number = {1}, publisher = {De Gruyter}, address = {Berlin}, issn = {1613-4516}, doi = {10.1515/jib-2018-0064}, pages = {17}, year = {2019}, abstract = {The advance of high-throughput RNA-Sequencing techniques enables researchers to analyze the complete gene activity in particular cells. From the insights of such analyses, researchers can identify disease-specific expression profiles, thus understand complex diseases like cancer, and eventually develop effective measures for diagnosis and treatment. The high dimensionality of gene expression data poses challenges to its computational analysis, which is addressed with measures of gene selection. Traditional gene selection approaches base their findings on statistical analyses of the actual expression levels, which implies several drawbacks when it comes to accurately identifying the underlying biological processes. In turn, integrative approaches include curated information on biological processes from external knowledge bases during gene selection, which promises to lead to better interpretability and improved predictive performance. Our work compares the performance of traditional and integrative gene selection approaches. Moreover, we propose a straightforward approach to integrate external knowledge with traditional gene selection approaches. We introduce a framework enabling the automatic external knowledge integration, gene selection, and evaluation. Evaluation results prove our framework to be a useful tool for evaluation and show that integration of external knowledge improves overall analysis results.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Integrative biomarker detection on high-dimensional gene expression data sets}, series = {Briefings in bioinformatics}, volume = {22}, journal = {Briefings in bioinformatics}, number = {3}, publisher = {Oxford Univ. Press}, address = {Oxford}, issn = {1467-5463}, doi = {10.1093/bib/bbaa151}, pages = {18}, year = {2021}, abstract = {Gene expression data provide the expression levels of tens of thousands of genes from several hundred samples. These data are analyzed to detect biomarkers that can be of prognostic or diagnostic use. Traditionally, biomarker detection for gene expression data is the task of gene selection. The vast number of genes is reduced to a few relevant ones that achieve the best performance for the respective use case. Traditional approaches select genes based on their statistical significance in the data set. This results in issues of robustness, redundancy and true biological relevance of the selected genes. Integrative analyses typically address these shortcomings by integrating multiple data artifacts from the same objects, e.g. gene expression and methylation data. When only gene expression data are available, integrative analyses instead use curated information on biological processes from public knowledge bases. With knowledge bases providing an ever-increasing amount of curated biological knowledge, such prior knowledge approaches become more powerful. This paper provides a thorough overview on the status quo of biomarker detection on gene expression data with prior biological knowledge. We discuss current shortcomings of traditional approaches, review recent external knowledge bases, provide a classification and qualitative comparison of existing prior knowledge approaches and discuss open challenges for this kind of gene selection.}, language = {en} } @article{Perscheid2021, author = {Perscheid, Cindy}, title = {Comprior}, series = {BMC Bioinformatics}, volume = {22}, journal = {BMC Bioinformatics}, publisher = {Springer Nature}, address = {London}, issn = {1471-2105}, doi = {10.1186/s12859-021-04308-z}, pages = {1 -- 15}, year = {2021}, abstract = {Background Reproducible benchmarking is important for assessing the effectiveness of novel feature selection approaches applied on gene expression data, especially for prior knowledge approaches that incorporate biological information from online knowledge bases. However, no full-fledged benchmarking system exists that is extensible, provides built-in feature selection approaches, and a comprehensive result assessment encompassing classification performance, robustness, and biological relevance. Moreover, the particular needs of prior knowledge feature selection approaches, i.e. uniform access to knowledge bases, are not addressed. As a consequence, prior knowledge approaches are not evaluated amongst each other, leaving open questions regarding their effectiveness. Results We present the Comprior benchmark tool, which facilitates the rapid development and effortless benchmarking of feature selection approaches, with a special focus on prior knowledge approaches. Comprior is extensible by custom approaches, offers built-in standard feature selection approaches, enables uniform access to multiple knowledge bases, and provides a customizable evaluation infrastructure to compare multiple feature selection approaches regarding their classification performance, robustness, runtime, and biological relevance. Conclusion Comprior allows reproducible benchmarking especially of prior knowledge approaches, which facilitates their applicability and for the first time enables a comprehensive assessment of their effectiveness}, language = {en} }