% Publications on the PRI ("pattern recognition of immune cells") binning approach.
% Conventions used in this file:
%   - `pagetotal` holds the total page count of a work; `pages` is reserved for
%     a page range or location inside a volume and is omitted when unknown.
%   - Acronyms in titles are brace-protected so sentence-casing styles keep them.
%   - Abstract text is kept as recorded; only its line wrapping is changed.

@article{HoangGryzikHoppeetal.2022,
  author    = {Hoang, Yen and Gryzik, Stefanie and Hoppe, Ines and Rybak, Alexander and Sch{\"a}dlich, Martin and Kadner, Isabelle and Walther, Dirk and Vera, Julio and Radbruch, Andreas and Groth, Detlef and Baumgart, Sabine and Baumgrass, Ria},
  title     = {{PRI}: Re-analysis of a public mass cytometry dataset reveals patterns of effective tumor treatments},
  journal   = {Frontiers in Immunology},
  volume    = {13},
  publisher = {Frontiers Media},
  address   = {Lausanne},
  issn      = {1664-3224},
  doi       = {10.3389/fimmu.2022.849329},
  pagetotal = {9},
  year      = {2022},
  language  = {en},
  abstract  = {Recently, mass cytometry has enabled quantification of up to 50 parameters for millions of cells per sample.
    It remains a challenge to analyze such high-dimensional data to exploit the richness of the inherent information, even though many valuable new analysis tools have already been developed.
    We propose a novel algorithm "pattern recognition of immune cells (PRI)" to tackle these high-dimensional protein combinations in the data.
    PRI is a tool for the analysis and visualization of cytometry data based on a three or more-parametric binning approach, feature engineering of bin properties of multivariate cell data, and a pseudo-multiparametric visualization.
    Using a publicly available mass cytometry dataset, we proved that reproducible feature engineering and intuitive understanding of the generated bin plots are helpful hallmarks for re-analysis with PRI.
    In the CD4(+)T cell population analyzed, PRI revealed two bin-plot patterns (CD90/CD44/CD86 and CD90/CD44/CD27) and 20 bin plot features for threshold-independent classification of mice concerning ineffective and effective tumor treatment.
    In addition, PRI mapped cell subsets regarding co-expression of the proliferation marker Ki67 with two major transcription factors and further delineated a specific Th1 cell subset.
    All these results demonstrate the added insights that can be obtained using the non-cluster-based tool PRI for re-analyses of high-dimensional cytometric data.},
}

@article{GryzikHoangLischkeetal.2020,
  author    = {Gryzik, Stefanie and Hoang, Yen and Lischke, Timo and Mohr, Elodie and Venzke, Melanie and Kadner, Isabelle and P{\"o}tzsch, Josephine and Groth, Detlef and Radbruch, Andreas and Hutloff, Andreas and Baumgrass, Ria},
  title     = {Identification of a super-functional {Tfh-like} subpopulation in murine lupus by pattern perception},
  journal   = {eLife},
  volume    = {9},
  publisher = {eLife Sciences Publications},
  address   = {Cambridge},
  issn      = {2050-084X},
  doi       = {10.7554/eLife.53226},
  pagetotal = {21},
  year      = {2020},
  language  = {en},
  abstract  = {Dysregulated cytokine expression by T cells plays a pivotal role in the pathogenesis of autoimmune diseases.
    However, the identification of the corresponding pathogenic subpopulations is a challenge, since a distinction between physiological variation and a new quality in the expression of protein markers requires combinatorial evaluation.
    Here, we were able to identify a super-functional follicular helper T cell (Tfh)-like subpopulation in lupus-prone NZBxW mice with our binning approach "pattern recognition of immune cells (PRI)".
    PRI uncovered a subpopulation of IL-21(+) IFN-gamma(high) PD-1(low) CD40L(high) CXCR5(-) Bcl-6(-) T cells specifically expanded in diseased mice.
    In addition, these cells express high levels of TNF-alpha and IL-2, and provide B cell help for IgG production in an IL-21 and CD40L dependent manner.
    This super-functional T cell subset might be a superior driver of autoimmune processes due to a polyfunctional and high cytokine expression combined with Tfh-like properties.},
}

@phdthesis{Hoang2019,
  author    = {Hoang, Yen},
  title     = {De novo binning strategy to analyze and visualize multi-dimensional cytometric data},
  school    = {Universit{\"a}t Potsdam},
  doi       = {10.25932/publishup-44307},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-443078},
  pagetotal = {vii, 81, xxxii},
  year      = {2019},
  language  = {en},
  abstract  = {Since half a century, cytometry has been a major scientific discipline in the field of cytomics - the study of system's biology at single cell level.
    It enables the investigation of physiological processes, functional characteristics and rare events with proteins by analysing multiple parameters on an individual cell basis.
    In the last decade, mass cytometry has been established which increased the parallel measurement to up to 50 proteins.
    This has shifted the analysis strategy from conventional consecutive manual gates towards multi-dimensional data processing.
    Novel algorithms have been developed to tackle these high-dimensional protein combinations in the data.
    They are mainly based on clustering or non-linear dimension reduction techniques, or both, often combined with an upstream downsampling procedure.
    However, these tools have obstacles either in comprehensible interpretability, reproducibility, computational complexity or in comparability between samples and groups.
    To address this bottleneck, a reproducible, semi-automated cytometric data mining workflow PRI (pattern recognition of immune cells) is proposed which combines three main steps: i) data preparation and storage; ii) bin-based combinatorial variable engineering of three protein markers, the so called triploTs, and subsequent sectioning of these triploTs in four parts; and iii) deployment of a data-driven supervised learning algorithm, the cross-validated elastic-net regularized logistic regression, with these triploT sections as input variables.
    As a result, the selected variables from the models are ranked by their prevalence, which potentially have discriminative value.
    The purpose is to significantly facilitate the identification of meaningful subpopulations, which are most distinguish between two groups.
    The proposed workflow PRI is exemplified by a recently published public mass cytometry data set.
    The authors found a T cell subpopulation which is discriminative between effective and ineffective treatment of breast carcinomas in mice.
    With PRI, that subpopulation was not only validated, but was further narrowed down as a particular Th1 cell population.
    Moreover, additional insights of combinatorial protein expressions are revealed in a traceable manner.
    An essential element in the workflow is the reproducible variable engineering.
    These variables serve as basis for a clearly interpretable visualization, for a structured variable exploration and as input layers in neural network constructs.
    PRI facilitates the determination of marker levels in a semi-continuous manner.
    Jointly with the combinatorial display, it allows a straightforward observation of correlating patterns, and thus, the dominant expressed markers and cell hierarchies.
    Furthermore, it enables the identification and complex characterization of discriminating subpopulations due to its reproducible and pseudo-multi-parametric pattern presentation.
    This endorses its applicability as a tool for unbiased investigations on cell subsets within multi-dimensional cytometric data sets.},
}