% Potsdam postprint record (series no. 1328) of the BMC Genomics article by
% the same authors. The acute accent now uses the ASCII \' macro, and the
% acronym and genus name in the title are brace-protected against recasing.
@misc{RianoPachonKleessenNeigenfindetal.2010,
  author    = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit},
  title     = {Proteome-wide survey of phosphorylation patterns affected by nuclear {DNA} polymorphisms in {Arabidopsis} thaliana},
  series    = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number    = {1328},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-43118},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431181},
  pages     = {19},
  year      = {2010},
  abstract  = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected.
Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.},
  language  = {en}
}
% Journal version (BMC Genomics 11, 2010). The key carries an "a" suffix
% because the unsuffixed key is already taken by the postprint record earlier
% in this file; a repeated key makes BibTeX/Biber skip this entry.
% The series field that merely repeated the journal name is dropped (biblatex
% would print it next to the journal title).
% NOTE(review): pages now holds the article number taken from the DOI suffix
% (411); the exported value 19 looked like a page count and is kept in
% pagetotal -- confirm against the publisher record.
@article{RianoPachonKleessenNeigenfindetal.2010a,
  author    = {Ria{\~n}o-Pach{\'o}n, Diego Mauricio and Kleessen, Sabrina and Neigenfind, Jost and Durek, Pawel and Weber, Elke and Engelsberger, Wolfgang R. and Walther, Dirk and Selbig, Joachim and Schulze, Waltraud X. and Kersten, Birgit},
  title     = {Proteome-wide survey of phosphorylation patterns affected by nuclear {DNA} polymorphisms in {Arabidopsis} thaliana},
  journal   = {BMC Genomics},
  volume    = {11},
  publisher = {BioMed Central},
  address   = {London},
  issn      = {1471-2164},
  doi       = {10.1186/1471-2164-11-411},
  pages     = {411},
  pagetotal = {19},
  year      = {2010},
  abstract  = {Background: Protein phosphorylation is an important post-translational modification influencing many aspects of dynamic cellular behavior. Site-specific phosphorylation of amino acid residues serine, threonine, and tyrosine can have profound effects on protein structure, activity, stability, and interaction with other biomolecules. Phosphorylation sites can be affected in diverse ways in members of any species, one such way is through single nucleotide polymorphisms (SNPs). The availability of large numbers of experimentally identified phosphorylation sites, and of natural variation datasets in Arabidopsis thaliana prompted us to analyze the effect of non-synonymous SNPs (nsSNPs) onto phosphorylation sites. Results: From the analyses of 7,178 experimentally identified phosphorylation sites we found that: (i) Proteins with multiple phosphorylation sites occur more often than expected by chance. (ii) Phosphorylation hotspots show a preference to be located outside conserved domains. (iii) nsSNPs affected experimental phosphorylation sites as much as the corresponding non-phosphorylated amino acid residues. (iv) Losses of experimental phosphorylation sites by nsSNPs were identified in 86 A. thaliana proteins, among them receptor proteins were overrepresented. These results were confirmed by similar analyses of predicted phosphorylation sites in A. thaliana. In addition, predicted threonine phosphorylation sites showed a significant enrichment of nsSNPs towards asparagines and a significant depletion of the synonymous substitution. Proteins in which predicted phosphorylation sites were affected by nsSNPs (loss and gain), were determined to be mainly receptor proteins, stress response proteins and proteins involved in nucleotide and protein binding. Proteins involved in metabolism, catalytic activity and biosynthesis were less affected.
Conclusions: We analyzed more than 7,100 experimentally identified phosphorylation sites in almost 4,300 protein-coding loci in silico, thus constituting the largest phosphoproteomics dataset for A. thaliana available to date. Our findings suggest a relatively high variability in the presence or absence of phosphorylation sites between different natural accessions in receptor and other proteins involved in signal transduction. Elucidating the effect of phosphorylation sites affected by nsSNPs on adaptive responses represents an exciting research goal for the future.},
  language  = {en}
}
% Potsdam postprint record (series no. 1330) of the Theoretical and Applied
% Genetics article by the same authors. The acronym and genus name in the
% title are brace-protected so sentence-casing styles do not lowercase them;
% the garbled "F 1" in the abstract is restored to "F1".
@misc{MeyerKustererLisecetal.2009,
  author    = {Meyer, Rhonda Christiane and Kusterer, Barbara and Lisec, Jan and Steinfath, Matthias and Becher, Martina and Scharr, Hanno and Melchinger, Albrecht E. and Selbig, Joachim and Schurr, Ulrich and Willmitzer, Lothar and Altmann, Thomas},
  title     = {{QTL} analysis of early stage heterosis for biomass in {Arabidopsis}},
  series    = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number    = {1330},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-43127},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431272},
  pages     = {11},
  year      = {2009},
  abstract  = {The main objective of this study was to identify genomic regions involved in biomass heterosis using QTL, generation means, and mode-of-inheritance classification analyses. In a modified North Carolina Design III we backcrossed 429 recombinant inbred line and 140 introgression line populations to the two parental accessions, C24 and Col-0, whose F1 hybrid exhibited 44\% heterosis for biomass. Mid-parent heterosis in the RILs ranged from -31 to 99\% for dry weight and from -58 to 143\% for leaf area. We detected ten genomic positions involved in biomass heterosis at an early developmental stage, individually explaining between 2.4 and 15.7\% of the phenotypic variation. While overdominant gene action was prevalent in heterotic QTL, our results suggest that a combination of dominance, overdominance and epistasis is involved in biomass heterosis in this Arabidopsis cross.},
  language  = {en}
}
% Journal version. The key carries an "a" suffix because the unsuffixed key is
% already taken by the postprint record earlier in this file.
% The series field that merely repeated the journal name is dropped, and the
% page range uses the plain "--" form.
% NOTE(review): volume corrected from 129 to 120 -- the other article from the
% same journal and year in this file is in volume 120 with pages 239--247,
% directly following 227--237. Confirm against the publisher record.
@article{MeyerKustererLisecetal.2009a,
  author    = {Meyer, Rhonda Christiane and Kusterer, Barbara and Lisec, Jan and Steinfath, Matthias and Becher, Martina and Scharr, Hanno and Melchinger, Albrecht E. and Selbig, Joachim and Schurr, Ulrich and Willmitzer, Lothar and Altmann, Thomas},
  title     = {{QTL} analysis of early stage heterosis for biomass in {Arabidopsis}},
  journal   = {Theoretical and Applied Genetics},
  volume    = {120},
  number    = {2},
  publisher = {Springer Nature},
  address   = {Berlin},
  issn      = {1432-2242},
  doi       = {10.1007/s00122-009-1074-6},
  pages     = {227--237},
  year      = {2009},
  abstract  = {The main objective of this study was to identify genomic regions involved in biomass heterosis using QTL, generation means, and mode-of-inheritance classification analyses. In a modified North Carolina Design III we backcrossed 429 recombinant inbred line and 140 introgression line populations to the two parental accessions, C24 and Col-0, whose F1 hybrid exhibited 44\% heterosis for biomass. Mid-parent heterosis in the RILs ranged from -31 to 99\% for dry weight and from -58 to 143\% for leaf area. We detected ten genomic positions involved in biomass heterosis at an early developmental stage, individually explaining between 2.4 and 15.7\% of the phenotypic variation. While overdominant gene action was prevalent in heterotic QTL, our results suggest that a combination of dominance, overdominance and epistasis is involved in biomass heterosis in this Arabidopsis cross.},
  language  = {en}
}
% Potsdam postprint record (series no. 1324) of the Theoretical and Applied
% Genetics article by the same authors. The genus name and acronym in the
% title are brace-protected against recasing; the fourth author's given names
% are spelled out to match the other entries in this file that list her.
@misc{SteinfathGaertnerLisecetal.2009,
  author    = {Steinfath, Matthias and G{\"a}rtner, Tanja and Lisec, Jan and Meyer, Rhonda Christiane and Altmann, Thomas and Willmitzer, Lothar and Selbig, Joachim},
  title     = {Prediction of hybrid biomass in {Arabidopsis} thaliana by selected parental {SNP} and metabolic markers},
  series    = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number    = {1324},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-43111},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431115},
  pages     = {9},
  year      = {2009},
  abstract  = {A recombinant inbred line (RIL) population, derived from two Arabidopsis thaliana accessions, and the corresponding testcrosses with these two original accessions were used for the development and validation of machine learning models to predict the biomass of hybrids. Genetic and metabolic information of the RILs served as predictors. Feature selection reduced the number of variables (genetic and metabolic markers) in the models by more than 80\% without impairing the predictive power. Thus, potential biomarkers have been revealed. Metabolites were shown to bear information on inherited macroscopic phenotypes. This proof of concept could be interesting for breeders. The example population exhibits substantial mid-parent biomass heterosis. The results of feature selection could therefore be used to shed light on the origin of heterosis. In this respect, mainly dominance effects were detected.},
  language  = {en}
}
% Journal version. The key carries an "a" suffix because the unsuffixed key is
% already taken by the postprint record earlier in this file.
% The series field that merely repeated the journal name is dropped, the
% journal field holds the plain journal title without the catalogue-style
% subtitle, and the page range uses the plain "--" form.
@article{SteinfathGaertnerLisecetal.2009a,
  author    = {Steinfath, Matthias and G{\"a}rtner, Tanja and Lisec, Jan and Meyer, Rhonda Christiane and Altmann, Thomas and Willmitzer, Lothar and Selbig, Joachim},
  title     = {Prediction of hybrid biomass in {Arabidopsis} thaliana by selected parental {SNP} and metabolic markers},
  journal   = {Theoretical and Applied Genetics},
  volume    = {120},
  publisher = {Springer},
  address   = {Berlin},
  issn      = {0040-5752},
  doi       = {10.1007/s00122-009-1191-2},
  pages     = {239--247},
  year      = {2009},
  abstract  = {A recombinant inbred line (RIL) population, derived from two Arabidopsis thaliana accessions, and the corresponding testcrosses with these two original accessions were used for the development and validation of machine learning models to predict the biomass of hybrids. Genetic and metabolic information of the RILs served as predictors. Feature selection reduced the number of variables (genetic and metabolic markers) in the models by more than 80\% without impairing the predictive power. Thus, potential biomarkers have been revealed. Metabolites were shown to bear information on inherited macroscopic phenotypes. This proof of concept could be interesting for breeders. The example population exhibits substantial mid-parent biomass heterosis. The results of feature selection could therefore be used to shed light on the origin of heterosis. In this respect, mainly dominance effects were detected.},
  language  = {en}
}
% Journal article (PNAS 103(32), 2006). The series field that merely repeated
% the journal name is dropped (biblatex would print it next to the journal
% title), and the page range uses the plain "--" form.
@article{SteuerGrossSelbigetal.2006,
  author    = {Steuer, Ralf and Gross, Thilo and Selbig, Joachim and Blasius, Bernd},
  title     = {Structural kinetic modeling of metabolic networks},
  journal   = {Proceedings of the National Academy of Sciences of the United States of America},
  volume    = {103},
  number    = {32},
  publisher = {National Academy of Sciences},
  address   = {Washington},
  issn      = {0027-8424},
  doi       = {10.1073/pnas.0600013103},
  pages     = {11868--11873},
  year      = {2006},
  abstract  = {To develop and investigate detailed mathematical models of metabolic processes is one of the primary challenges in systems biology. However, despite considerable advance in the topological analysis of metabolic networks, kinetic modeling is still often severely hampered by inadequate knowledge of the enzyme-kinetic rate laws and their associated parameter values. Here we propose a method that aims to give a quantitative account of the dynamical capabilities of a metabolic system, without requiring any explicit information about the functional form of the rate equations. Our approach is based on constructing a local linear model at each point in parameter space, such that each element of the model is either directly experimentally accessible or amenable to a straightforward biochemical interpretation. This ensemble of local linear models, encompassing all possible explicit kinetic models, then allows for a statistical exploration of the comprehensive parameter space. The method is exemplified on two paradigmatic metabolic systems: the glycolytic pathway of yeast and a realistic-scale representation of the photosynthetic Calvin cycle.},
  language  = {en}
}
% Journal article (BMC Bioinformatics 7, 2006). The series field that merely
% repeated the journal name is dropped and the journal name is capitalised as
% a proper name.
% NOTE(review): the export stored 380 as the issue number and 12 as pages.
% 380 matches the DOI suffix, so it is treated here as the article number
% (pages) and 12 as the page count (pagetotal) -- confirm against the
% publisher record.
@article{SteuerHumburgSelbig2006,
  author    = {Steuer, Ralf and Humburg, Peter and Selbig, Joachim},
  title     = {Validation and functional annotation of expression-based clusters based on gene ontology},
  journal   = {BMC Bioinformatics},
  volume    = {7},
  publisher = {BioMed Central},
  address   = {London},
  issn      = {1471-2105},
  doi       = {10.1186/1471-2105-7-380},
  pages     = {380},
  pagetotal = {12},
  year      = {2006},
  abstract  = {Background: The biological interpretation of large-scale gene expression data is one of the paramount challenges in current bioinformatics. In particular, placing the results in the context of other available functional genomics data, such as existing bio-ontologies, has already provided substantial improvement for detecting and categorizing genes of interest. One common approach is to look for functional annotations that are significantly enriched within a group or cluster of genes, as compared to a reference group. Results: In this work, we suggest the information-theoretic concept of mutual information to investigate the relationship between groups of genes, as given by data-driven clustering, and their respective functional categories. Drawing upon related approaches (Gibbons and Roth, Genome Research 12: 1574-1581, 2002), we seek to quantify to what extent individual attributes are sufficient to characterize a given group or cluster of genes. Conclusion: We show that the mutual information provides a systematic framework to assess the relationship between groups or clusters of genes and their functional annotations in a quantitative way. Within this framework, the mutual information allows us to address and incorporate several important issues, such as the interdependence of functional annotations and combinatorial combinations of attributes. It thus supplements and extends the conventional search for overrepresented attributes within a group or cluster of genes. In particular taking combinations of attributes into account, the mutual information opens the way to uncover specific functional descriptions of a group of genes or clustering result. All datasets and functional annotations used in this study are publicly available. All scripts used in the analysis are provided as additional files.},
  language  = {en}
}
% Potsdam postprint record (series no. 923). The raw Unicode em dash in the
% title is replaced by the LaTeX "---" ligature, matching the ASCII escapes
% used for the umlauts in this file; the page range uses the plain "--" form.
@misc{RajasundaramSelbig2016,
  author    = {Rajasundaram, Dhivyaa and Selbig, Joachim},
  title     = {More effort --- more results},
  series    = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number    = {923},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-44263},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-442639},
  pages     = {57--61},
  year      = {2016},
  abstract  = {The development of 'omics' technologies has progressed to address complex biological questions that underlie various plant functions thereby producing copious amounts of data. The need to assimilate large amounts of data into biologically meaningful interpretations has necessitated the development of statistical methods to integrate multidimensional information. Throughout this review, we provide examples of recent outcomes of 'omics' data integration together with an overview of available statistical methods and tools.},
  language  = {en}
}
% Potsdam postprint record (series no. 921). The title is the tool name F2C2;
% it is brace-protected so sentence-casing styles do not turn it into "F2c2".
@misc{LarhlimiDavidSelbigetal.2012,
  author    = {Larhlimi, Abdelhalim and David, Laszlo and Selbig, Joachim and Bockmayr, Alexander},
  title     = {{F2C2}},
  series    = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number    = {921},
  issn      = {1866-8372},
  doi       = {10.25932/publishup-43243},
  url       = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-432431},
  pages     = {11},
  year      = {2012},
  abstract  = {Background: Flux coupling analysis (FCA) has become a useful tool in the constraint-based analysis of genome-scale metabolic networks. FCA allows detecting dependencies between reaction fluxes of metabolic networks at steady-state. On the one hand, this can help in the curation of reconstructed metabolic networks by verifying whether the coupling between reactions is in agreement with the experimental findings. On the other hand, FCA can aid in defining intervention strategies to knock out target reactions. Results: We present a new method F2C2 for FCA, which is orders of magnitude faster than previous approaches. As a consequence, FCA of genome-scale metabolic networks can now be performed in a routine manner. Conclusions: We propose F2C2 as a fast tool for the computation of flux coupling in genome-scale metabolic networks. F2C2 is freely available for non-commercial use at https://sourceforge.net/projects/f2c2/files/.},
  language  = {en}
}