@phdthesis{Floeter2005, author = {Fl{\"o}ter, Andr{\´e}}, title = {Analyzing biological expression data based on decision tree induction}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-6416}, school = {Universit{\"a}t Potsdam}, year = {2005}, abstract = {Modern biological analysis techniques supply scientists with various forms of data. One category of such data are the so called "expression data". These data indicate the quantities of biochemical compounds present in tissue samples. Recently, expression data can be generated at a high speed. This leads in turn to amounts of data no longer analysable by classical statistical techniques. Systems biology is the new field that focuses on the modelling of this information. At present, various methods are used for this purpose. One superordinate class of these meth­ods is machine learning. Methods of this kind had, until recently, predominantly been used for classification and prediction tasks. This neglected a powerful secondary benefit: the ability to induce interpretable models. Obtaining such models from data has become a key issue within Systems biology. Numerous approaches have been proposed and intensively discussed. This thesis focuses on the examination and exploitation of one basic technique: decision trees. The concept of comparing sets of decision trees is developed. This method offers the pos­sibility of identifying significant thresholds in continuous or discrete valued attributes through their corresponding set of decision trees. Finding significant thresholds in attributes is a means of identifying states in living organisms. Knowing about states is an invaluable clue to the un­derstanding of dynamic processes in organisms. Applied to metabolite concentration data, the proposed method was able to identify states which were not found with conventional techniques for threshold extraction. A second approach exploits the structure of sets of decision trees for the discovery of com­binatorial dependencies between attributes. Previous work on this issue has focused either on expensive computational methods or the interpretation of single decision trees ­ a very limited exploitation of the data. This has led to incomplete or unstable results. That is why a new method is developed that uses sets of decision trees to overcome these limitations. Both the introduced methods are available as software tools. They can be applied consecu­tively or separately. That way they make up a package of analytical tools that usefully supplement existing methods. By means of these tools, the newly introduced methods were able to confirm existing knowl­edge and to suggest interesting and new relationships between metabolites.}, subject = {Molekulare Bioinformatik}, language = {en} } @article{FloeterSelbigSchaub2004, author = {Fl{\"o}ter, Andr{\´e} and Selbig, Joachim and Schaub, Torsten H.}, title = {Finding metabolic pathways in decision forests}, isbn = {3-540-23221-4}, year = {2004}, language = {en} } @article{FloeterNicolasSchaubetal.2004, author = {Fl{\"o}ter, Andr{\´e} and Nicolas, Jacques and Schaub, Torsten H. and Selbig, Joachim}, title = {Threshold extraction in metabolite concentration data}, year = {2004}, abstract = {Motivation: Continued development of analytical techniques based on gas chromatography and mass spectrometry now facilitates the generation of larger sets of metabolite concentration data. An important step towards the understanding of metabolite dynamics is the recognition of stable states where metabolite concentrations exhibit a simple behaviour. Such states can be characterized through the identification of significant thresholds in the concentrations. But general techniques for finding discretization thresholds in continuous data prove to be practically insufficient for detecting states due to the weak conditional dependences in concentration data. Results: We introduce a method of recognizing states in the framework of decision tree induction. It is based upon a global analysis of decision forests where stability and quality are evaluated. It leads to the detection of thresholds that are both comprehensible and robust. Applied to metabolite concentration data, this method has led to the discovery of hidden states in the corresponding variables. Some of these reflect known properties of the biological experiments, and others point to putative new states}, language = {en} } @article{FloeterNicolasSchaubetal.2003, author = {Fl{\"o}ter, Andr{\´e} and Nicolas, Jacques and Schaub, Torsten H. and Selbig, Joachim}, title = {Threshold extraction in metabolite concentration data}, year = {2003}, language = {en} }