@misc{BarlowHartmannGonzalezetal.2020, author = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.}, title = {Consensify}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {1033}, issn = {1866-8372}, doi = {10.25932/publishup-47252}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-472521}, pages = {24}, year = {2020}, abstract = {A standard practise in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify is developed with palaeogenomic data in mind, it is applicable for any low to medium coverage short read datasets. 
We predict that Consensify will be a useful tool for future studies of palaeogenomes.}, language = {en} } @article{WestburyBalekaBarlowetal.2017, author = {Westbury, Michael V. and Baleka, Sina Isabelle and Barlow, Axel and Hartmann, Stefanie and Paijmans, Johanna L. A. and Kramarz, Alejandro and Forasiepi, Analia M. and Bond, Mariano and Gelfo, Javier N. and Reguero, Marcelo A. and Lopez-Mendoza, Patricio and Taglioretti, Matias and Scaglia, Fernando and Rinderknecht, Andres and Jones, Washington and Mena, Francisco and Billet, Guillaume and de Muizon, Christian and Aguilar, Jose Luis and MacPhee, Ross D. E. and Hofreiter, Michael}, title = {A mitogenomic timetree for Darwin's enigmatic South American mammal Macrauchenia patachonica}, series = {Nature Communications}, volume = {8}, journal = {Nature Communications}, publisher = {Nature Publ. Group}, address = {London}, issn = {2041-1723}, doi = {10.1038/ncomms15951}, pages = {8}, year = {2017}, abstract = {The unusual mix of morphological traits displayed by extinct South American native ungulates (SANUs) confounded both Charles Darwin, who first discovered them, and Richard Owen, who tried to resolve their relationships. Here we report an almost complete mitochondrial genome for the litoptern Macrauchenia. Our dated phylogenetic tree places Macrauchenia as sister to Perissodactyla, but close to the radiation of major lineages within Laurasiatheria. This position is consistent with a divergence estimate of $\sim$66 Ma (95\% credibility interval, 56.64--77.83 Ma) obtained for the split between Macrauchenia and other Panperissodactyla. Combined with their morphological distinctiveness, this evidence supports the positioning of Litopterna (possibly in company with other SANU groups) as a separate order within Laurasiatheria. 
We also show that, when using strict criteria, extinct taxa marked by deep divergence times and a lack of close living relatives may still be amenable to palaeogenomic analysis through iterative mapping against more distant relatives.}, language = {en} } @misc{DennisBallesterosRobinetal.2020, author = {Dennis, Alice B. and Ballesteros, Gabriel I. and Robin, St{\'e}phanie and Schrader, Lukas and Bast, Jens and Bergh{\"o}fer, Jan and Beukeboom, Leo W. and Belghazi, Maya and Bretaudeau, Anthony and Buellesbach, Jan and Cash, Elizabeth and Colinet, Dominique and Dumas, Zo{\'e} and Errbii, Mohammed and Falabella, Patrizia and Gatti, Jean-Luc and Geuverink, Elzemiek and Gibson, Joshua D. and Hertaeg, Corinne and Hartmann, Stefanie and Jacquin-Joly, Emmanuelle and Lammers, Mark and Lavandero, Blas I. and Lindenbaum, Ina and Massardier-Galata, Lauriane and Meslin, Camille and Montagn{\'e}, Nicolas and Pak, Nina and Poiri{\'e}, Maryl{\`e}ne and Salvia, Rosanna and Smith, Chris R. and Tagu, Denis and Tares, Sophie and Vogel, Heiko and Schwander, Tanja and Simon, Jean-Christophe and Figueroa, Christian C. and Vorburger, Christoph and Legeai, Fabrice and Gadau, J{\"u}rgen}, title = {Functional insights from the GC-poor genomes of two aphid parasitoids, Aphidius ervi and Lysiphlebus fabarum}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {989}, issn = {1866-8372}, doi = {10.25932/publishup-47612}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-476129}, pages = {29}, year = {2020}, abstract = {Background Parasitoid wasps have fascinating life cycles and play an important role in trophic networks, yet little is known about their genome content and function. Parasitoids that infect aphids are an important group with the potential for biological control. 
Their success depends on adapting to develop inside aphids and overcoming both host aphid defenses and their protective endosymbionts. Results We present the de novo genome assemblies, detailed annotation, and comparative analysis of two closely related parasitoid wasps that target pest aphids: Aphidius ervi and Lysiphlebus fabarum (Hymenoptera: Braconidae: Aphidiinae). The genomes are small (139 and 141 Mbp) and the most AT-rich reported thus far for any arthropod (GC content: 25.8 and 23.8\%). This nucleotide bias is accompanied by skewed codon usage and is stronger in genes with adult-biased expression. AT-richness may be the consequence of reduced genome size, a near absence of DNA methylation, and energy efficiency. We identify missing desaturase genes, whose absence may underlie mimicry in the cuticular hydrocarbon profile of L. fabarum. We highlight key gene groups including those underlying venom composition, chemosensory perception, and sex determination, as well as potential losses in immune pathway genes. Conclusions These findings are of fundamental interest for insect evolution and biological control applications. They provide a strong foundation for further functional studies into coevolution between parasitoids and their hosts. Both genomes are available at https://bipaa.genouest.org.}, language = {en} } @misc{ParaskevopoulouDennisWeithoffetal.2019, author = {Paraskevopoulou, Sofia and Dennis, Alice B. 
and Weithoff, Guntram and Hartmann, Stefanie and Tiedemann, Ralph}, title = {Within species expressed genetic variability and gene expression response to different temperatures in the rotifer Brachionus calyciflorus sensu stricto}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {796}, issn = {1866-8372}, doi = {10.25932/publishup-44105}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-441050}, pages = {23}, year = {2019}, abstract = {Genetic divergence is impacted by many factors, including phylogenetic history, gene flow, genetic drift, and divergent selection. Rotifers are an important component of aquatic ecosystems, and genetic variation is essential to their ongoing adaptive diversification and local adaptation. In addition to coding sequence divergence, variation in gene expression may relate to variable heat tolerance, and can impose ecological barriers within species. Temperature plays a significant role in aquatic ecosystems by affecting species abundance, spatio-temporal distribution, and habitat colonization. Recently described (formerly cryptic) species of the Brachionus calyciflorus complex exhibit different temperature tolerance both in natural and in laboratory studies, and show that B. calyciflorus sensu stricto (s.s.) is a thermotolerant species. Even within B. calyciflorus s.s., there is a tendency for further temperature specializations. Comparison of expressed genes allows us to assess the impact of stressors on both expression and sequence divergence among disparate populations within a single species. Here, we have used RNA-seq to explore expressed genetic diversity in B. calyciflorus s.s. in two mitochondrial DNA lineages with different phylogenetic histories and differences in thermotolerance. 
We identify a suite of candidate genes that may underlie local adaptation, with a particular focus on the response to sustained high or low temperatures. We do not find adaptive divergence in established candidate genes for thermal adaptation. Rather, we detect divergent selection among our two lineages in genes related to metabolism (lipid metabolism, metabolism of xenobiotics).}, language = {en} } @article{AutenriethHartmannLahetal.2018, author = {Autenrieth, Marijke and Hartmann, Stefanie and Lah, Ljerka and Roos, Anna and Dennis, Alice B. and Tiedemann, Ralph}, title = {High-quality whole-genome sequence of an abundant Holarctic odontocete, the harbour porpoise (Phocoena phocoena)}, series = {Molecular ecology resources}, volume = {18}, journal = {Molecular ecology resources}, number = {6}, publisher = {Wiley}, address = {Hoboken}, issn = {1755-098X}, doi = {10.1111/1755-0998.12932}, pages = {1469--1481}, year = {2018}, abstract = {The harbour porpoise (Phocoena phocoena) is a highly mobile cetacean found across the Northern hemisphere. It occurs in coastal waters and inhabits basins that vary broadly in salinity, temperature and food availability. These diverse habitats could drive subtle differentiation among populations, but examination of this would be best conducted with a robust reference genome. Here, we report the first harbour porpoise genome, assembled de novo from an individual originating in the Kattegat Sea (Sweden). The genome is one of the most complete cetacean genomes currently available, with a total size of 2.39 Gb and 50\% of the total length found in just 34 scaffolds. Using 122 of the longest scaffolds, we were able to show high levels of synteny with the genome of the domestic cattle (Bos taurus). Our draft annotation comprises 22,154 predicted genes, which we further annotated through matches to the NCBI nucleotide database, GO categorization and motif prediction. 
Within the predicted genes, we have confirmed the presence of >20 genes or gene families that have been associated with adaptive evolution in other cetaceans. Overall, this genome assembly and draft annotation represent a crucial addition to the genomic resources currently available for the study of porpoises and Phocoenidae evolution, phylogeny and conservation.}, language = {en} } @article{ParaskevopoulouDennisWeithoffetal.2019a, author = {Paraskevopoulou, Sofia and Dennis, Alice B. and Weithoff, Guntram and Hartmann, Stefanie and Tiedemann, Ralph}, title = {Within species expressed genetic variability and gene expression response to different temperatures in the rotifer Brachionus calyciflorus sensu stricto}, series = {PLoS ONE}, volume = {14}, journal = {PLoS ONE}, number = {9}, publisher = {PLoS}, address = {San Francisco, California}, issn = {1932-6203}, doi = {10.1371/journal.pone.0223134}, pages = {21}, year = {2019}, abstract = {Genetic divergence is impacted by many factors, including phylogenetic history, gene flow, genetic drift, and divergent selection. Rotifers are an important component of aquatic ecosystems, and genetic variation is essential to their ongoing adaptive diversification and local adaptation. In addition to coding sequence divergence, variation in gene expression may relate to variable heat tolerance, and can impose ecological barriers within species. Temperature plays a significant role in aquatic ecosystems by affecting species abundance, spatio-temporal distribution, and habitat colonization. Recently described (formerly cryptic) species of the Brachionus calyciflorus complex exhibit different temperature tolerance both in natural and in laboratory studies, and show that B. calyciflorus sensu stricto (s.s.) is a thermotolerant species. Even within B. calyciflorus s.s., there is a tendency for further temperature specializations. 
Comparison of expressed genes allows us to assess the impact of stressors on both expression and sequence divergence among disparate populations within a single species. Here, we have used RNA-seq to explore expressed genetic diversity in B. calyciflorus s.s. in two mitochondrial DNA lineages with different phylogenetic histories and differences in thermotolerance. We identify a suite of candidate genes that may underlie local adaptation, with a particular focus on the response to sustained high or low temperatures. We do not find adaptive divergence in established candidate genes for thermal adaptation. Rather, we detect divergent selection among our two lineages in genes related to metabolism (lipid metabolism, metabolism of xenobiotics).}, language = {en} } @article{HartmannPreickAbeltetal.2020, author = {Hartmann, Stefanie and Preick, Michaela and Abelt, Silke and Scheffel, Andr{\'e} and Hofreiter, Michael}, title = {Annotated genome sequences of the carnivorous plant Roridula gorgonias and a non-carnivorous relative, Clethra arborea}, series = {BMC Research Notes}, volume = {13}, journal = {BMC Research Notes}, publisher = {Biomed Central}, address = {London}, issn = {1756-0500}, doi = {10.1186/s13104-020-05254-4}, pages = {6}, year = {2020}, abstract = {Objective Plant carnivory is distributed across the tree of life and has evolved at least six times independently, but sequenced and annotated nuclear genomes of carnivorous plants are currently lacking. We have sequenced and structurally annotated the nuclear genome of the carnivorous Roridula gorgonias and that of a non-carnivorous relative, Madeira's lily-of-the-valley-tree, Clethra arborea, both within the Ericales. This data adds an important resource to study the evolutionary genetics of plant carnivory across angiosperm lineages and also for functional and systematic aspects of plants within the Ericales. Results Our assemblies have total lengths of 284 Mbp (R. gorgonias) and 511 Mbp (C. 
arborea) and show high BUSCO scores of 84.2\% and 89.5\%, respectively. We used their predicted genes together with publicly available data from other Ericales' genomes and transcriptomes to assemble a phylogenomic data set for the inference of a species tree. However, groups of orthologs showed a marked absence of species represented by a transcriptome. We discuss possible reasons and caution against combining predicted genes from genome- and transcriptome-based assemblies.}, language = {en} } @article{BarlowHartmannGonzalezetal.2020a, author = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.}, title = {Consensify}, series = {Genes / Molecular Diversity Preservation International}, volume = {11}, journal = {Genes / Molecular Diversity Preservation International}, number = {1}, publisher = {MDPI}, address = {Basel}, issn = {2073-4425}, doi = {10.3390/genes11010050}, pages = {22}, year = {2020}, abstract = {A standard practise in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. 
For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify is developed with palaeogenomic data in mind, it is applicable for any low to medium coverage short read datasets. We predict that Consensify will be a useful tool for future studies of palaeogenomes.}, language = {en} } @misc{ZulawskiSchulzeBraginetsetal.2014, author = {Zulawski, Monika and Schulze, Gunnar and Braginets, Rostyslav and Hartmann, Stefanie and Schulze, Waltraud X}, title = {The Arabidopsis Kinome}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, number = {861}, issn = {1866-8372}, doi = {10.25932/publishup-43290}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-432907}, pages = {17}, year = {2014}, abstract = {Background Protein kinases constitute a particularly large protein family in Arabidopsis with important functions in cellular signal transduction networks. At the same time Arabidopsis is a model plant with high frequencies of gene duplications. Here, we have conducted a systematic analysis of the Arabidopsis kinase complement, the kinome, with particular focus on gene duplication events. We matched Arabidopsis proteins to a Hidden-Markov Model of eukaryotic kinases and computed a phylogeny of 942 Arabidopsis protein kinase domains and mapped their origin by gene duplication. Results The phylogeny showed two major clades of receptor kinases and soluble kinases, each of which was divided into functional subclades. 
Based on this phylogeny, association of yet uncharacterized kinases to families was possible which extended functional annotation of unknowns. Classification of gene duplications within these protein kinases revealed that representatives of cytosolic subfamilies showed a tendency to maintain segmentally duplicated genes, while some subfamilies of the receptor kinases were enriched for tandem duplicates. Although functional diversification is observed throughout most subfamilies, some instances of functional conservation among genes transposed from the same ancestor were observed. In general, a significant enrichment of essential genes was found among genes encoding for protein kinases. Conclusions The inferred phylogeny allowed classification and annotation of yet uncharacterized kinases. The prediction and analysis of syntenic blocks and duplication events within gene families of interest can be used to link functional biology to insights from an evolutionary viewpoint. The approach undertaken here can be applied to any gene family in any organism with an annotated genome.}, language = {en} } @misc{HartmannHasenkampMayeretal.2015, author = {Hartmann, Stefanie and Hasenkamp, Natascha and Mayer, Jens and Michaux, Johan and Morand, Serge and Mazzoni, Camila J. and Roca, Alfred L. 
and Greenwood, Alex D.}, title = {Endogenous murine leukemia retroviral variation across wild European and inbred strains of house mouse}, series = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, journal = {Zweitver{\"o}ffentlichungen der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe}, number = {1329}, issn = {1866-8372}, doi = {10.25932/publishup-43120}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-431200}, pages = {13}, year = {2015}, abstract = {Background: Endogenous murine leukemia retroviruses (MLVs) are high copy number proviral elements difficult to comprehensively characterize using standard low throughput sequencing approaches. However, high throughput approaches generate data that is challenging to process, interpret and present. Results: Next generation sequencing (NGS) data was generated for MLVs from two wild caught Mus musculus domesticus (from mainland France and Corsica) and for inbred laboratory mouse strains C3H, LP/J and SJL. Sequence reads were grouped using a novel sequence clustering approach as applied to retroviral sequences. A Markov cluster algorithm was employed, and the sequence reads were queried for matches to specific xenotropic (Xmv), polytropic (Pmv) and modified polytropic (Mpmv) viral reference sequences. Conclusions: Various MLV subtypes were more widespread than expected among the mice, which may be due to the higher coverage of NGS, or to the presence of similar sequence across many different proviral loci. 
The results did not correlate with variation in the major MLV receptor Xpr1, which can restrict exogenous MLVs, suggesting that endogenous MLV distribution may reflect gene flow more than past resistance to infection.}, language = {en} } @misc{HartmannVision2008, author = {Hartmann, Stefanie and Vision, Todd J.}, title = {Using ESTs for phylogenomics}, series = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch Naturwissenschaftliche Reihe}, number = {889}, issn = {1866-8372}, doi = {10.25932/publishup-43667}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-436670}, pages = {15}, year = {2008}, abstract = {Background While full genome sequences are still only available for a handful of taxa, large collections of partial gene sequences are available for many more. The alignment of partial gene sequences results in a multiple sequence alignment containing large gaps that are arranged in a staggered pattern. The consequences of this pattern of missing data on the accuracy of phylogenetic analysis are not well understood. We conducted a simulation study to determine the accuracy of phylogenetic trees obtained from gappy alignments using three commonly used phylogenetic reconstruction methods (Neighbor Joining, Maximum Parsimony, and Maximum Likelihood) and studied ways to improve the accuracy of trees obtained from such datasets. Results We found that the pattern of gappiness in multiple sequence alignments derived from partial gene sequences substantially compromised phylogenetic accuracy even in the absence of alignment error. The decline in accuracy was beyond what would be expected based on the amount of missing data. The decline was particularly dramatic for Neighbor Joining and Maximum Parsimony, where the majority of gappy alignments contained 25\% to 40\% incorrect quartets. 
To improve the accuracy of the trees obtained from a gappy multiple sequence alignment, we examined two approaches. In the first approach, alignment masking, potentially problematic columns and input sequences are excluded from the dataset. Even in the absence of alignment error, masking improved phylogenetic accuracy up to 100-fold. However, masking retained, on average, only 83\% of the input sequences. In the second approach, alignment subdivision, the missing data is statistically modelled in order to retain as many sequences as possible in the phylogenetic analysis. Subdivision resulted in more modest improvements to alignment accuracy, but succeeded in including almost all of the input sequences. Conclusion These results demonstrate that partial gene sequences and gappy multiple sequence alignments can pose a major problem for phylogenetic analysis. The concern will be greatest for high-throughput phylogenomic analyses, in which Neighbor Joining is often the preferred method due to its computational efficiency. Both approaches can be used to increase the accuracy of phylogenetic inference from a gappy alignment. The choice between the two approaches will depend upon how robust the application is to the loss of sequences from the input set, with alignment masking generally giving a much greater improvement in accuracy but at the cost of discarding a larger number of the input sequences.}, language = {en} } @misc{SchedinaHartmannGrothetal.2014, author = {Schedina, Ina Maria and Hartmann, Stefanie and Groth, Detlef and Schlupp, Ingo and Tiedemann, Ralph}, title = {Comparative analysis of the gonadal transcriptomes of the all-female species Poecilia formosa and its maternal ancestor Poecilia mexicana}, url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-401420}, pages = {10}, year = {2014}, abstract = {Background The Amazon molly, Poecilia formosa (Teleostei: Poeciliinae) is an unisexual, all-female species. 
It evolved through the hybridisation of two closely related sexual species and exhibits clonal reproduction by sperm dependent parthenogenesis (or gynogenesis) where the sperm of a parental species is only used to activate embryogenesis of the apomictic, diploid eggs but does not contribute genetic material to the offspring. Here we provide and describe the first de novo assembled transcriptome of the Amazon molly in comparison with its maternal ancestor, the Atlantic molly Poecilia mexicana. The transcriptome data were produced through sequencing of single end libraries (100 bp) with the Illumina sequencing technique. Results 83,504,382 reads for the Amazon molly and 81,625,840 for the Atlantic molly were assembled into 127,283 and 78,961 contigs for the Amazon molly and the Atlantic molly, respectively. 63\% resp. 57\% of the contigs could be annotated with gene ontology terms after sequence similarity comparisons. Furthermore, we were able to identify genes normally involved in reproduction and especially in meiosis also in the transcriptome dataset of the apomictic reproducing Amazon molly. Conclusions We assembled and annotated the transcriptome of a non-model organism, the Amazon molly, without a reference genome (de novo). The obtained dataset is a fundamental resource for future research in functional and expression analysis. Also, the presence of 30 meiosis-specific genes within a species where no meiosis is known to take place is remarkable and raises new questions for future research.}, language = {en} } @article{StruckPaulHilletal.2011, author = {Struck, Torsten H. 
and Paul, Christiane and Hill, Natascha and Hartmann, Stefanie and Hoesel, Christoph and Kube, Michael and Lieb, Bernhard and Meyer, Achim and Tiedemann, Ralph and Purschke, Guenter and Bleidorn, Christoph}, title = {Phylogenomic analyses unravel annelid evolution}, series = {Nature : the international weekly journal of science}, volume = {471}, journal = {Nature : the international weekly journal of science}, number = {7336}, publisher = {Nature Publ. Group}, address = {London}, issn = {0028-0836}, doi = {10.1038/nature09864}, pages = {95--U113}, year = {2011}, abstract = {Annelida, the ringed worms, is a highly diverse animal phylum that includes more than 15,000 described species and constitutes the dominant benthic macrofauna from the intertidal zone down to the deep sea. A robust annelid phylogeny would shape our understanding of animal body-plan evolution and shed light on the bilaterian ground pattern. Traditionally, Annelida has been split into two major groups: Clitellata (earthworms and leeches) and polychaetes (bristle worms), but recent evidence suggests that other taxa that were once considered to be separate phyla (Sipuncula, Echiura and Siboglinidae (also known as Pogonophora)) should be included in Annelida(1-4). However, the deep-level evolutionary relationships of Annelida are still poorly understood, and a robust reconstruction of annelid evolutionary history is needed. Here we show that phylogenomic analyses of 34 annelid taxa, using 47,953 amino acid positions, recovered a well-supported phylogeny with strong support for major splits. Our results recover chaetopterids, myzostomids and sipunculids in the basal part of the tree, although the position of Myzostomida remains uncertain owing to its long branch. The remaining taxa are split into two clades: Errantia (which includes the model annelid Platynereis), and Sedentaria (which includes Clitellata). 
Ancestral character trait reconstructions indicate that these clades show adaptation to either an errant or a sedentary lifestyle, with alteration of accompanying morphological traits such as peristaltic movement, parapodia and sensory perception. Finally, life history characters in Annelida seem to be phylogenetically informative.}, language = {en} } @article{HartmannHelmNickeletal.2012, author = {Hartmann, Stefanie and Helm, Conrad and Nickel, Birgit and Meyer, Matthias and Struck, Torsten H. and Tiedemann, Ralph and Selbig, Joachim and Bleidorn, Christoph}, title = {Exploiting gene families for phylogenomic analysis of myzostomid transcriptome data}, series = {PLoS one}, volume = {7}, journal = {PLoS one}, number = {1}, publisher = {PLoS}, address = {San Francisco}, issn = {1932-6203}, doi = {10.1371/journal.pone.0029843}, pages = {8}, year = {2012}, abstract = {Background: In trying to understand the evolutionary relationships of organisms, the current flood of sequence data offers great opportunities, but also reveals new challenges with regard to data quality, the selection of data for subsequent analysis, and the automation of steps that were once done manually for single-gene analyses. Even though genome or transcriptome data is available for representatives of most bilaterian phyla, some enigmatic taxa still have an uncertain position in the animal tree of life. This is especially true for myzostomids, a group of symbiotic (or parasitic) protostomes that are either placed with annelids or flatworms. Methodology: Based on similarity criteria, Illumina-based transcriptome sequences of one myzostomid were compared to protein sequences of one additional myzostomid and 29 reference metazoa and clustered into gene families. 
These families were then used to investigate the phylogenetic position of Myzostomida using different approaches: Alignments of 989 sequence families were concatenated, and the resulting superalignment was analyzed under a Maximum Likelihood criterion. We also used all 1,878 gene trees with at least one myzostomid sequence for a supertree approach: the individual gene trees were computed and then reconciled into a species tree using gene tree parsimony. Conclusions: Superalignments require strictly orthologous genes, and both the gene selection and the widely varying amount of data available for different taxa in our dataset may cause anomalous placements and low bootstrap support. In contrast, gene tree parsimony is designed to accommodate multilocus gene families and therefore allows a much more comprehensive data set to be analyzed. Results of this supertree approach showed a well-resolved phylogeny, in which myzostomids were part of the annelid radiation, and major bilaterian taxa were found to be monophyletic.}, language = {en} } @article{HillLeowBleidornetal.2013, author = {Hill, Natascha and Leow, Alexander and Bleidorn, Christoph and Groth, Detlef and Tiedemann, Ralph and Selbig, Joachim and Hartmann, Stefanie}, title = {Analysis of phylogenetic signal in protostomial intron patterns using Mutual Information}, series = {Theory in biosciences}, volume = {132}, journal = {Theory in biosciences}, number = {2}, publisher = {Springer}, address = {New York}, issn = {1431-7613}, doi = {10.1007/s12064-012-0173-0}, pages = {93--104}, year = {2013}, abstract = {Many deep evolutionary divergences still remain unresolved, such as those among major taxa of the Lophotrochozoa. As alternative phylogenetic markers, the intron-exon structure of eukaryotic genomes and the patterns of absence and presence of spliceosomal introns appear to be promising. 
However, given the potential homoplasy of intron presence, the phylogenetic analysis of this data using standard evolutionary approaches has remained a challenge. Here, we used Mutual Information (MI) to estimate the phylogeny of Protostomia using gene structure data, and we compared these results with those obtained with Dollo Parsimony. Using full genome sequences from nine Metazoa, we identified 447 groups of orthologous sequences with 21,732 introns in 4,870 unique intron positions. We determined the shared absence and presence of introns in the corresponding sequence alignments and have made this data available in "IntronBase", a web-accessible and downloadable SQLite database. Our results obtained using Dollo Parsimony are obviously misled through systematic errors that arise from multiple intron loss events, but extensive filtering of data improved the quality of the estimated phylogenies. Mutual Information, in contrast, performs better with larger datasets, but at the same time it requires a complete data set, which is difficult to obtain for orthologs from a large number of taxa. Nevertheless, Mutual Information-based distances proved to be useful in analyzing this kind of data, also because the estimation of MI-based distances is independent of evolutionary models and therefore no pre-definitions of ancestral and derived character states are necessary.}, language = {en} } @article{BurleighBansalEulensteinetal.2011, author = {Burleigh, J. Gordon and Bansal, Mukul S. and Eulenstein, Oliver and Hartmann, Stefanie and Wehe, Andre and Vision, Todd J.}, title = {Genome-Scale Phylogenetics inferring the plant tree of life from 18,896 gene trees}, series = {Systematic biology}, volume = {60}, journal = {Systematic biology}, number = {2}, publisher = {Oxford Univ. 
Press}, address = {Oxford}, issn = {1063-5157}, doi = {10.1093/sysbio/syq072}, pages = {117 -- 125}, year = {2011}, abstract = {Phylogenetic analyses using genome-scale data sets must confront incongruence among gene trees, which in plants is exacerbated by frequent gene duplications and losses. Gene tree parsimony (GTP) is a phylogenetic optimization criterion in which a species tree that minimizes the number of gene duplications induced among a set of gene trees is selected. The run time performance of previous implementations has limited its use on large-scale data sets. We used new software that incorporates recent algorithmic advances to examine the performance of GTP on a plant data set consisting of 18,896 gene trees containing 510,922 protein sequences from 136 plant taxa (giving a combined alignment length of >2.9 million characters). The relationships inferred from the GTP analysis were largely consistent with previous large-scale studies of backbone plant phylogeny and resolved some controversial nodes. The placement of taxa that were present in few gene trees generally varied the most among GTP bootstrap replicates. Excluding these taxa either before or after the GTP analysis revealed high levels of phylogenetic support across plants. The analyses supported magnoliids sister to a eudicot + monocot clade and did not support the eurosid I and II clades. 
This study presents a nuclear genomic perspective on the broad-scale phylogenic relationships among plants, and it demonstrates that nuclear genes with a history of duplication and loss can be phylogenetically informative for resolving the plant tree of life.}, language = {en} } @article{SchroederBleidornHartmannetal.2009, author = {Schr{\"o}der, Christiane and Bleidorn, Christoph and Hartmann, Stefanie and Tiedemann, Ralph}, title = {Occurrence of Can-SINEs and intron sequence evolution supports robust phylogeny of pinniped carnivores and their terrestrial relatives}, issn = {0378-1119}, doi = {10.1016/j.gene.2009.06.012}, year = {2009}, abstract = {Investigating the dog genome we found 178965 introns with a moderate length of 200-1000 bp. A screening of these sequences against 23 different repeat libraries to find insertions of short interspersed elements (SINEs) detected 45276 SINEs. Virtually all of these SINEs (98\%) belong to the tRNA-derived Can-SINE family. Can-SINEs arose about 55 million years ago before Carnivora split into two basal groups, the Caniformia (doglike carnivores) and the Feliformia (cat-like carnivores). Genome comparisons of dog and cat recovered 506 putatively informative SINE loci for caniformian phylogeny. In this study we show how to use such genome information of model organisms to research the phylogeny of related non-model species of interest. Investigating a dataset including representatives of all major caniformian lineages, we analysed 24 randomly chosen loci for 22 taxa. All loci were amplifiable and revealed 17 parsimony- informative SINE insertions. The screening for informative SINE insertions yields a large amount of sequence information, in particular of introns, which contain reliable phylogenetic information as well. A phylogenetic analysis of intron- and SINE sequence data provided a statistically robust phylogeny which is congruent with the absence/presence pattern of our SINE markers. 
This phylogeny strongly supports a sistergroup relationship of Musteloidea and Pinnipedia. Within Pinnipedia, we see strong support from bootstrapping and the presence of a SINE insertion for a sistergroup relationship of the walrus with the Otariidae.}, language = {en} } @article{BartelHartmannLehmannetal.2012, author = {Bartel, Manuela and Hartmann, Stefanie and Lehmann, Karola and Postel, Kai and Quesada, Humberto and Philipp, Eva E. R. and Heilmann, Katja and Micheel, Burkhard and Stuckas, Heiko}, title = {Identification of sperm proteins as candidate biomarkers for the analysis of reproductive isolation in Mytilus: a case study for the enkurin locus}, series = {Marine biology : international journal on life in oceans and coastal waters}, volume = {159}, journal = {Marine biology : international journal on life in oceans and coastal waters}, number = {10}, publisher = {Springer}, address = {New York}, issn = {0025-3162}, doi = {10.1007/s00227-012-2005-7}, pages = {2195 -- 2207}, year = {2012}, abstract = {Sperm proteins of the marine sessile mussels of the Mytilus edulis species complex are models to investigate reproductive isolation and speciation. This study aimed at identifying sperm proteins and their corresponding genes. This was aided by the use of monoclonal antibodies that preferentially bind to yet unknown sperm molecules. By identifying their target molecules, this approach identified proteins with relevance to Mytilus sperm function. This procedure identified 16 proteins, for example, enkurin, laminin, porin and heat shock proteins. The potential use of these proteins as genetic markers to study reproductive isolation is exemplified by analysing the enkurin locus. Enkurin evolution is driven by purifying selection, the locus displays high levels of intraspecific variation and species-specific alleles group in distinct phylogenetic clusters. 
These findings characterize enkurin as informative candidate biomarker for analyses of clinal variation and differential introgression in hybrid zones, for example, to understand determinants of reproductive isolation in Baltic Mytilus populations.}, language = {en} } @article{BonizzoniBourjeaChenetal.2011, author = {Bonizzoni, Mariangela and Bourjea, Jerome and Chen, Bin and Crain, B. J. and Cui, Liwang and Fiorentino, V. and Hartmann, Stefanie and Hendricks, S. and Ketmaier, Valerio and Ma, Xiaoguang and Muths, Delphine and Pavesi, Laura and Pfautsch, Simone and Rieger, M. A. and Santonastaso, T. and Sattabongkot, Jetsumon and Taron, C. H. and Taron, D. J. and Tiedemann, Ralph and Yan, Guiyun and Zheng, Bin and Zhong, Daibin}, title = {Permanent genetic resources added to molecular ecology resources database 1 April 2011-31 May 2011}, series = {Molecular ecology resources}, volume = {11}, journal = {Molecular ecology resources}, number = {5}, publisher = {Wiley-Blackwell}, address = {Malden}, organization = {Mol Ecology Resources Primer Dev}, issn = {1755-098X}, doi = {10.1111/j.1755-0998.2011.03046.x}, pages = {935 -- 936}, year = {2011}, abstract = {This article documents the addition of 92 microsatellite marker loci to the Molecular Ecology Resources Database. Loci were developed for the following species: Anopheles minimus, An. sinensis, An. dirus, Calephelis mutica, Lutjanus kasmira, Murella muralis and Orchestia montagui. These loci were cross-tested on the following species: Calephelis arizonensi, Calephelis borealis, Calephelis nemesis, Calephelis virginiensis and Lutjanus bengalensis.}, language = {en} } @article{SchedinaPfautschHartmannetal.2014, author = {Schedina, Ina-Maria and Pfautsch, Simone and Hartmann, Stefanie and Dolgener, N. 
and Polgar, Anika and Bianco, Pier Giorgio and Tiedemann, Ralph and Ketmaier, Valerio}, title = {Isolation and characterization of eight microsatellite loci in the brook lamprey Lampetra planeri (Petromyzontiformes) using 454 sequence data}, series = {Journal of fish biology}, volume = {85}, journal = {Journal of fish biology}, number = {3}, publisher = {Wiley-Blackwell}, address = {Hoboken}, issn = {0022-1112}, doi = {10.1111/jfb.12470}, pages = {960 -- 964}, year = {2014}, abstract = {Eight polymorphic microsatellite loci were developed for the brook lamprey Lampetra planeri through 454 sequencing and their usefulness was tested in 45 individuals of both L. planeri and the river lamprey Lampetra fluviatilis. The number of alleles per loci ranged between two and five; the Italian and Irish populations had a mean expected heterozygosity of 0.388 and 0.424 and a mean observed heterozygosity of 0.418 and 0.411, respectively. (C) 2014 The Fisheries Society of the British Isles}, language = {en} }