% Bibliography database (repository export, reformatted to one field per line).
%
% Citation-key scheme: FirstThreeSurnames + "etal." + year. Where the export
% produced the same key for more than one record, the FIRST record keeps the
% bare key (this is the record BibTeX/Biber resolved before, since repeated
% keys are rejected), and later records carry an "a"/"b" suffix.
%
% Accented characters are written as BibTeX special characters, e.g. {\"a},
% {\'e}, so that sorting and label generation treat them as single letters.

% Postprint-series record; the journal article with the same title follows.
@misc{BarlowHartmannGonzalezetal.2020,
  author   = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.},
  title    = {Consensify},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {1033},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-47252},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-472521},
  pages    = {24},
  year     = {2020},
  abstract = {A standard practise in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify is developed with palaeogenomic data in mind, it is applicable for any low to medium coverage short read datasets. We predict that Consensify will be a useful tool for future studies of palaeogenomes.},
  language = {en}
}

% Journal article; key suffixed "a" because the export reused the key above.
@article{BarlowHartmannGonzalezetal.2020a,
  author    = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.},
  title     = {Consensify},
  series    = {Genes},
  volume    = {11},
  journal   = {Genes},
  number    = {1},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2073-4425},
  doi       = {10.3390/genes11010050},
  pages     = {22},
  year      = {2020},
  abstract  = {A standard practise in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify is developed with palaeogenomic data in mind, it is applicable for any low to medium coverage short read datasets. We predict that Consensify will be a useful tool for future studies of palaeogenomes.},
  language  = {en}
}

@article{SignorePaijmansHofreiteretal.2019,
  author    = {Signore, Anthony V. and Paijmans, Johanna L. A. and Hofreiter, Michael and Fago, Angela and Weber, Roy E. and Springer, Mark S. and Campbell, Kevin L.},
  title     = {Emergence of a chimeric globin pseudogene and increased hemoglobin oxygen affinity underlie the evolution of aquatic specializations in {Sirenia}},
  series    = {Molecular biology and evolution},
  volume    = {36},
  journal   = {Molecular biology and evolution},
  number    = {6},
  publisher = {Oxford Univ. Press},
  address   = {Oxford},
  issn      = {0737-4038},
  doi       = {10.1093/molbev/msz044},
  pages     = {1134--1147},
  year      = {2019},
  abstract  = {As limits on O2 availability during submergence impose severe constraints on aerobic respiration, the oxygen binding globin proteins of marine mammals are expected to have evolved under strong evolutionary pressures during their land-to-sea transition. Here, we address this question for the order Sirenia by retrieving, annotating, and performing detailed selection analyses on the globin repertoire of the extinct Steller's sea cow (Hydrodamalis gigas), dugong (Dugong dugon), and Florida manatee (Trichechus manatus latirostris) in relation to their closest living terrestrial relatives (elephants and hyraxes). These analyses indicate most loci experienced elevated nucleotide substitution rates during their transition to a fully aquatic lifestyle. While most of these genes evolved under neutrality or strong purifying selection, the rate of nonsynonymous/synonymous replacements increased in two genes (Hbz-T1 and Hba-T1) that encode the $\alpha$-type chains of hemoglobin (Hb) during each stage of life. Notably, the relaxed evolution of Hba-T1 is temporally coupled with the emergence of a chimeric pseudogene (Hba-T2/Hbq-ps) that contributed to the tandemly linked Hba-T1 of stem sirenians via interparalog gene conversion. Functional tests on recombinant Hb proteins from extant and ancestral sirenians further revealed that the molecular remodeling of Hba-T1 coincided with increased Hb-O2 affinity in early sirenians. Available evidence suggests that this trait evolved to maximize O2 extraction from finite lung stores and suppress tissue O2 offloading, thereby facilitating the low metabolic intensities of extant sirenians. In contrast, the derived reduction in Hb-O2 affinity in (sub)Arctic Steller's sea cows is consistent with fueling increased thermogenesis by these once colossal marine herbivores.},
  language  = {en}
}

% Repository record (resolver URL only, no DOI); the journal article records
% for the same title follow.
@misc{TaronLellBarlowetal.2018,
  author   = {Taron, Ulrike H. and Lell, Moritz and Barlow, Axel and Paijmans, Johanna L. A.},
  title    = {Testing of Alignment Parameters for Ancient Samples},
  series   = {Genes},
  journal  = {Genes},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-409683},
  pages    = {12},
  year     = {2018},
  abstract = {High-throughput sequence data retrieved from ancient or other degraded samples has led to unprecedented insights into the evolutionary history of many species, but the analysis of such sequences also poses specific computational challenges. The most commonly used approach involves mapping sequence reads to a reference genome. However, this process becomes increasingly challenging with an elevated genetic distance between target and reference or with the presence of contaminant sequences with high sequence similarity to the target species. The evaluation and testing of mapping efficiency and stringency are thus paramount for the reliable identification and analysis of ancient sequences. In this paper, we present 'TAPAS', (Testing of Alignment Parameters for Ancient Samples), a computational tool that enables the systematic testing of mapping tools for ancient data by simulating sequence data reflecting the properties of an ancient dataset and performing test runs using the mapping software and parameter settings of interest. We showcase TAPAS by using it to assess and improve mapping strategy for a degraded sample from a banded linsang (Prionodon linsang), for which no closely related reference is currently available. This enables a 1.8-fold increase of the number of mapped reads without sacrificing mapping specificity. The increase of mapped reads effectively reduces the need for additional sequencing, thus making more economical use of time, resources, and sample material.},
  language = {en}
}

% Journal article; key suffixed "a" because the export reused the key above.
@article{TaronLellBarlowetal.2018a,
  author    = {Taron, Ulrike H. and Lell, Moritz and Barlow, Axel and Paijmans, Johanna L. A.},
  title     = {Testing of Alignment Parameters for Ancient Samples},
  series    = {Genes},
  volume    = {9},
  journal   = {Genes},
  number    = {3},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2073-4425},
  doi       = {10.3390/genes9030157},
  pages     = {1--12},
  year      = {2018},
  abstract  = {High-throughput sequence data retrieved from ancient or other degraded samples has led to unprecedented insights into the evolutionary history of many species, but the analysis of such sequences also poses specific computational challenges. The most commonly used approach involves mapping sequence reads to a reference genome. However, this process becomes increasingly challenging with an elevated genetic distance between target and reference or with the presence of contaminant sequences with high sequence similarity to the target species. The evaluation and testing of mapping efficiency and stringency are thus paramount for the reliable identification and analysis of ancient sequences. In this paper, we present 'TAPAS', (Testing of Alignment Parameters for Ancient Samples), a computational tool that enables the systematic testing of mapping tools for ancient data by simulating sequence data reflecting the properties of an ancient dataset and performing test runs using the mapping software and parameter settings of interest. We showcase TAPAS by using it to assess and improve mapping strategy for a degraded sample from a banded linsang (Prionodon linsang), for which no closely related reference is currently available. This enables a 1.8-fold increase of the number of mapped reads without sacrificing mapping specificity. The increase of mapped reads effectively reduces the need for additional sequencing, thus making more economical use of time, resources, and sample material.},
  language  = {en}
}

% Second export of the same journal article (same DOI as the "a" record);
% kept as a separate record, key suffixed "b".
@article{TaronLellBarlowetal.2018b,
  author    = {Taron, Ulrike H. and Lell, Moritz and Barlow, Axel and Paijmans, Johanna L. A.},
  title     = {Testing of Alignment Parameters for Ancient Samples},
  series    = {Genes},
  volume    = {9},
  journal   = {Genes},
  number    = {3},
  publisher = {MDPI},
  address   = {Basel},
  issn      = {2073-4425},
  doi       = {10.3390/genes9030157},
  pages     = {12},
  year      = {2018},
  abstract  = {High-throughput sequence data retrieved from ancient or other degraded samples has led to unprecedented insights into the evolutionary history of many species, but the analysis of such sequences also poses specific computational challenges. The most commonly used approach involves mapping sequence reads to a reference genome. However, this process becomes increasingly challenging with an elevated genetic distance between target and reference or with the presence of contaminant sequences with high sequence similarity to the target species. The evaluation and testing of mapping efficiency and stringency are thus paramount for the reliable identification and analysis of ancient sequences. In this paper, we present 'TAPAS', (Testing of Alignment Parameters for Ancient Samples), a computational tool that enables the systematic testing of mapping tools for ancient data by simulating sequence data reflecting the properties of an ancient dataset and performing test runs using the mapping software and parameter settings of interest. We showcase TAPAS by using it to assess and improve mapping strategy for a degraded sample from a banded linsang (Prionodon linsang), for which no closely related reference is currently available. This enables a 1.8-fold increase of the number of mapped reads without sacrificing mapping specificity. The increase of mapped reads effectively reduces the need for additional sequencing, thus making more economical use of time, resources, and sample material.},
  language  = {en}
}

@article{AlbertiGonzalezPaijmansetal.2018,
  author    = {Alberti, Federica and Gonzalez, Javier and Paijmans, Johanna L. A. and Basler, Nikolas and Preick, Michaela and Henneberger, Kirstin and Trinks, Alexandra and Rabeder, Gernot and Conard, Nicholas J. and Muenzel, Susanne C. and Joger, Ulrich and Fritsch, Guido and Hildebrandt, Thomas and Hofreiter, Michael and Barlow, Axel},
  title     = {Optimized {DNA} sampling of ancient bones using Computed Tomography scans},
  series    = {Molecular ecology resources},
  volume    = {18},
  journal   = {Molecular ecology resources},
  number    = {6},
  publisher = {Wiley},
  address   = {Hoboken},
  issn      = {1755-098X},
  doi       = {10.1111/1755-0998.12911},
  pages     = {1196--1208},
  year      = {2018},
  abstract  = {The prevalence of contaminant microbial DNA in ancient bone samples represents the principal limiting factor for palaeogenomic studies, as it may comprise more than 99\% of DNA molecules obtained. Efforts to exclude or reduce this contaminant fraction have been numerous but also variable in their success. Here, we present a simple but highly effective method to increase the relative proportion of endogenous molecules obtained from ancient bones. Using computed tomography (CT) scanning, we identify the densest region of a bone as optimal for sampling. This approach accurately identifies the densest internal regions of petrous bones, which are known to be a source of high-purity ancient DNA. For ancient long bones, CT scans reveal a high-density outermost layer, which has been routinely removed and discarded prior to DNA extraction. For almost all long bones investigated, we find that targeted sampling of this outermost layer provides an increase in endogenous DNA content over that obtained from softer, trabecular bone. This targeted sampling can produce as much as 50-fold increase in the proportion of endogenous DNA, providing a directly proportional reduction in sequencing costs for shotgun sequencing experiments. The observed increases in endogenous DNA proportion are not associated with any reduction in absolute endogenous molecule recovery. Although sampling the outermost layer can result in higher levels of human contamination, some bones were found to have more contamination associated with the internal bone structures. Our method is highly consistent, reproducible and applicable across a wide range of bone types, ages and species. We predict that this discovery will greatly extend the potential to study ancient populations and species in the genomics era.},
  language  = {en}
}

@misc{WestburyBalekaBarlowetal.2017,
  author   = {Westbury, Michael V. and Baleka, Sina Isabelle and Barlow, Axel and Hartmann, Stefanie and Paijmans, Johanna L. A. and Kramarz, Alejandro and Forasiepi, Anal{\'\i}a M. and Bond, Mariano and Gelfo, Javier N. and Reguero, Marcelo A. and L{\'o}pez-Mendoza, Patricio and Taglioretti, Matias and Scaglia, Fernando and Rinderknecht, Andr{\'e}s and Jones, Washington and Mena, Francisco and Billet, Guillaume and de Muizon, Christian and Aguilar, Jos{\'e} Luis and MacPhee, Ross D. E. and Hofreiter, Michael},
  title    = {A mitogenomic timetree for {Darwin's} enigmatic {South American} mammal {Macrauchenia} patachonica},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {793},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-44080},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-440801},
  pages    = {8},
  year     = {2017},
  abstract = {The unusual mix of morphological traits displayed by extinct South American native ungulates (SANUs) confounded both Charles Darwin, who first discovered them, and Richard Owen, who tried to resolve their relationships. Here we report an almost complete mitochondrial genome for the litoptern Macrauchenia. Our dated phylogenetic tree places Macrauchenia as sister to Perissodactyla, but close to the radiation of major lineages within Laurasiatheria. This position is consistent with a divergence estimate of $\sim$66 Ma (95\% credibility interval, 56.64--77.83 Ma) obtained for the split between Macrauchenia and other Panperissodactyla. Combined with their morphological distinctiveness, this evidence supports the positioning of Litopterna (possibly in company with other SANU groups) as a separate order within Laurasiatheria. We also show that, when using strict criteria, extinct taxa marked by deep divergence times and a lack of close living relatives may still be amenable to palaeogenomic analysis through iterative mapping against more distant relatives.},
  language = {en}
}

% Postprint-series record; the journal article with the same authors follows.
@misc{HofreiterPaijmansGoodchildetal.2015,
  author   = {Hofreiter, Michael and Paijmans, Johanna L. A. and Goodchild, Helen and Speller, Camilla F. and Barlow, Axel and Gonz{\'a}lez-Fortes, Gloria M. and Thomas, Jessica A. and Ludwig, Arne and Collins, Matthew J.},
  title    = {The future of ancient {DNA}},
  series   = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal  = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number   = {908},
  issn     = {1866-8372},
  doi      = {10.25932/publishup-43881},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-438816},
  pages    = {284--295},
  year     = {2015},
  abstract = {Technological innovations such as next generation sequencing and DNA hybridisation enrichment have resulted in multi-fold increases in both the quantity of ancient DNA sequence data and the time depth for DNA retrieval. To date, over 30 ancient genomes have been sequenced, moving from 0.7x coverage (mammoth) in 2008 to more than 50x coverage (Neanderthal) in 2014. Studies of rapid evolutionary changes, such as the evolution and spread of pathogens and the genetic responses of hosts, or the genetics of domestication and climatic adaptation, are developing swiftly and the importance of palaeogenomics for investigating evolutionary processes during the last million years is likely to increase considerably. However, these new datasets require new methods of data processing and analysis, as well as conceptual changes in interpreting the results. In this review we highlight important areas of future technical and conceptual progress and discuss research topics in the rapidly growing field of palaeogenomics.},
  language = {en}
}

% Journal article; key suffixed "a" because the export reused the key above.
@article{HofreiterPaijmansGoodchildetal.2015a,
  author    = {Hofreiter, Michael and Paijmans, Johanna L. A. and Goodchild, Helen and Speller, Camilla F. and Barlow, Axel and Gonz{\'a}lez-Fortes, Gloria M. and Thomas, Jessica A. and Ludwig, Arne and Collins, Matthew J.},
  title     = {The future of ancient {DNA}: Technical advances and conceptual shifts},
  series    = {Bioessays : ideas that push the boundaries},
  volume    = {37},
  journal   = {Bioessays : ideas that push the boundaries},
  number    = {3},
  publisher = {Wiley-Blackwell},
  address   = {Hoboken},
  issn      = {0265-9247},
  doi       = {10.1002/bies.201400160},
  pages     = {284--293},
  year      = {2015},
  abstract  = {Technological innovations such as next generation sequencing and DNA hybridisation enrichment have resulted in multi-fold increases in both the quantity of ancient DNA sequence data and the time depth for DNA retrieval. To date, over 30 ancient genomes have been sequenced, moving from 0.7x coverage (mammoth) in 2008 to more than 50x coverage (Neanderthal) in 2014. Studies of rapid evolutionary changes, such as the evolution and spread of pathogens and the genetic responses of hosts, or the genetics of domestication and climatic adaptation, are developing swiftly and the importance of palaeogenomics for investigating evolutionary processes during the last million years is likely to increase considerably. However, these new datasets require new methods of data processing and analysis, as well as conceptual changes in interpreting the results. In this review we highlight important areas of future technical and conceptual progress and discuss research topics in the rapidly growing field of palaeogenomics.},
  language  = {en}
}