@misc{BarlowHartmannGonzalezetal.2020,
  author = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.},
  title = {Consensify},
  series = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  journal = {Postprints der Universit{\"a}t Potsdam : Mathematisch-Naturwissenschaftliche Reihe},
  number = {1033},
  issn = {1866-8372},
  doi = {10.25932/publishup-47252},
  url = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-472521},
  pages = {24},
  year = {2020},
  abstract = {A standard practice in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling. For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify was developed with palaeogenomic data in mind, it is applicable to any low- to medium-coverage short read dataset. We predict that Consensify will be a useful tool for future studies of palaeogenomes.},
  language = {en}
}

@article{BarlowHartmannGonzalezetal.2020a,
  author = {Barlow, Axel and Hartmann, Stefanie and Gonzalez, Javier and Hofreiter, Michael and Paijmans, Johanna L. A.},
  title = {Consensify},
  series = {Genes / Molecular Diversity Preservation International},
  volume = {11},
  journal = {Genes / Molecular Diversity Preservation International},
  number = {1},
  publisher = {MDPI},
  address = {Basel},
  issn = {2073-4425},
  doi = {10.3390/genes11010050},
  pages = {22},
  year = {2020},
  abstract = {A standard practice in palaeogenome analysis is the conversion of mapped short read data into pseudohaploid sequences, frequently by selecting a single high-quality nucleotide at random from the stack of mapped reads. This controls for biases due to differential sequencing coverage, but it does not control for differential rates and types of sequencing error, which are frequently large and variable in datasets obtained from ancient samples. These errors have the potential to distort phylogenetic and population clustering analyses, and to mislead tests of admixture using D statistics. We introduce Consensify, a method for generating pseudohaploid sequences, which controls for biases resulting from differential sequencing coverage while greatly reducing error rates. The error correction is derived directly from the data itself, without the requirement for additional genomic resources or simplifying assumptions such as contemporaneous sampling.
    For phylogenetic and population clustering analysis, we find that Consensify is less affected by artefacts than methods based on single read sampling. For D statistics, Consensify is more resistant to false positives and appears to be less affected by biases resulting from different laboratory protocols than other frequently used methods. Although Consensify was developed with palaeogenomic data in mind, it is applicable to any low- to medium-coverage short read dataset. We predict that Consensify will be a useful tool for future studies of palaeogenomes.},
  language = {en}
}

@article{DinevaVermaGonzalezManriqueetal.2020,
  author = {Dineva, Ekaterina Ivanova and Verma, Meetu and Gonzalez Manrique, Sergio Javier and Schwartz, Pavol and Denker, Carsten},
  title = {Cloud model inversions of strong chromospheric absorption lines using principal component analysis},
  series = {Astronomische Nachrichten = Astronomical notes},
  volume = {341},
  journal = {Astronomische Nachrichten = Astronomical notes},
  number = {1},
  publisher = {Wiley-VCH Verl.},
  address = {Berlin},
  issn = {0004-6337},
  doi = {10.1002/asna.202013652},
  pages = {64--78},
  year = {2020},
  abstract = {High-resolution spectroscopy of strong chromospheric absorption lines nowadays delivers several million spectra per observing day when fast scanning devices are used to cover large regions on the solar surface. Therefore, fast and robust inversion schemes are needed to explore the large data volume. Cloud model (CM) inversions of the chromospheric H$\alpha$ line are commonly employed to investigate various solar features, including filaments, prominences, surges, jets, mottles, and (macro-)spicules. The choice of the CM was governed by its intuitive description of complex chromospheric structures as clouds suspended above the solar surface by magnetic fields. This study is based on observations of active region NOAA 11126 in H$\alpha$, which were obtained November 18--23, 2010 with the echelle spectrograph of the Vacuum Tower Telescope at the Observatorio del Teide, Spain. Principal component analysis reduces the dimensionality of spectra and conditions noise-stripped spectra for CM inversions. Modeled H$\alpha$ intensity and contrast profiles as well as CM parameters are collected in a database, which facilitates efficient processing of the observed spectra. Physical maps are computed representing the line-core and continuum intensity, absolute contrast, equivalent width, and Doppler velocities, among others. Noise-free spectra expedite the analysis of bisectors. The data processing is evaluated in the context of ``big data'', in particular with respect to automatic classification of spectra.},
  language = {en}
}