% Report on adaptive-window variants of the Sorted Neighborhood Method.
% The exporter emitted two publisher fields; the university press is kept as
% publisher and the second value is preserved under institution.
@book{DraisbachNaumannSzottetal.2012,
  author      = {Draisbach, Uwe and Naumann, Felix and Szott, Sascha and Wonneberg, Oliver},
  title       = {Adaptive windows for duplicate detection},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  year        = {2012},
  pagetotal   = {41},
  isbn        = {978-3-86956-143-1},
  issn        = {1613-5652},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-53007},
  language    = {en},
  abstract    = {Duplicate detection is the task of identifying all groups of records within a data set that represent the same real-world entity, respectively. This task is difficult, because (i) representations might differ slightly, so some similarity measure must be defined to compare pairs of records and (ii) data sets might have a high volume making a pair-wise comparison of all records infeasible. To tackle the second problem, many algorithms have been suggested that partition the data set and compare all record pairs only within each partition. One well-known such approach is the Sorted Neighborhood Method (SNM), which sorts the data according to some key and then advances a window over the data comparing only records that appear within the same window. We propose several variations of SNM that have in common a varying window size and advancement. The general intuition of such adaptive windows is that there might be regions of high similarity suggesting a larger window size and regions of lower similarity suggesting a smaller window size. We propose and thoroughly evaluate several adaption strategies, some of which are provably better than the original SNM in terms of efficiency (same results with fewer comparisons).},
}

% Report on discovering conditional inclusion dependencies (CINDs).
@book{BauckmannAbedjanLeseretal.2012,
  author      = {Bauckmann, Jana and Abedjan, Ziawasch and Leser, Ulf and M{\"u}ller, Heiko and Naumann, Felix},
  title       = {Covering or complete? Discovering conditional inclusion dependencies},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  year        = {2012},
  pagetotal   = {34},
  isbn        = {978-3-86956-212-4},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-62089},
  language    = {en},
  abstract    = {Data dependencies, or integrity constraints, are used to improve the quality of a database schema, to optimize queries, and to ensure consistency in a database. In the last years conditional dependencies have been introduced to analyze and improve data quality. In short, a conditional dependency is a dependency with a limited scope defined by conditions over one or more attributes. Only the matching part of the instance must adhere to the dependency. In this paper we focus on conditional inclusion dependencies (CINDs). We generalize the definition of CINDs, distinguishing covering and completeness conditions. We present a new use case for such CINDs showing their value for solving complex data quality tasks. Further, we define quality measures for conditions inspired by precision and recall. We propose efficient algorithms that identify covering and completeness conditions conforming to given quality thresholds. Our algorithms choose not only the condition values but also the condition attributes automatically. Finally, we show that our approach efficiently provides meaningful and helpful results for our use case.},
}

% Joint H.E.S.S. / MAGIC / VERITAS paper on the 2010 VHE flare of M 87.
% pages holds the article number taken from the DOI suffix; the page count
% (18) is kept in pagetotal. The author list is reproduced as exported.
@article{AbramowskiAceroAharonianetal.2012,
  author       = {Abramowski, Attila and Acero, F. and Aharonian, Felix A. and Akhperjanian, A. G. and Anton, Gisela
                  and Balzer, Arnim and Barnacka, Anna and de Almeida, U. Barres and Becherini, Yvonne and Becker, J.
                  and Behera, B. and Bernl{\"o}hr, K. and Birsin, E. and Biteau, Jonathan and Bochow, A.
                  and Boisson, Catherine and Bolmont, J. and Bordas, Pol and Brucker, J. and Brun, Francois
                  and Brun, Pierre and Bulik, Tomasz and Buesching, I. and Carrigan, Svenja and Casanova, Sabrina
                  and Cerruti, M.
                  and Chadwick, Paula M. and Charbonnier, A. and Chaves, Ryan C. G. and Cheesebrough, A.
                  and Clapson, A. C. and Coignet, G. and Cologna, Gabriele and Conrad, Jan and Dalton, M.
                  and Daniel, M. K. and Davids, I. D. and Degrange, B. and Deil, C. and Dickinson, H. J.
                  and Djannati-Ata{\"i}, A. and Domainko, W. and Drury, L. O'C. and Dubus, G. and Dutson, K.
                  and Dyks, J. and Dyrda, M. and Egberts, Kathrin and Eger, P. and Espigat, P.
                  and Fallon, L. and Farnier, C. and Fegan, S. and Feinstein, F. and Fernandes, M. V.
                  and Fiasson, A. and Fontaine, G. and Foerster, A. and Fuessling, M. and Gallant, Y. A.
                  and Gast, H. and Gerard, L. and Gerbig, D. and Giebels, B. and Glicenstein, J. F.
                  and Glueck, B. and Goret, P. and Goering, D. and Haeffner, S. and Hague, J. D.
                  and Hampf, D. and Hauser, M. and Heinz, S. and Heinzelmann, G. and Henri, G.
                  and Hermann, G. and Hinton, James Anthony and Hoffmann, A. and Hofmann, W. and Hofverberg, P.
                  and Holler, M. and Horns, D. and Jacholkowska, A. and de Jager, O. C. and Jahn, C.
                  and Jamrozy, M. and Jung, I. and Kastendieck, M. A. and Katarzynski, K. and Katz, U.
                  and Kaufmann, S. and Keogh, D. and Khangulyan, D. and Khelifi, B. and Klochkov, D.
                  and Kluzniak, W. and Kneiske, T. and Komin, Nu. and Kosack, K. and Kossakowski, R.
                  and Laffon, H. and Lamanna, G. and Lennarz, D. and Lohse, T. and Lopatin, A.
                  and Lu, C. -C. and Marandon, V. and Marcowith, Alexandre and Masbou, J. and Maurin, D.
                  and Maxted, N. and Mayer, M. and McComb, T. J. L. and Medina, M. C. and Mehault, J.
                  and Moderski, R. and Moulin, Emmanuel and Naumann, C. L. and Naumann-Godo, M. and de Naurois, M.
                  and Nedbal, D. and Nekrassov, D. and Nguyen, N. and Nicholas, B. and Niemiec, J.
                  and Nolan, S. J. and Ohm, S. and Wilhelmi, E. de Ona and Opitz, B. and Ostrowski, M.
                  and Oya, I. and Panter, M. and Arribas, M. Paz and Pedaletti, G. and Pelletier, G.
                  and Petrucci, P. -O. and Pita, S. and Puehlhofer, G. and Punch, M. and Quirrenbach, A.
                  and Raue, M. and Rayner, S. M. and Reimer, A.
                  and Reimer, O. and Renaud, M. and de los Reyes, R. and Rieger, F. and Ripken, J.
                  and Rob, L. and Rosier-Lees, S. and Rowell, G. and Rudak, B. and Rulten, C. B.
                  and Ruppel, J. and Sahakian, V. and Sanchez, David M. and Santangelo, Andrea and Schlickeiser, R.
                  and Schoeck, F. M. and Schulz, A. and Schwanke, U. and Schwarzburg, S. and Schwemmer, S.
                  and Sheidaei, F. and Skilton, J. L. and Sol, H. and Spengler, G. and Stawarz, L.
                  and Steenkamp, R. and Stegmann, Christian and Stinzing, F. and Stycz, K. and Sushch, Iurii
                  and Szostek, A. and Tavernet, J. -P. and Terrier, R. and Tluczykont, M. and Valerius, K.
                  and van Eldik, C. and Vasileiadis, G. and Venter, C. and Vialle, J. P. and Viana, A.
                  and Vincent, P. and Voelk, H. J. and Volpe, F. and Vorobiov, S. and Vorster, M.
                  and Wagner, S. J. and Ward, M. and White, R. and Wierzcholska, A. and Zacharias, M.
                  and Zajczyk, A. and Zdziarski, A. A. and Zech, Alraune and Zechlin, H. -S. and Aleksic, J.
                  and Antonelli, L. A. and Antoranz, P. and Backes, Michael and Barrio, J. A. and Bastieri, D.
                  and Becerra Gonzalez, J. and Bednarek, W. and Berdyugin, A. and Berger, K. and Bernardini, E.
                  and Biland, A. and Blanch Bigas, O. and Bock, R. K. and Boller, A. and Bonnoli, G.
                  and Tridon, D. Borla and Braun, I. and Bretz, T. and Canellas, A. and Carmona, E.
                  and Carosi, A. and Colin, P. and Colombo, E. and Contreras, J. L. and Cortina, J.
                  and Cossio, L. and Covino, S. and Dazzi, F. and De Angelis, A. and De Cea del Pozo, E.
                  and De Lotto, B. and Delgado Mendez, C. and Diago Ortega, A. and Doert, M. and Dominguez, A.
                  and Prester, Dijana Dominis and Dorner, D. and Doro, M. and Elsaesser, D. and Ferenc, D.
                  and Fonseca, M. V. and Font, L. and Fruck, C. and Garcia Lopez, R. J. and Garczarczyk, M.
                  and Garrido, D. and Giavitto, G. and Godinovic, N. and Hadasch, D. and Haefner, D.
                  and Herrero, A. and Hildebrand, D. and Hoehne-Moench, D. and Hose, J. and Hrupec, D.
                  and Huber, B. and Jogler, T. and Klepser, S. and Kraehenbuehl, T. and Krause, J.
                  and La Barbera, A. and Lelas, D. and Leonardo, E. and Lindfors, E. and Lombardi, S.
                  and Lopez, M. and Lorenz, E. and Makariev, M. and Maneva, G. and Mankuzhiyil, N.
                  and Mannheim, K. and Maraschi, L. and Mariotti, M. and Martinez, M. and Mazin, D.
                  and Meucci, M. and Miranda, J. M. and Mirzoyan, R. and Miyamoto, H. and Moldon, J.
                  and Moralejo, A. and Munar, P. and Nieto, D. and Nilsson, K. and Orito, R.
                  and Oya, I. and Paneque, D. and Paoletti, R. and Pardo, S. and Paredes, J. M.
                  and Partini, S. and Pasanen, M. and Pauss, F. and Perez-Torres, M. A. and Persic, M.
                  and Peruzzo, L. and Pilia, M. and Pochon, J. and Prada, F. and Moroni, P. G. Prada
                  and Prandini, E. and Puljak, I. and Reichardt, I. and Reinthal, R. and Rhode, W.
                  and Ribo, M. and Rico, J. and Ruegamer, S. and Saggion, A. and Saito, K.
                  and Saito, T. Y. and Salvati, M. and Satalecka, K. and Scalzotto, V. and Scapin, V.
                  and Schultz, C. and Schweizer, T. and Shayduk, M. and Shore, S. N. and Sillanpaa, A.
                  and Sitarek, J. and Sobczynska, D. and Spanier, F. and Spiro, S. and Stamerra, A.
                  and Steinke, B. and Storz, J. and Strah, N. and Suric, T. and Takalo, L.
                  and Takami, H. and Tavecchio, F. and Temnikov, P. and Terzic, T. and Tescaro, D.
                  and Teshima, M. and Thom, M. and Tibolla, O. and Torres, D. F. and Treves, A.
                  and Vankov, H. and Vogler, P. and Wagner, R. M. and Weitzel, Q. and Zabalza, V.
                  and Zandanel, F. and Zanin, R. and Arlen, T. and Aune, T. and Beilicke, M.
                  and Benbow, W. and Bouvier, A. and Bradbury, S. M. and Buckley, J. H. and Bugaev, V.
                  and Byrum, K. and Cannon, A. and Cesarini, A. and Ciupik, L. and Connolly, M. P.
                  and Cui, W. and Dickherber, R. and Duke, C. and Errando, M. and Falcone, A.
                  and Finley, J. P. and Finnegan, G. and Fortson, L. and Furniss, A. and Galante, N.
                  and Gall, D. and Godambe, S. and Griffin, S. and Grube, J. and Gyuk, G.
                  and Hanna, D. and Holder, J. and Huan, H. and Hui, C. M. and Kaaret, P.
                  and Karlsson, N. and Kertzman, M. and Khassen, Y. and Kieda, D. and Krawczynski, H.
                  and Krennrich, F. and Lang, M. J. and LeBohec, S. and Maier, G. and McArthur, S.
                  and McCann, A. and Moriarty, P. and Mukherjee, R. and Nunez, P. D. and Ong, R. A.
                  and Orr, M. and Otte, A. N. and Park, N. and Perkins, J. S. and Pichel, A.
                  and Pohl, Martin and Prokoph, H. and Ragan, K. and Reyes, L. C. and Reynolds, P. T.
                  and Roache, E. and Rose, H. J. and Ruppel, J. and Schroedter, M. and Sembroski, G. H.
                  and Sentuerk, G. D. and Telezhinsky, Igor O. and Tesic, G. and Theiling, M. and Thibadeau, S.
                  and Varlotta, A. and Vassiliev, V. V. and Vivier, M. and Wakely, S. P. and Weekes, T. C.
                  and Williams, D. A. and Zitzer, B. and de Almeida, U. Barres and Cara, M. and Casadio, C.
                  and Cheung, C. C. and McConville, W. and Davies, F. and Doi, A. and Giovannini, G.
                  and Giroletti, M. and Hada, K. and Hardee, P. and Harris, D. E. and Junor, W.
                  and Kino, M. and Lee, N. P. and Ly, C. and Madrid, J. and Massaro, F.
                  and Mundell, C. G. and Nagai, H. and Perlman, E. S. and Steele, I. A. and Walker, R. C.
                  and Wood, D. L.},
  title        = {The 2010 very high energy gamma-ray flare and 10 years of multi-wavelength observations of {M 87}},
  journal      = {The astrophysical journal : an international review of spectroscopy and astronomical physics},
  volume       = {746},
  number       = {2},
  pages        = {151},
  pagetotal    = {18},
  year         = {2012},
  publisher    = {IOP Publ. Ltd.},
  address      = {Bristol},
  organization = {HESS Collaboration, MAGIC Collaboration, VERITAS Collaboration},
  issn         = {0004-637X},
  doi          = {10.1088/0004-637X/746/2/151},
  language     = {en},
  abstract     = {The giant radio galaxy M 87 with its proximity (16 Mpc), famous jet, and very massive black hole ((3--6) $\times 10^{9}\,M_{\odot}$) provides a unique opportunity to investigate the origin of very high energy (VHE; $E > 100$ GeV) gamma-ray emission generated in relativistic outflows and the surroundings of supermassive black holes. M 87 has been established as a VHE gamma-ray emitter since 2006. The VHE gamma-ray emission displays strong variability on timescales as short as a day. In this paper, results from a joint VHE monitoring campaign on M 87 by the MAGIC and VERITAS instruments in 2010 are reported. During the campaign, a flare at VHE was detected triggering further observations at VHE (H.E.S.S.), X-rays (Chandra), and radio (43 GHz Very Long Baseline Array, VLBA). The excellent sampling of the VHE gamma-ray light curve enables one to derive a precise temporal characterization of the flare: the single, isolated flare is well described by a two-sided exponential function with significantly different flux rise and decay times of $\tau_{d}^{\mathrm{rise}} = (1.69 \pm 0.30)$ days and $\tau_{d}^{\mathrm{decay}} = (0.611 \pm 0.080)$ days, respectively. While the overall variability pattern of the 2010 flare appears somewhat different from that of previous VHE flares in 2005 and 2008, they share very similar timescales ($\sim$day), peak fluxes ($\Phi_{>0.35\,\mathrm{TeV}} \simeq (1\mbox{--}3) \times 10^{-11}$ photons cm$^{-2}$ s$^{-1}$), and VHE spectra. VLBA radio observations of 43 GHz of the inner jet regions indicate no enhanced flux in 2010 in contrast to observations in 2008, where an increase of the radio flux of the innermost core regions coincided with a VHE flare. On the other hand, Chandra X-ray observations taken $\sim$3 days after the peak of the VHE gamma-ray emission reveal an enhanced flux from the core (flux increased by factor $\sim$2; variability timescale $<$2 days). The long-term (2001--2010) multi-wavelength (MWL) light curve of M 87, spanning from radio to VHE and including data from Hubble Space Telescope, Liverpool Telescope, Very Large Array, and European VLBI Network, is used to further investigate the origin of the VHE gamma-ray emission. No unique, common MWL signature of the three VHE flares has been identified. In the outer kiloparsec jet region, in particular in HST-1, no enhanced MWL activity was detected in 2008 and 2010, disfavoring it as the origin of the VHE flares during these years. Shortly after two of the three flares (2008 and 2010), the X-ray core was observed to be at a higher flux level than its characteristic range (determined from more than 60 monitoring observations: 2002--2009). In 2005, the strong flux dominance of HST-1 could have suppressed the detection of such a feature. Published models for VHE gamma-ray emission from M 87 are reviewed in the light of the new data.},
}

% Report on decrypting cryptic attribute labels in ETL workflows.
@book{AlbrechtNaumann2012,
  author      = {Albrecht, Alexander and Naumann, Felix},
  title       = {Understanding cryptic schemata in large extract-transform-load systems},
  publisher   = {Universit{\"a}tsverlag Potsdam},
  address     = {Potsdam},
  institution = {Universit{\"a}t Potsdam},
  year        = {2012},
  pagetotal   = {19},
  isbn        = {978-3-86956-201-8},
  url         = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus-61257},
  language    = {en},
  abstract    = {Extract-Transform-Load (ETL) tools are used for the creation, maintenance, and evolution of data warehouses, data marts, and operational data stores. ETL workflows populate those systems with data from various data sources by specifying and executing a DAG of transformations. Over time, hundreds of individual workflows evolve as new sources and new requirements are integrated into the system. The maintenance and evolution of large-scale ETL systems requires much time and manual effort. A key problem is to understand the meaning of unfamiliar attribute labels in source and target databases and ETL transformations. Hard-to-understand attribute labels lead to frustration and time spent to develop and understand ETL workflows. We present a schema decryption technique to support ETL developers in understanding cryptic schemata of sources, targets, and ETL transformations. For a given ETL system, our recommender-like approach leverages the large number of mapped attribute labels in existing ETL workflows to produce good and meaningful decryptions. In this way we are able to decrypt attribute labels consisting of a number of unfamiliar few-letter abbreviations, such as UNP\_PEN\_INT, which we can decrypt to UNPAID\_PENALTY\_INTEREST. We evaluate our schema decryption approach on three real-world repositories of ETL workflows and show that our approach is able to suggest high-quality decryptions for cryptic attribute labels in a given schema.},
}