@article{WangYangMeinel2018,
  author    = {Wang, Cheng and Yang, Haojin and Meinel, Christoph},
  title     = {Image Captioning with Deep Bidirectional LSTMs and Multi-Task Learning},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  volume    = {14},
  number    = {2},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {1551-6857},
  doi       = {10.1145/3115432},
  pages     = {20},
  year      = {2018},
  abstract  = {Generating a novel and descriptive caption of an image is drawing increasing interest in the computer vision, natural language processing, and multimedia communities. In this work, we propose an end-to-end trainable deep bidirectional LSTM (Bi-LSTM, Long Short-Term Memory) model to address the problem. By combining a deep convolutional neural network (CNN) and two separate LSTM networks, our model is capable of learning long-term visual-language interactions by making use of history and future context information in a high-level semantic space. We also explore deep multimodal bidirectional models, in which we increase the depth of the nonlinearity transition in different ways to learn hierarchical visual-language embeddings. Data augmentation techniques such as multi-crop, multi-scale, and vertical mirror are proposed to prevent over-fitting when training deep models. To understand how our models ``translate'' an image to a sentence, we visualize and qualitatively analyze the evolution of the Bi-LSTM internal states over time. The effectiveness and generality of the proposed models are evaluated on four benchmark datasets: Flickr8K, Flickr30K, MSCOCO, and Pascal1K. We demonstrate that Bi-LSTM models achieve highly competitive performance on both caption generation and image-sentence retrieval even without integrating an additional mechanism (e.g., object detection, attention model). Our experiments also show that multi-task learning is beneficial for increasing model generality and improving performance. We further demonstrate that the transfer-learning performance of the Bi-LSTM model significantly outperforms previous methods on the Pascal1K dataset.},
  language  = {en}
}

@article{ShilonKrausBuecheleetal.2018,
  author    = {Shilon, I. and Kraus, M. and B{\"u}chele, M. and Egberts, Kathrin and Fischer, Tobias and Holch, Tim Lukas and Lohse, T. and Schwanke, U. and Steppa, Constantin Beverly and Funk, Stefan},
  title     = {Application of deep learning methods to analysis of imaging atmospheric Cherenkov telescopes data},
  journal   = {Astroparticle Physics},
  volume    = {105},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {0927-6505},
  doi       = {10.1016/j.astropartphys.2018.10.003},
  pages     = {44--53},
  year      = {2018},
  abstract  = {Ground-based gamma-ray observations with Imaging Atmospheric Cherenkov Telescopes (IACTs) play a significant role in the discovery of very high energy (E > 100 GeV) gamma-ray emitters. The analysis of IACT data demands a highly efficient background rejection technique, as well as methods to accurately determine the position of the source in the sky and the energy of the recorded gamma ray. We present results for background rejection and signal direction reconstruction from first studies of a novel data analysis scheme for IACT measurements. The new analysis is based on a set of Convolutional Neural Networks (CNNs) applied to images from the four H.E.S.S. phase-I telescopes. As the pixels of the H.E.S.S. cameras are arranged in a hexagonal array, we demonstrate two ways to use such image data to train CNNs: by resampling the images to a square grid and by applying modified convolution kernels that conserve the hexagonal grid properties. The networks were trained on sets of Monte Carlo simulated events and tested on both simulations and measured data from the H.E.S.S. array. A comparison of the CNN analysis with current state-of-the-art algorithms reveals a clear improvement in background rejection performance. When applied to H.E.S.S. observation data, the CNN direction reconstruction performs at a level similar to that of traditional methods. These results serve as a proof of concept for the application of CNNs to the analysis of events recorded by IACTs.},
  language  = {en}
}