@article{WangYangMeinel2018,
  author    = {Wang, Cheng and Yang, Haojin and Meinel, Christoph},
  title     = {Image Captioning with Deep Bidirectional LSTMs and Multi-Task Learning},
  journal   = {ACM Transactions on Multimedia Computing, Communications, and Applications},
  volume    = {14},
  number    = {2},
  publisher = {Association for Computing Machinery},
  address   = {New York},
  issn      = {1551-6857},
  doi       = {10.1145/3115432},
  pages     = {20},
  year      = {2018},
  abstract  = {Generating a novel and descriptive caption of an image is drawing increasing interest in the computer vision, natural language processing, and multimedia communities. In this work, we propose an end-to-end trainable deep bidirectional LSTM (Bi-LSTM, Long Short-Term Memory) model to address the problem. By combining a deep convolutional neural network (CNN) and two separate LSTM networks, our model is capable of learning long-term visual-language interactions by making use of history and future context information in a high-level semantic space. We also explore deep multimodal bidirectional models, in which we increase the depth of the nonlinearity transition in different ways to learn hierarchical visual-language embeddings. Data augmentation techniques such as multi-crop, multi-scale, and vertical mirror are proposed to prevent over-fitting when training deep models. To understand how our models ``translate'' an image to a sentence, we visualize and qualitatively analyze the evolution of the Bi-LSTM internal states over time. The effectiveness and generality of the proposed models are evaluated on four benchmark datasets: Flickr8K, Flickr30K, MSCOCO, and Pascal1K. We demonstrate that Bi-LSTM models achieve highly competitive performance on both caption generation and image-sentence retrieval even without integrating an additional mechanism (e.g., object detection, attention model). Our experiments also show that multi-task learning is beneficial for increasing model generality and improving performance. We further demonstrate that the transfer-learning performance of the Bi-LSTM model significantly outperforms previous methods on the Pascal1K dataset.},
  language  = {en}
}

@article{ShilonKrausBuecheleetal.2018,
  author    = {Shilon, I. and Kraus, M. and B{\"u}chele, M. and Egberts, Kathrin and Fischer, Tobias and Holch, Tim Lukas and Lohse, T. and Schwanke, U. and Steppa, Constantin Beverly and Funk, Stefan},
  title     = {Application of deep learning methods to analysis of imaging atmospheric Cherenkov telescopes data},
  journal   = {Astroparticle Physics},
  volume    = {105},
  publisher = {Elsevier},
  address   = {Amsterdam},
  issn      = {0927-6505},
  doi       = {10.1016/j.astropartphys.2018.10.003},
  pages     = {44--53},
  year      = {2018},
  abstract  = {Ground-based gamma-ray observations with Imaging Atmospheric Cherenkov Telescopes (IACTs) play a significant role in the discovery of very high energy (E > 100 GeV) gamma-ray emitters. The analysis of IACT data demands a highly efficient background rejection technique, as well as methods to accurately determine the position of the source in the sky and the energy of the recorded gamma ray. We present results for background rejection and signal direction reconstruction from first studies of a novel data analysis scheme for IACT measurements. The new analysis is based on a set of Convolutional Neural Networks (CNNs) applied to images from the four H.E.S.S. phase-I telescopes. As the pixels of the H.E.S.S. cameras are arranged in a hexagonal array, we demonstrate two ways to use such image data to train CNNs: by resampling the images to a square grid and by applying modified convolution kernels that conserve the hexagonal grid properties. The networks were trained on sets of Monte Carlo simulated events and tested on both simulations and measured data from the H.E.S.S. array. A comparison of the CNN analysis with current state-of-the-art algorithms reveals a clear improvement in background rejection performance. When applied to H.E.S.S. observation data, the CNN direction reconstruction performs at a level similar to that of traditional methods. These results serve as a proof of concept for the application of CNNs to the analysis of events recorded by IACTs.},
  language  = {en}
}