@misc{BinTareafBergerHennigetal.2019,
  author    = {Bin Tareaf, Raad and Berger, Philipp and Hennig, Patrick and Meinel, Christoph},
  title     = {Personality exploration system for online social networks},
  series    = {2018 IEEE/WIC/ACM International Conference on Web Intelligence (WI)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-7325-6},
  doi       = {10.1109/WI.2018.00-76},
  pages     = {301--309},
  year      = {2019},
  abstract  = {User-generated content on social media platforms is a rich source of latent information about individual variables. Crawling and analyzing this content provides a new approach for enterprises to personalize services and put forward product recommendations. In the past few years, brands have made a gradual appearance on social media platforms for advertising, customer support, and public relations purposes, and by now a social media presence has become a necessity across all industries. This online identity can be represented as a brand personality that reflects how a brand is perceived by its customers. We exploited recent research in text analysis and personality detection to build an automatic brand personality prediction model on top of Five-Factor Model (FFM) and Linguistic Inquiry and Word Count (LIWC) features extracted from publicly available benchmarks. The proposed model achieved significant accuracy in predicting specific personality traits from brands. To evaluate our prediction results on actual brands, we crawled the Facebook API for 100k posts from the pages of the most valuable brands in the USA; we visualize exemplary comparison results and present suggestions for future directions.},
  language  = {en}
}

@misc{BinTareafBergerHennigetal.2018,
  author    = {Bin Tareaf, Raad and Berger, Philipp and Hennig, Patrick and Meinel, Christoph},
  title     = {ASEDS},
  series    = {IEEE 20th International Conference on High Performance Computing and Communications; IEEE 16th International Conference on Smart City; IEEE 4th International Conference on Data Science and Systems (HPCC/SmartCity/DSS)},
  publisher = {IEEE},
  address   = {New York},
  isbn      = {978-1-5386-6614-2},
  doi       = {10.1109/HPCC/SmartCity/DSS.2018.00143},
  pages     = {860--866},
  year      = {2018},
  abstract  = {The massive adoption of social media has provided new ways for individuals to express their opinions and emotions online. In 2016, Facebook introduced a new feature that allows users to express their emotional response to published content using so-called Facebook reactions. In this paper, a framework for predicting the distribution of Facebook post reactions is presented. For this purpose, we collected a large corpus of Facebook posts together with their reaction labels using the proposed scalable Facebook crawler. The training process utilizes 3 million labeled posts from more than 64,000 unique Facebook pages from diverse categories. The evaluation on standard benchmarks using the proposed features shows promising results compared to previous research.
The final model is able to predict the reaction distribution of Facebook posts with a recall score of 0.90 for the "Joy" emotion.},
  language  = {en}
}

@phdthesis{Stojanovic2021,
  author   = {Stojanovic, Vladeta},
  title    = {Digital twins for indoor built environments},
  doi      = {10.25932/publishup-50913},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-509134},
  school   = {Universit{\"a}t Potsdam},
  pages    = {xxiii, 181},
  year     = {2021},
  abstract = {One of the key challenges in modern Facility Management (FM) is to digitally reflect the current state of the built environment, referred to as the as-is or as-built (as opposed to as-designed) representation. While the use of Building Information Modeling (BIM) can address the issue of digital representation, the generation and maintenance of BIM data require a considerable amount of manual work and domain expertise. Another key challenge is monitoring the current state of the built environment, which is used to provide feedback and enhance decision making. The need for an integrated solution for all data associated with the operational life cycle of a building is becoming more pronounced as practices from Industry 4.0 are currently being evaluated and adopted for FM use. This research presents an approach for the digital representation of indoor environments in their current state within the life cycle of a given building. Such an approach requires the fusion of various sources of digital data. The key to solving this complex issue of digital data integration, processing, and representation is the use of a Digital Twin (DT). A DT is a digital duplicate of the physical environment, its states, and its processes. A DT fuses as-designed and as-built digital representations of the built environment with as-is data, typically in the form of floor plans, point clouds, and BIMs, together with additional information layers pertaining to the current and predicted states of an indoor environment or a complete building (e.g., sensor data). The design, implementation, and initial testing of prototypical DT software services for indoor environments are presented and described. These DT software services are implemented within a service-oriented paradigm, and their feasibility is demonstrated through functioning and tested key software components within prototypical Service-Oriented System (SOS) implementations. The main outcome of this research shows that key data related to the built environment can be semantically enriched and combined to enable digital representations of indoor environments based on the concept of a DT. Furthermore, the outcomes of this research show that digital data related to FM and Architecture, Construction, Engineering, Owner and Occupant (AECOO) activity can be combined, analyzed, and visualized in real time using a service-oriented approach. This has great potential to benefit decision making related to Operation and Maintenance (O\&M) procedures within the scope of the post-construction life cycle stages of typical office buildings.},
  language = {en}
}

@phdthesis{Zieger2017,
  author   = {Zieger, Tobias},
  title    = {Self-adaptive data quality},
  url      = {http://nbn-resolving.de/urn:nbn:de:kobv:517-opus4-410573},
  school   = {Universit{\"a}t Potsdam},
  pages    = {vii, 125},
  year     = {2017},
  abstract = {Carrying out business processes successfully is closely linked to the quality of the data inventory in an organization. Deficiencies in data quality lead to problems: Incorrect address data prevents (timely) shipments to customers.
Erroneous orders lead to returns and thus to unnecessary effort. Incorrect pricing causes companies to lose revenue or impairs customer satisfaction. If orders or customer records cannot be retrieved, complaint management takes longer. Due to erroneous inventories, too few or too many supplies might be reordered. A particular data quality problem, and the cause of many of the issues mentioned above, is the presence of duplicates in databases. Duplicates are different representations of the same real-world objects in a dataset; because these representations differ from each other, they are hard for a computer to match. Moreover, the number of comparisons required to find those duplicates grows quadratically with the dataset size. To cleanse the data, these duplicates must be detected and removed. Duplicate detection is a very laborious process. To achieve satisfactory results, appropriate software must be created and configured (similarity measures, partitioning keys, thresholds, etc.). Both require considerable manual effort and experience. This thesis addresses the automation of parameter selection for duplicate detection and presents several novel approaches that eliminate the need for human experience in parts of the duplicate detection process. A pre-processing step is introduced that analyzes the datasets in question and classifies their attributes semantically. Not only do these annotations help in understanding the respective datasets, but they also facilitate subsequent steps, for example, by selecting appropriate similarity measures or normalizing the data upfront. This approach works without schema information. Following that, we present a partitioning technique that strongly reduces the number of pair comparisons in the duplicate detection process. The approach automatically finds particularly suitable partitioning keys that simultaneously allow for effective and efficient duplicate retrieval. By means of a user study, we demonstrate that this technique finds partitioning keys that outperform expert suggestions and, additionally, requires no manual configuration. Furthermore, this approach can be applied independently of the attribute types. To measure the success of a duplicate detection process and to execute the described partitioning approach, a gold standard is required that provides information about the actual duplicates in a training dataset. This thesis presents a technique that uses existing duplicate detection results and crowdsourcing to create a near-gold standard that can be used for these purposes. Another part of the thesis describes and evaluates strategies for reducing these crowdsourcing costs and achieving consensus with less effort.},
  language = {en}
}
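
The partitioning idea summarized in the Zieger abstract above can be illustrated with a minimal, hypothetical Python sketch: rather than comparing every pair of records (quadratic in the dataset size), records are first grouped by a partitioning key and only compared within each group. All names below (records, blocking_key, similar) are illustrative assumptions, not the thesis's actual implementation; in particular, the hand-picked key and the surname-based similarity measure stand in for the automatically learned keys and tuned similarity measures the thesis describes.

from collections import defaultdict
from itertools import combinations

# Toy dataset; in practice these would be rows from a real table.
records = [
    {"id": 1, "name": "Jon Smith", "city": "Berlin"},
    {"id": 2, "name": "John Smith", "city": "Berlin"},
    {"id": 3, "name": "Jane Doe", "city": "Potsdam"},
    {"id": 4, "name": "J. Doe", "city": "Potsdam"},
]

def blocking_key(record):
    # Hypothetical partitioning key: first letter of the name plus the city.
    # The thesis finds suitable keys automatically; this one is hand-picked.
    return (record["name"][0].upper(), record["city"])

def similar(a, b):
    # Placeholder similarity measure (shared surname); real systems would use
    # string metrics such as edit distance with tuned thresholds.
    return a["name"].split()[-1] == b["name"].split()[-1]

# Partition the records by key, then compare only within each partition.
blocks = defaultdict(list)
for record in records:
    blocks[blocking_key(record)].append(record)

candidate_pairs = [pair for block in blocks.values()
                   for pair in combinations(block, 2)]
duplicates = [(a["id"], b["id"]) for a, b in candidate_pairs if similar(a, b)]

total_pairs = len(records) * (len(records) - 1) // 2
print(f"{len(candidate_pairs)} comparisons instead of {total_pairs}")
print("duplicate candidates:", duplicates)

On this toy input the key cuts the comparisons from 6 to 2 while still finding both duplicate pairs; a poorly chosen key would instead place true duplicates in different partitions and miss them, which is why automatic selection of effective partitioning keys matters.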