% Journal article entry. Authors are listed in "Last, First" form and joined
% with " and ", the only name separator BibTeX recognises. Fields that carried
% no data were dropped. `institution` is not a standard @article field; it is
% kept as an affiliation annotation and is ignored by standard styles.
@article{demmel2024datasynthesis,
  author      = {Demmel, Markus and Göbel, Thomas and Gonçalves, Patrik and Baier, Harald},
  title       = {Data Synthesis is Going Mobile – On Community-driven Dataset Generation for {Android} Devices},
  journal     = {Digital Threats: Research and Practice},
  year        = {2024},
  doi         = {10.1145/3688807},
  keywords    = {Mobile forensics, Android image, Forensic dataset, Digital corpora, Data synthesis, Data generation, Data synthesis framework, UI testing, Android Emulator, User simulation, Human interaction},
  abstract    = {Personal electronic devices such as smartphones and smartwatches have become indispensable daily companions, collecting a multitude of personal and sensitive data. As a result, they are of paramount importance in digital forensic examinations. However, there is a lack of publicly available and ready-to-use digital forensic datasets, especially in mobile forensics. This work presents a concept and an open-source proof-of-concept implementation, which simplifies and automates the creation of mobile forensic datasets within the scope of the Android operating system. In contrast to previous approaches, which populate the most common databases of an Android device, our concept is based on community-driven playbooks and makes use of interaction with the actual smartphone GUI. Hence, we are able to generate coherent and realistic traces as they occur in real-world human usage. Our proof-of-concept implementation is based on the standard Android emulation environment and borrows tools from the user interface testing community. Our evaluation shows that our approach actually generates realistic Android datasets. For instance, we can generate traces that cannot be simulated by gestures (e.g., changing the GPS position or triggering incoming phone calls).
Recording the actual data synthesis process allows users to either create and share their own playbooks (i.e., the exact instructions for the data synthesis process rather than having to share the full image) or reproduce Android images with different scenarios using playbooks previously created and shared by the community.},
  institution = {Universität der Bundeswehr München, Fakultät für Informatik, INF 6 - Institut für Systemsicherheit, Professur: Baier, Harald},
}