% Dollmann and Geierhos, EMNLP 2016 conference paper (ACL Anthology D16-1186).
% Empty placeholder fields from the original export were dropped; add them back
% only when real values are known.
@inproceedings{dollmann2016topic,
  author    = {Dollmann, Markus and Geierhos, Michaela},
  title     = {On- and Off-Topic Classification and Semantic Annotation of User-Generated Software Requirements},
  booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2016},
  pages     = {1807--1816},
  doi       = {10.18653/v1/D16-1186},
  url       = {https://www.aclweb.org/anthology/D16-1186},
  abstract  = {Users prefer natural language software requirements because of their usability and accessibility. When they describe their wishes for software development, they often provide off-topic information. We therefore present an automated approach for identifying and semantically annotating the on-topic parts of the given descriptions. It is designed to support requirement engineers in the requirement elicitation process on detecting and analyzing requirements in user-generated content. Since no lexical resources with domain-specific information about requirements are available, we created a corpus of requirements written in controlled language by instructed users and uncontrolled language by uninstructed users. We annotated these requirements regarding predicate-argument structures, conditions, priorities, motivations and semantic roles and used this information to train classifiers for information extraction purposes. The approach achieves an accuracy of 92\% for the on- and off-topic classification task and an F1-measure of 72\% for the semantic annotation.},
}