% Chapter in the edited volume "Multimodal Scene Understanding" (Academic Press, 2019).
% - Name lists use " and " as the separator, as BibTeX requires.
% - `doi` holds the chapter-level DOI (the same one the `url` resolves).
% - `bookdoi` is a non-standard field ignored by styles; it preserves the identifier
%   originally exported in `doi` (presumably the DOI of the enclosing book -- confirm).
% - NOTE(review): editor "Ying Yang, Michael" is kept as exported; verify the
%   surname/given-name split against the publisher's listing.
@incollection{huang2019urban,
  author      = {Huang, Hai and Kuhn, Andreas and Michelini, Mario and Schmitz, Matthias and Mayer, Helmut},
  title       = {{3D} Urban Scene Reconstruction and Interpretation from Multisensor Imagery},
  editor      = {Ying Yang, Michael and Rosenhahn, Bodo and Murino, Vittorio},
  booktitle   = {Multimodal Scene Understanding: Algorithms, Applications and Deep Learning},
  publisher   = {Academic Press},
  address     = {London},
  year        = {2019},
  pages       = {307--340},
  isbn        = {978-0-12-817358-9 ; 978-0-12-817359-6},
  doi         = {10.1016/B978-0-12-817358-9.00016-0},
  url         = {https://doi.org/10.1016/B978-0-12-817358-9.00016-0},
  bookdoi     = {10.1016/C2018-0-01791-0},
  abstract    = {We present an approach for 3D urban scene reconstruction and interpretation based on the fusion of terrestrial and unmanned aerial vehicle (UAV) imagery. The terrestrial close range images acquired with high-resolution cameras reveal details of buildings, particularly of the facades. Yet, they have a poor coverage of the roofs and the ground due to the obtuse viewing angle. Thus, they are complemented by UAV imagery taken from larger distances using nadir and oblique views, with a clear view on the ground and the roofs. The resulting wide-baseline images are fused by a precise and reliable pose estimation approach and dense 3D point clouds including color are reconstructed. The colored 3D points are the input to semantic scene classification, which effectively fuses color and 3D geometric information. A set of “relative” features is proposed to provide an intra-class stable as well as inter-class discriminative description of objects. In comparison with the conventional “absolute” attributes, relative features provide context-sensitive measurements of both color and 3D geometry.
The classification results for buildings are input to an automatic pipeline for level of detail 2 (LOD2) building model reconstruction combining a reliable scene as well as building decomposition with a subsequent primitive-based reconstruction and assembly. Finally, LoD3 models are obtained by integrating the results of facade image interpretation with an adapted convolutional neural network, which employs the 3D point cloud as well as the terrestrial images.},
  institution = {Universität der Bundeswehr München, Fakultät für Informatik, INF 4 - Institut für Angewandte Informatik, Professur: Mayer, Helmut},
}