% Conference paper (POPL 2016) by Katz, El-Yaniv and Yahav on estimating types in binaries.
@inproceedings{44f57489b2334b46bed5d3740d760e9f,
title = "Estimating types in binaries using predictive modeling",
abstract = "Reverse engineering is an important tool in mitigating vulnerabilities in binaries. As a lot of software is developed in object-oriented languages, reverse engineering of object-oriented code is of critical importance. One of the major hurdles in reverse engineering binaries compiled from object-oriented code is the use of dynamic dispatch. In the absence of debug information, any dynamic dispatch may seem to jump to many possible targets, posing a significant challenge to a reverse engineer trying to track the program flow. We present a novel technique that allows us to statically determine the likely targets of virtual function calls. Our technique uses object tracelets - statically constructed sequences of operations performed on an object - to capture potential runtime behaviors of the object. Our analysis automatically pre-labels some of the object tracelets by relying on instances where the type of an object is known. The resulting type-labeled tracelets are then used to train a statistical language model (SLM) for each type. We then use the resulting ensemble of SLMs over unlabeled tracelets to generate a ranking of their most likely types, from which we deduce the likely targets of dynamic dispatches. We have implemented our technique and evaluated it over real-world C++ binaries. Our evaluation shows that when there are multiple alternative targets, our approach can drastically reduce the number of targets that have to be considered by a reverse engineer.",
keywords = "Reverse engineering, Static binary analysis, X86",
author = "Omer Katz and Ran El-Yaniv and Eran Yahav",
note = "Publisher Copyright: {\textcopyright} 2016 ACM.; 43rd Annual ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, POPL 2016 ; Conference date: 20-01-2016 Through 22-01-2016",
year = "2016",
month = jan,
day = "11",
doi = "10.1145/2837614.2837674",
language = "English",
series = "Conference Record of the Annual {ACM} Symposium on Principles of Programming Languages",
pages = "313--326",
editor = "Rupak Majumdar and Rastislav Bodik",
booktitle = "{POPL} 2016 - Proceedings of the 43rd Annual {ACM} {SIGPLAN-SIGACT} Symposium on Principles of Programming Languages",
}