This post belongs to the series "Reversing C++ binaries". First post here.
The goal of this episode is to understand the lifecycle of an object when using
global, local and dynamically allocated instances. At the end, the memory layout of the fields of a simple object is analyzed.
As a first example, we take into account the following class:
// Minimal sample class used by the examples in this post: a single int
// field, a constructor, a destructor and one accessor. The comment above
// each member gives its mangled symbol name.
class TestClass
{
public:
    // _ZN9TestClassC1Ev
    // Constructor: the field starts out as 1.
    TestClass() : stuff(1)
    {
    }

    // _ZN9TestClassD1Ev
    // Destructor: the field is overwritten with 0.
    ~TestClass()
    {
        this->stuff = 0;
    }

    // _ZN9TestClass8GetStuffEv
    // Accessor: returns the current value of the field.
    int GetStuff()
    {
        return this->stuff;
    }

private:
    // The only data member of the class.
    int stuff;
};
A common approach when compiling a class is to create global functions for constructors, destructors and methods, and to allocate only enough space for the fields. Each global function operates on this, which is a pointer to the object. Depending on the compiler, this pointer can be passed in a register or on the stack. I'm currently using g++, which uses the stack to pass this as the first parameter.
Local Objects
The following function initializes an instance of the class on the stack and calls the method GetStuff:
// _Z7OnStackv
// Creates a TestClass instance as a local variable and returns the value
// reported by its GetStuff() method.
int OnStack()
{
// Local object: in the disassembly that follows, its address is computed as
// -0xc(%ebp) and passed as the first parameter of the constructor.
TestClass t1;
// In the disassembly the result is saved in %ebx while the destructor of t1
// is called, and is moved back into %eax just before returning.
return t1.GetStuff();
}
804862d: push %ebp
804862e: mov %esp,%ebp
8048630: push %ebx
8048631: lea -0x1034(%esp),%esp
8048638: orl $0x0,(%esp)
804863c: lea 0x1010(%esp),%esp
8048643: lea -0xc(%ebp),%eax ; <- this
8048646: mov %eax,(%esp) ; as first parameter
8048649: call 804876e <_ZN9TestClassC1Ev> ; Constructor
804864e: lea -0xc(%ebp),%eax ; <- this
8048651: mov %eax,(%esp) ; as first parameter
8048654: call 80487ae <_ZN9TestClass8GetStuffEv> ; Method
8048659: mov %eax,%ebx
804865b: lea -0xc(%ebp),%eax ; <- this
804865e: mov %eax,(%esp) ; as first parameter
8048661: call 804878e <_ZN9TestClassD1Ev> ; Destructor on exit
8048666: mov %ebx,%eax
8048668: add $0x24,%esp
804866b: pop %ebx
804866c: pop %ebp
804866d: ret
Dynamic Allocated Object
A heap-based initialization implies the use of the new and delete operators, which respectively trigger the class constructor and destructor. The new operator, which resides in a shared library, performs a malloc, while the delete operator performs a free.
// _Z6OnHeapv
// Allocates a TestClass with new, reads its value through the pointer,
// releases the object with delete and returns the value that was read.
int OnHeap()
{
// new: _Znwj@plt
// con: _ZN9TestClassC1Ev
// The new-expression calls the new operator first (the disassembly passes
// 0x4 as the size to allocate) and then the constructor on the returned pointer.
TestClass* t1 = new TestClass();
// The value is read before the object is destroyed.
int ret = t1->GetStuff();
// des: _ZN9TestClassD1Ev
// delete: _ZdlPv@plt
// The delete-expression calls the destructor and then the delete operator;
// in the disassembly both calls are skipped when the pointer is null.
delete t1;
return ret;
}
804866e: push %ebp
804866f: mov %esp,%ebp
8048671: push %ebx
8048672: lea -0x1034(%esp),%esp
8048679: orl $0x0,(%esp)
804867d: lea 0x1010(%esp),%esp
8048684: movl $0x4,(%esp) ; the size to allocate
804868b: call 80484e0 <_Znwj@plt> ; the new operator
8048690: mov %eax,%ebx ; return the 'this' reference
8048692: mov %ebx,(%esp)
8048695: call 804876e <_ZN9TestClassC1Ev>
804869a: mov %ebx,-0xc(%ebp)
804869d: mov -0xc(%ebp),%eax
80486a0: mov %eax,(%esp)
80486a3: call 80487ae <_ZN9TestClass8GetStuffEv>
80486a8: mov %eax,-0x10(%ebp)
80486ab: mov -0xc(%ebp),%ebx
80486ae: test %ebx,%ebx ; If the reference is null
80486b0: je 80486c2 <_Z6OnHeapv+0x54> ; ...return
80486b2: mov %ebx,(%esp)
80486b5: call 804878e <_ZN9TestClassD1Ev>
80486ba: mov %ebx,(%esp) ; Value of this as first parameter
80486bd: call 80484a0 <_ZdlPv@plt> ; of the delete operator
80486c2: mov -0x10(%ebp),%eax
80486c5: add $0x24,%esp
80486c8: pop %ebx
80486c9: pop %ebp
80486ca: ret
Global Objects
Both static and global objects are initialized before main is called, by an initialization routine. The same routine saves the address of each new object and
the address of its destructor into a global table, so that the objects can be destroyed by
the __run_exit_handlers routine, which is executed after the main function returns.
// Global object. In the initialization routine disassembled after this
// listing, its constructor is called with the address 0x804a00c.
TestClass tg;
// File-local (static) object. In the same routine, its constructor is called
// with the address 0x804a014.
static TestClass ts;
// Returns the sum of the GetStuff() values of the global object tg and of
// the static object ts.
int GlobalTc()
{
return tg.GetStuff() + ts.GetStuff();
}
08048793 <_Z41__static_initialization_and_destruction_0ii>:
...
80487e3: movl $0x804a00c,(%esp)
80487ea: call 8048860 <_ZN9TestClassC1Ev> ; Global initialization
80487ef: movl $0x804a004,0x8(%esp)
...
804880b: movl $0x804a014,(%esp)
8048812: call 8048860 <_ZN9TestClassC1Ev> ; Static initialization
...
8048834: ret
Memory Layout
For a better understanding of the memory layout, let's use a class with a few more fields.
class WithFieldsClass
{
public:
// _ZN15WithFieldsClassC1Ev
// Constructor: assigns every field of the class exactly once, so that its
// disassembly contains one store per field.
WithFieldsClass()
{
publicS1 = 1;
// publicS2 points to another field of the same object (protectedS1).
publicS2 = &protectedS1;
protectedS1 = 'c'; // appears as $0x63 in the disassembly
protectedS2 = 2;
// privateS1 points to publicS1; the disassembly stores the value of this itself.
privateS1 = &publicS1;
privateS2 = 'd'; // appears as $0x64 in the disassembly
}
int publicS1;
char* publicS2;
protected:
char protectedS1;
int protectedS2;
private:
int* privateS1;
char privateS2;
};
The constructor is the function to analyze in order to understand which fields are present in the object and, possibly, their types.
08048b02 <_ZN15WithFieldsClassC1Ev>:
8048b02: push %ebp
8048b03: mov %esp,%ebp
8048b05: lea -0x1010(%esp),%esp
8048b0c: orl $0x0,(%esp)
8048b10: lea 0x1010(%esp),%esp
8048b17: mov 0x8(%ebp),%eax
8048b1a: movl $0x1,(%eax) ; this->publicS1 = 1
8048b20: mov 0x8(%ebp),%eax
8048b23: lea 0x8(%eax),%edx
8048b26: mov 0x8(%ebp),%eax
8048b29: mov %edx,0x4(%eax) ; this->publicS2 = &protectedS1
8048b2c: mov 0x8(%ebp),%eax
8048b2f: movb $0x63,0x8(%eax) ; this->protectedS1 = 'c'
8048b33: mov 0x8(%ebp),%eax
8048b36: movl $0x2,0xc(%eax) ; this->protectedS2 = 2
8048b3d: mov 0x8(%ebp),%edx
8048b40: mov 0x8(%ebp),%eax
8048b43: mov %edx,0x10(%eax) ; this->privateS1 = &publicS1
8048b46: mov 0x8(%ebp),%eax
8048b49: movb $0x64,0x14(%eax) ; this->privateS2 = 'd'
8048b4d: pop %ebp
8048b4e: ret
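From this analysis, we can infer that the memory layout is sequential and follows the declaration order of the fields. Note that the char field protectedS1 at offset 0x08 is followed by padding, so that the next field, protectedS2, starts at the 4-byte aligned offset 0x0c: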
this+0x00: publicS1
this+0x04: publicS2
this+0x08: protectedS1
this+0x0c: protectedS2
this+0x10: privateS1
this+0x14: privateS2