Friday, October 17, 2014

Reversing C++ binaries 2: Objects lifecycle and structure

This post belongs to the series "Reversing C++ binaries". The first post is here.

The goal of this episode is to understand the lifecycle of objects when using
global, local and dynamically allocated instances. At the end, the in-memory layout of the fields of a simple object is analyzed.

As a first example, we take into account the following class:

// Minimal class holding a single private int, used to follow constructor,
// destructor and method calls in the disassembly.
class TestClass
{
  public:
  // Constructor, mangled as _ZN9TestClassC1Ev: sets the field to 1.
  TestClass() : stuff(1)
  {
  }

  // Destructor, mangled as _ZN9TestClassD1Ev: resets the field to 0.
  ~TestClass()
  {
    this->stuff = 0;
  }

  // Accessor, mangled as _ZN9TestClass8GetStuffEv: returns the field value.
  int GetStuff()
  {
    return this->stuff;
  }

  private:
  int stuff;
};

A common approach when compiling a class is to create global functions for constructors, destructors and methods, and to allocate only enough space for the fields. Each global function operates on this, which is a pointer to the object. Depending on the compiler, this pointer can be passed in a register or on the stack. I'm currently using g++, which passes this on the stack as the first parameter.

Local Objects

The following function instantiates the class on the stack and calls the method GetStuff:

// _Z7OnStackv
// Creates a TestClass with automatic storage, calls GetStuff() on it and
// returns the value obtained.
int OnStack()
{
  // The constructor (_ZN9TestClassC1Ev) is called on the stack slot of t1.
  TestClass t1;
  // The result of GetStuff() is kept aside while the destructor
  // (_ZN9TestClassD1Ev) runs on t1, then it is returned to the caller.
  return t1.GetStuff();
}

804862d: push   %ebp
804862e: mov    %esp,%ebp
8048630: push   %ebx
8048631: lea    -0x1034(%esp),%esp
8048638: orl    $0x0,(%esp)
804863c: lea    0x1010(%esp),%esp
8048643: lea    -0xc(%ebp),%eax  ; <- this
8048646: mov    %eax,(%esp)      ;    as first parameter
8048649: call   804876e <_ZN9TestClassC1Ev> ; Constructor
804864e: lea    -0xc(%ebp),%eax  ; <- this
8048651: mov    %eax,(%esp)      ;    as first parameter
8048654: call   80487ae <_ZN9TestClass8GetStuffEv> ; Method
8048659: mov    %eax,%ebx
804865b: lea    -0xc(%ebp),%eax  ; <- this
804865e: mov    %eax,(%esp)      ;    as first parameter
8048661: call   804878e <_ZN9TestClassD1Ev> ; Destructor on exit
8048666: mov    %ebx,%eax
8048668: add    $0x24,%esp
804866b: pop    %ebx
804866c: pop    %ebp
804866d: ret

Dynamically Allocated Objects

A heap-based instantiation implies the use of the new and delete operators, which respectively trigger the class constructor and destructor. The new operator, which resides in a shared library, performs a malloc, while the delete operator performs a free.

// _Z6OnHeapv
// Allocates a TestClass on the heap, reads its value through GetStuff(),
// destroys the object and returns the value that was read.
int OnHeap()
{
  // new: _Znwj@plt
  // con: _ZN9TestClassC1Ev
  // operator new is asked for the size of the object (4 bytes in the listing
  // below); the constructor is then called on the returned pointer.
  TestClass* t1 = new TestClass();

  // Stored in a local because t1 is destroyed before the function returns.
  int ret = t1->GetStuff();

  // des: _ZN9TestClassD1Ev
  // delete: _ZdlPv@plt
  // The generated code tests t1 for null first: both the destructor and
  // operator delete are skipped when the pointer is null.
  delete t1;
  return ret;
}

804866e: push   %ebp
804866f: mov    %esp,%ebp
8048671: push   %ebx
8048672: lea    -0x1034(%esp),%esp
8048679: orl    $0x0,(%esp)
804867d: lea    0x1010(%esp),%esp
8048684: movl   $0x4,(%esp)         ; the size to allocate
804868b: call   80484e0 <_Znwj@plt> ; the new operator
8048690: mov    %eax,%ebx           ; return the 'this' reference
8048692: mov    %ebx,(%esp)
8048695: call   804876e <_ZN9TestClassC1Ev>
804869a: mov    %ebx,-0xc(%ebp)
804869d: mov    -0xc(%ebp),%eax
80486a0: mov    %eax,(%esp)
80486a3: call   80487ae <_ZN9TestClass8GetStuffEv>
80486a8: mov    %eax,-0x10(%ebp)
80486ab: mov    -0xc(%ebp),%ebx
80486ae: test   %ebx,%ebx                 ; If the reference is null
80486b0: je     80486c2 <_Z6OnHeapv+0x54> ; ...return
80486b2: mov    %ebx,(%esp)
80486b5: call   804878e <_ZN9TestClassD1Ev>
80486ba: mov    %ebx,(%esp)           ; Value of this as first parameter
80486bd: call   80484a0 <_ZdlPv@plt>  ; of the delete operator
80486c2: mov    -0x10(%ebp),%eax
80486c5: add    $0x24,%esp
80486c8: pop    %ebx
80486c9: pop    %ebp
80486ca: ret

Global Objects

Both static and global objects are initialized before main is called,
by an initialization routine. The same routine saves the new references and
the destructor address into a global table, from which the objects will be
destroyed by the __run_exit_handlers routine, executed after the main function.

// Two instances with static storage duration: tg is visible to other
// translation units, ts is private to this one.
TestClass tg;
static TestClass ts;

// _Z8GlobalTcv
// Returns the sum of the values read from the global and the static instance.
int GlobalTc()
{
  const int fromGlobal = tg.GetStuff();
  const int fromStatic = ts.GetStuff();
  return fromGlobal + fromStatic;
}

08048793 <_Z41__static_initialization_and_destruction_0ii>:
 ...
 80487e3: movl   $0x804a00c,(%esp)
 80487ea: call   8048860 <_ZN9TestClassC1Ev> ; Global initialization
 80487ef: movl   $0x804a004,0x8(%esp)
 ...
 804880b: movl   $0x804a014,(%esp)
 8048812: call   8048860 <_ZN9TestClassC1Ev> ; Static initialization
 ...
 8048834: ret

Memory Layout

For a better understanding of the memory layout, let's use a class with a few more fields.

// Sample class mixing int, char and pointer fields across the three access
// levels, used to observe how the fields are placed in memory. The offsets in
// the comments are the ones read from the constructor disassembly of the
// 32-bit build shown in this post.
class WithFieldsClass
{
public:
  // _ZN15WithFieldsClassC1Ev
  // Assigns every field in declaration order, so the stores in the
  // disassembly appear at increasing offsets from this.
  WithFieldsClass()
  {
    publicS1 = 1;
    publicS2 = &protectedS1;  // address of the field at this+0x08
    protectedS1 = 'c';        // stored as the byte 0x63
    protectedS2 = 2;
    privateS1 = &publicS1;    // address of the first field, i.e. this itself
    privateS2 = 'd';          // stored as the byte 0x64
  }

  int publicS1;      // this+0x00
  char* publicS2;    // this+0x04

protected:
  char protectedS1;  // this+0x08; the next field starts at +0x0c, leaving 3 unused bytes
  int protectedS2;   // this+0x0c

private:
  int* privateS1;    // this+0x10
  char privateS2;    // this+0x14
};

The constructor is the function to analyze in order to understand which fields are present in the object and, possibly, their types.

08048b02 <_ZN15WithFieldsClassC1Ev>:
 8048b02: push   %ebp
 8048b03: mov    %esp,%ebp
 8048b05: lea    -0x1010(%esp),%esp
 8048b0c: orl    $0x0,(%esp)
 8048b10: lea    0x1010(%esp),%esp
 8048b17: mov    0x8(%ebp),%eax
 8048b1a: movl   $0x1,(%eax)     ; this->publicS1 = 1
 8048b20: mov    0x8(%ebp),%eax
 8048b23: lea    0x8(%eax),%edx
 8048b26: mov    0x8(%ebp),%eax
 8048b29: mov    %edx,0x4(%eax)  ; this->publicS2 = &protectedS1
 8048b2c: mov    0x8(%ebp),%eax
 8048b2f: movb   $0x63,0x8(%eax) ; this->protectedS1 = 'c'
 8048b33: mov    0x8(%ebp),%eax
 8048b36: movl   $0x2,0xc(%eax)  ; this->protectedS2 = 2
 8048b3d: mov    0x8(%ebp),%edx
 8048b40: mov    0x8(%ebp),%eax
 8048b43: mov    %edx,0x10(%eax) ; this->privateS1 = &publicS1
 8048b46: mov    0x8(%ebp),%eax
 8048b49: movb   $0x64,0x14(%eax) ; this->privateS2 = 'd'
 8048b4d: pop    %ebp
 8048b4e: ret

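From this analysis, we can infer that the fields are laid out sequentially, in declaration order (note that the one-byte protectedS1 is followed by three bytes of padding, so that protectedS2 stays 4-byte aligned):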

  this+0x00: publicS1
  this+0x04: publicS2
  this+0x08: protectedS1
  this+0x0c: protectedS2
  this+0x10: privateS1
  this+0x14: privateS2

No comments: