Showing posts with label Reversing. Show all posts
Showing posts with label Reversing. Show all posts

Monday, October 20, 2014

Reversing C++ binaries 4: Inheritance


The previous tutorial was about virtual members. Now we can use that knowledge to analyze inheritance. We will use the usual TestClass, but now it extends BaseTestClass:

// Polymorphic base class of this example. It holds one int field
// (baseStuff) and declares a virtual destructor and a virtual getter, so
// every instance starts with a vtable pointer.
class BaseTestClass
{
public:
  // _ZN13BaseTestClassC1Ev
  // Constructor: sets baseStuff to 1.
  BaseTestClass()
  {
    baseStuff = 1;
  }
  // _ZN13BaseTestClassD*Ev
  // Virtual destructor: resets baseStuff to 0.
  virtual ~BaseTestClass()
  {
    baseStuff = 0;
  }
  // _ZN13BaseTestClass8GetStuffEv
  // Virtual getter: returns the current value of baseStuff.
  virtual int GetStuff()
  {
    return baseStuff;
  }
protected:
  // Protected so that derived classes can read it directly.
  int baseStuff;
};
// Derived class: adds the field stuff, overrides GetStuff() and introduces
// a new virtual method, AnotherMethod().
class TestClass : public BaseTestClass
{
public:
  // _ZN9TestClassC1Ev
  // Constructor: the base constructor runs first (implicitly), then stuff
  // is set to 2.
  TestClass()
  {
    stuff = 2;
  }
  // _ZN9TestClassD*Ev
  // Virtual destructor: resets stuff to 0.
  virtual ~TestClass()
  {
    stuff = 0;
  }
  // _ZN9TestClass8GetStuffEv
  // Override: returns the base class value plus stuff. The qualified call
  // BaseTestClass::GetStuff() is a direct call, not a virtual dispatch.
  virtual int GetStuff()
  {
    return BaseTestClass::GetStuff() + stuff;
  }
  // _ZN9TestClass13AnotherMethodEv
  // New virtual method (not present in the base class): returns the
  // inherited baseStuff field.
  virtual int AnotherMethod()
  {
    return baseStuff;
  }
private:
  int stuff;
};
// Calls GetStuff() through a base class pointer. GetStuff is virtual, so
// the implementation of the object's dynamic type is the one that runs.
int DoIt(BaseTestClass* btc)
{
  return btc->GetStuff();
}
// Creates one BaseTestClass and one TestClass on the heap, exercises the
// virtual calls and returns the sum of the results.
int main()
{
  BaseTestClass* t1 = new BaseTestClass();
  TestClass* t2 = new TestClass();
  // DoIt(t1) -> BaseTestClass::GetStuff() = 1
  // DoIt(t2) -> TestClass::GetStuff()     = 1 + 2 = 3
  // t2->AnotherMethod()                   = 1
  int a = DoIt(t1) + DoIt(t2) + t2->AnotherMethod();
  delete t1;
  delete t2;
  return a;
}

This time we'll go directly to the constructors to analyze the virtual tables:

080487ee <_ZN13BaseTestClassC1Ev>:
 80487ee: push   %ebp
 80487ef: mov    %esp,%ebp
 80487f1: lea    -0x1010(%esp),%esp
 80487f8: orl    $0x0,(%esp)
 80487fc: lea    0x1010(%esp),%esp
 8048803: mov    0x8(%ebp),%eax    ; this pointer
 8048806: movl   $0x8048ad8,(%eax) ; vtable = 0x8048ad8
 804880c: mov    0x8(%ebp),%eax
 804880f: movl   $0x1,0x4(%eax)    ; baseStuff = 1
 8048816: pop    %ebp
 8048817: ret

080488a2 <_ZN9TestClassC1Ev>:
 80488a2: push   %ebp
 80488a3: mov    %esp,%ebp
 80488a5: lea    -0x1028(%esp),%esp
 80488ac: orl    $0x0,(%esp)
 80488b0: lea    0x1010(%esp),%esp
 80488b7: mov    0x8(%ebp),%eax
 80488ba: mov    %eax,(%esp)         ; this pointer
 80488bd: call   80487ee <_ZN13BaseTestClassC1Ev> ; Call to base constructor
 80488c2: mov    0x8(%ebp),%eax
 80488c5: movl   $0x8048ac0,(%eax) ; vtable = 0x8048ac0 (overwrite)
 80488cb: mov    0x8(%ebp),%eax
 80488ce: movl   $0x2,0x8(%eax) ; stuff = 2
 80488d5: leave
 80488d6: ret
 80488d7: nop

Let's review what happened. The first action done by the TestClass constructor is to call the base constructor, which sets the virtual table and the field baseStuff:

  this+0x0: 0x8048ad8 (BaseTestClass vtable)
  this+0x4: 1 (BaseTestClass::baseStuff)

Then, the vtable pointer is overwritten and the field stuff is stored right after baseStuff:

  this+0x0: 0x8048ac0 (TestClass vtable)
  this+0x4: 1 (BaseTestClass::baseStuff)
  this+0x8: 2 (TestClass::stuff)

Vtables are:

Contents of section .rodata:
 8048ab0 03000000 01000200 00000000 f08a0408  ................
 8048ac0 4c890408 96890408 c4890408 ee890408  L...............
 8048ad0 00000000 0c8b0408 8c880408 ca880408  ................
 8048ae0 f8880408 39546573 74436c61 73730000  ....9TestClass..
 8048af0 68a00408 e48a0408 0c8b0408 31334261  h...........13Ba
 8048b00 73655465 7374436c 61737300 28a00408  seTestClass.(...
 8048b10 fc8a0408                             ....            

// BaseTestClass:
 0x8048ad8: _ZN13BaseTestClassD1Ev
 0x8048adc: _ZN13BaseTestClassD0Ev
 0x8048ae0: _ZN13BaseTestClass8GetStuffEv

// TestClass:
 0x8048ac0: _ZN9TestClassD1Ev
 0x8048ac4: _ZN9TestClassD0Ev
 0x8048ac8: _ZN9TestClass8GetStuffEv
 0x8048acc: _ZN9TestClass13AnotherMethodEv

Multiple Inheritance

C++ supports multiple inheritance, so, let's see how it's implemented:

// Second polymorphic base class of the multiple inheritance example. It
// holds one int field (b2) and has a virtual destructor and a virtual
// getter.
class Base2
{
public:
  // _ZN5Base2C1Ev
  // Constructor: sets b2 to 1.
  Base2()
  {
    b2 = 1;
  }
  // _ZN5Base2D*Ev
  // Virtual destructor: resets b2 to 0.
  virtual ~Base2()
  {
    b2 = 0;
  }
  // _ZN5Base29GetStuff2Ev
  // Virtual getter: returns the current value of b2.
  virtual int GetStuff2()
  {
    return b2;
  }
protected:
  int b2;

};

// First polymorphic base class of the multiple inheritance example. It
// holds one int field (baseStuff) and has a virtual destructor and a
// virtual getter.
class Base1
{
public:
  // _ZN5Base1C1Ev
  // Constructor: sets baseStuff to 1.
  Base1()
  {
    baseStuff = 1;
  }
  // _ZN5Base1D*Ev
  // Virtual destructor: resets baseStuff to 0.
  virtual ~Base1()
  {
    baseStuff = 0;
  }
  // _ZN5Base18GetStuffEv
  // Virtual getter: returns the current value of baseStuff.
  virtual int GetStuff()
  {
    return baseStuff;
  }
protected:
  int baseStuff;

};

// Class deriving from both Base1 and Base2 (in this order). It overrides
// the virtual methods of both bases and adds the field stuff.
class TestClass : public Base1, public Base2
{
public:
  // _ZN9TestClassC1Ev
  // Constructor: both base constructors run first (implicitly), then stuff
  // is set to 2.
  TestClass()
  {
    stuff = 2;
  }
  // _ZN9TestClassD*Ev
  // _ZThn8_N9TestClassD*Ev
  // Virtual destructor: resets stuff to 0. The _ZThn8_ symbol is the
  // variant reached through the Base2 vtable.
  virtual ~TestClass()
  {
    stuff = 0;
  }
  // _ZN9TestClass9GetStuff2Ev
  // _ZThn8_N9TestClass9GetStuff2Ev
  // Overrides Base2::GetStuff2(): returns the Base2 value plus stuff. The
  // _ZThn8_ symbol is the variant reached through the Base2 vtable.
  virtual int GetStuff2()
  {
    return Base2::GetStuff2() + stuff;
  }
  // _ZN9TestClass8GetStuffEv
  // Overrides Base1::GetStuff(): returns the Base1 value plus stuff.
  virtual int GetStuff()
  {
    return Base1::GetStuff() + stuff;
  }

private:
  int stuff;
};

// _Z4DoItP5Base1P5Base2
// Calls one virtual method through each base class pointer and returns the
// sum of the two results.
int DoIt(Base1* bt1, Base2* bt2)
{
  return bt1->GetStuff() + bt2->GetStuff2();
}

int main()
{
  TestClass* t1 = new TestClass();
  return DoIt(t1, t1);
}

For each method, I wrote its mangled name as a comment. As you may have noticed, the methods TestClass::GetStuff2() and TestClass::~TestClass() have two mangled names, that is, each of them is mapped to two different functions. The first is the usual method implementation, while the second moves the this reference and jumps to the right method:

08048a7b <_ZThn8_N9TestClass9GetStuff2Ev>:
 8048a7b: subl   $0x8,0x4(%esp)
 8048a80: jmp    8048a4e <_ZN9TestClass9GetStuff2Ev>

The reason is simple, but it will be clearer after we see the memory layout, so let's analyze the constructors:

080488ac <_ZN5Base1C1Ev>:
 80488ac: push   %ebp
 80488ad: mov    %esp,%ebp
 80488af: lea    -0x1010(%esp),%esp
 80488b6: orl    $0x0,(%esp)
 80488ba: lea    0x1010(%esp),%esp
 80488c1: mov    0x8(%ebp),%eax    ; this pointer
 80488c4: movl   $0x8048bd8,(%eax) ; Base1 vtable (0x8048bd8)
 80488ca: mov    0x8(%ebp),%eax
 80488cd: movl   $0x1,0x4(%eax)    ; baseStuff = 1
 80488d4: pop    %ebp
 80488d5: ret

080487f8 <_ZN5Base2C1Ev>:
 80487f8: push   %ebp
 80487f9: mov    %esp,%ebp
 80487fb: lea    -0x1010(%esp),%esp
 8048802: orl    $0x0,(%esp)
 8048806: lea    0x1010(%esp),%esp
 804880d: mov    0x8(%ebp),%eax    ; this pointer
 8048810: movl   $0x8048bf0,(%eax) ; Base2 vtable (0x8048bf0)
 8048816: mov    0x8(%ebp),%eax
 8048819: movl   $0x1,0x4(%eax)    ; b2 = 1
 8048820: pop    %ebp
 8048821: ret

08048982 <_ZN9TestClassC1Ev>:
 8048982: push   %ebp
 8048983: mov    %esp,%ebp
 8048985: lea    -0x1028(%esp),%esp
 804898c: orl    $0x0,(%esp)
 8048990: lea    0x1010(%esp),%esp
 8048997: mov    0x8(%ebp),%eax          ; this pointer
 804899a: mov    %eax,(%esp)
 804899d: call   80488ce <_ZN5Base1C1Ev> ; Base1 constructor
 80489a2: mov    0x8(%ebp),%eax
 80489a5: add    $0x8,%eax               ; this + 8
 80489a8: mov    %eax,(%esp)
 80489ab: call   8048838 <_ZN5Base2C1Ev> ; Base2 constructor
 80489b0: mov    0x8(%ebp),%eax
 80489b3: movl   $0x8048ba8,(%eax)       ; Base1 vtable overwrite (0x8048ba8)
 80489b9: mov    0x8(%ebp),%eax
 80489bc: movl   $0x8048bc0,0x8(%eax)    ; Base2 vtable overwrite (0x8048bc0)
 80489c3: mov    0x8(%ebp),%eax
 80489c6: movl   $0x2,0x10(%eax)         ; stuff = 2
 80489cd: leave
 80489ce: ret

Note the presence of two vtables. The memory layout is the following after calling the two base constructors:

  this+0x0: 0x8048bd8 (Base1 vtable)
  this+0x4: 1 (Base1::baseStuff)
  this+0x8: 0x8048bf0 (Base2 vtable)
  this+0xc: 1 (Base2::b2)

Then, the constructor overwrites both the vtable references and writes its variable:

  this+0x0: 0x8048ba8 (Base1 vtable overwrite)
  this+0x4: 1 (Base1::baseStuff)
  this+0x8: 0x8048bc0 (Base2 vtable overwrite)
  this+0xc: 1 (Base2::b2)
  this+0x10: 2 (TestClass::stuff)

Vtables:

Contents of section .rodata:
 8048b80 03000000 01000200 00000000 00000000  ................
 8048b90 00000000 00000000 00000000 00000000  ................
 8048ba0 00000000 208c0408 ce890408 3a8a0408  .... .......:...
 8048bb0 a28a0408 6e8a0408 f8ffffff 208c0408  ....n....... ...
 8048bc0 2f8a0408 678a0408 9b8a0408 00000000  /...g...........
 8048bd0 00000000 488c0408 f6880408 34890408  ....H.......4...
 8048be0 62890408 00000000 00000000 588c0408  b...........X...
 8048bf0 42880408 80880408 ae880408 39546573  B...........9Tes
 8048c00 74436c61 73730000 00000000 00000000  tClass..........
 8048c10 00000000 00000000 00000000 00000000  ................
 8048c20 68b00408 fc8b0408 00000000 02000000  h...............
 8048c30 488c0408 02000000 588c0408 02080000  H.......X.......
 8048c40 35426173 65310000 28b00408 408c0408  5Base1..(...@...
 8048c50 35426173 65320000 28b00408 508c0408  5Base2..(...P...


// Base1:
 0x8048bd8: _ZN5Base1D1Ev
 0x8048bdc: _ZN5Base1D0Ev
 0x8048be0: _ZN5Base18GetStuffEv

// Base2:
 0x8048bf0: _ZN5Base2D1Ev
 0x8048bf4: _ZN5Base2D0Ev
 0x8048bf8: _ZN5Base29GetStuff2Ev

// TestClass vtable1:
 0x8048ba8: _ZN9TestClassD1Ev
 0x8048bac: _ZN9TestClassD0Ev
 0x8048bb0: _ZN9TestClass8GetStuffEv
 0x8048bb4: _ZN9TestClass9GetStuff2Ev
// TestClass vtable2:
 0x8048bc0: _ZThn8_N9TestClassD1Ev
 0x8048bc4: _ZThn8_N9TestClassD0Ev
 0x8048bc8: _ZThn8_N9TestClass9GetStuff2Ev

By looking at the memory layout and the vtables, the reason for the duplicated functions, such as _ZThn8_N9TestClass9GetStuff2Ev, and for the second vtable becomes obvious:

08048a7b <_ZThn8_N9TestClass9GetStuff2Ev>:
 8048a7b: subl   $0x8,0x4(%esp)
 8048a80: jmp    8048a4e <_ZN9TestClass9GetStuff2Ev>

The wrapper function adjusts the this reference, so that the compiler can reuse the implementation of TestClass::GetStuff2. If you cast a TestClass* to Base2*, the compiler moves the this reference so that it points to the second vtable (this+0x8). However, since the method is virtual and has been overridden, by calling GetStuff2 through a Base2* you are actually calling TestClass::GetStuff2, which expects this to point to the beginning of the TestClass object. But the this reference has been moved. The goal of the wrapper is to undo that move, restoring this to its original location.

For instance, let's see the main and DoIt disassembly:

08048741 <main>:
 ...
 804877b: mov    0x1c(%esp),%eax ; TestClass* t1 -> eax
 804877f: add    $0x8,%eax       ; Casting: this = this+0x8 (2nd vtable)
 ...
 8048789: mov    %eax,0x4(%esp)  ; Second parameter of DoIt
 804878d: mov    0x1c(%esp),%eax ; TestClass* t1 -> eax
 8048791: mov    %eax,(%esp)     ; First parameter of DoIt
 8048794: call   80486fd <_Z4DoItP5Base1P5Base2>
 ...

080486fd <_Z4DoItP5Base1P5Base2>:
 ...
 8048713: mov    0x8(%ebp),%eax ; Base1* bt1 -> eax
 8048716: mov    (%eax),%eax
 8048718: add    $0x8,%eax
 804871b: mov    (%eax),%eax    ; GetStuff from vtable
 804871d: mov    0x8(%ebp),%edx
 8048720: mov    %edx,(%esp)
 8048723: call   *%eax
 8048725: mov    %eax,%ebx
 8048727: mov    0xc(%ebp),%eax ; Base2* bt2 -> eax
 804872a: mov    (%eax),%eax
 804872c: add    $0x8,%eax
 804872f: mov    (%eax),%eax    ; _ZThn8_N9TestClass9GetStuff2Ev from vtable
 8048731: mov    0xc(%ebp),%edx
 8048734: mov    %edx,(%esp)
 8048737: call   *%eax          ; Actual call to _ZThn8_N9TestClass9GetStuff2Ev
 ...

08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
 8048a9b: subl   $0x8,0x4(%esp)  ; 'this' adjustment
 8048aa0: jmp    8048a6e <_ZN9TestClass9GetStuff2Ev>

Calling the wrapper makes this point to the original TestClass pointer again, so the function _ZN9TestClass9GetStuff2Ev can be called with consistent data.

Friday, October 17, 2014

Reversing C++ binaries 3: Virtual members

Now that we have a better understanding of how classes are compiled, we can analyze polymorphism. We can expect virtual members to be handled differently, because a class instance may have a different implementation: the compiler simply can't know at compile time which function to call.

I modified the old TestClass to include a virtual method:

// Minimal polymorphic class: one int field, a virtual destructor and a
// virtual getter.
class TestClass
{
  public:
  // _ZN9TestClassC1Ev
  // Constructor: sets stuff to 1.
  TestClass()
  {
    stuff = 1;
  }

  // _ZN9TestClassD1Ev (complete object destructor)
  // _ZN9TestClassD0Ev (deleting destructor)
  // Virtual destructor: resets stuff to 0.
  virtual ~TestClass()
  {
    stuff = 0;
  }

  // _ZN9TestClass8GetStuffEv
  // Virtual getter: returns the current value of stuff.
  virtual int GetStuff()
  {
    return stuff;
  }

  private:
  int stuff;
};

// _Z4DoItP9TestClass
// Calls the virtual method GetStuff() on the given object and returns its
// result.
int DoIt(TestClass* t1)
{
  return t1->GetStuff();
}

// Creates a TestClass on the heap, calls DoIt on it, destroys the object
// and returns DoIt's result.
int main()
{
  TestClass* t1 = new TestClass();
  int r = DoIt(t1);
  delete t1;
  return r;
}

Let's look at the disassembly of the function DoIt:

0804869d <_Z4DoItP9TestClass>:
 804869d: push   %ebp
 804869e: mov    %esp,%ebp
 80486a0: lea    -0x1028(%esp),%esp
 80486a7: orl    $0x0,(%esp)
 80486ab: lea    0x1010(%esp),%esp
 80486b2: mov    0x8(%ebp),%eax ; t1 = ebp + 8
 80486b5: mov    (%eax),%eax    ; obj = *t1
 80486b7: add    $0x8,%eax      ;
 80486ba: mov    (%eax),%eax    ; GetStuff = obj[8]
 80486bc: mov    0x8(%ebp),%edx
 80486bf: mov    %edx,(%esp)
 80486c2: call   *%eax          ; GetStuff(this)
 80486c4: leave
 80486c5: ret

Unlike previous usages, the DoIt disassembly doesn't contain an explicit call to GetStuff. Instead, there is a call through register eax, which is initialized by dereferencing a field of TestClass. This particular field is not present in the C++ code, thus we must look at the disassembled constructor:

080487a6 <_ZN9TestClassC1Ev>:
 80487a6: push   %ebp
 80487a7: mov    %esp,%ebp
 80487a9: lea    -0x1010(%esp),%esp
 80487b0: orl    $0x0,(%esp)
 80487b4: lea    0x1010(%esp),%esp
 80487bb: mov    0x8(%ebp),%eax
 80487be: movl   $0x8048910,(%eax) ; this[0] = 0x8048910
 80487c4: mov    0x8(%ebp),%eax
 80487c7: movl   $0x1,0x4(%eax)    ; this->stuff = 1;
 80487ce: pop    %ebp
 80487cf: ret

The address 0x8048910 resides in the .rodata section and points to the Virtual Table of TestClass. The vtable contains references to all the virtual methods present in TestClass:

Contents of section .rodata:
 8048900 03000000 01000200 00000000 28890408  ............(...
 8048910 d0870408 0e880408 3c880408 39546573  ........<...9Tes
 8048920 74436c61 73730000 28a00408 1c890408  tClass..(.......

// TestClass virtual table:

 0x8048910 + 0x00: 080487d0 ; _ZN9TestClassD1Ev (Complete Object destructor)
 0x8048910 + 0x04: 0804880e ; _ZN9TestClassD0Ev (Deleting destructor)
 0x8048910 + 0x08: 0804883c ; _ZN9TestClass8GetStuffEv (GetStuff)

To explain the difference between the two destructors, I quote the Itanium C++ ABI reference:

  1. Base object destructor of a class T: A function that runs the destructors for non-static data members of T and non-virtual direct base classes of T. Mangled with suffix D2.
  2. Complete object destructor of a class T: a function that, in addition to the actions required of a base object destructor, runs the destructors for the virtual base classes of T. Mangled with suffix D1.
  3. Deleting destructor of a class T: a function that, in addition to the actions required of a complete object destructor, calls the appropriate deallocation function (i.e., operator delete) for T. Mangled with suffix D0.


Reversing C++ binaries 2: Objects lifecycle and structure

This post belongs to the series "Reversing C++ binaries". First post here.

The goal of this episode is to understand the lifecycle of objects when using
global, local and dynamically allocated instances. In the end, the memory layout of the fields of a simple object is analyzed.

As a first example, we take into account the following class:

// Simple non-polymorphic class with a single int field. Its constructor,
// destructor and method are all non-virtual.
class TestClass
{
  public:
  // _ZN9TestClassC1Ev
  // Constructor: sets stuff to 1.
  TestClass()
  {
    stuff = 1;
  }

  // _ZN9TestClassD1Ev
  // Destructor: resets stuff to 0.
  ~TestClass()
  {
    stuff = 0;
  }

  // _ZN9TestClass8GetStuffEv
  // Returns the current value of stuff.
  int GetStuff()
  {
    return stuff;
  }

  private:
  int stuff;
};

A common approach when compiling a class is to create global functions for constructors, destructors and methods, and to allocate only enough space for the fields. Each global function operates on this, which is a pointer to the object. Depending on the compiler, the reference can be passed in a register or on the stack. I'm currently using g++, which uses the stack to pass this as the first parameter.

Local Objects

The following function creates an object on the stack and calls its method GetStuff:

// _Z7OnStackv
// Creates a TestClass as a local (stack) object and returns the result of
// GetStuff(). The destructor of t1 runs automatically when the function
// exits.
int OnStack()
{
  TestClass t1;
  return t1.GetStuff();
}

804862d: push   %ebp
804862e: mov    %esp,%ebp
8048630: push   %ebx
8048631: lea    -0x1034(%esp),%esp
8048638: orl    $0x0,(%esp)
804863c: lea    0x1010(%esp),%esp
8048643: lea    -0xc(%ebp),%eax  ; <- this
8048646: mov    %eax,(%esp)      ;    as first parameter
8048649: call   804876e <_ZN9TestClassC1Ev> ; Constructor
804864e: lea    -0xc(%ebp),%eax  ; <- this
8048651: mov    %eax,(%esp)      ;    as first parameter
8048654: call   80487ae <_ZN9TestClass8GetStuffEv> ; Method
8048659: mov    %eax,%ebx
804865b: lea    -0xc(%ebp),%eax  ; <- this
804865e: mov    %eax,(%esp)      ;    as first parameter
8048661: call   804878e <_ZN9TestClassD1Ev> ; Destructor on exit
8048666: mov    %ebx,%eax
8048668: add    $0x24,%esp
804866b: pop    %ebx
804866c: pop    %ebp
804866d: ret

Dynamic Allocated Object

A heap-based initialization implies the use of the new and delete operators, which respectively trigger the class constructor and destructor. The new operator, which resides in a shared library, performs a malloc, while the delete operator performs a free.

// _Z6OnHeapv
// Creates a TestClass on the heap, reads its value, destroys it and
// returns the value that was read.
int OnHeap()
{
  // new: _Znwj@plt           (allocates the storage)
  // con: _ZN9TestClassC1Ev   (constructs the object in that storage)
  TestClass* t1 = new TestClass();

  // Save the value first: t1 must not be used after the delete below.
  int ret = t1->GetStuff();

  // des: _ZN9TestClassD1Ev   (destroys the object)
  // delete: _ZdlPv@plt       (releases the storage)
  delete t1;
  return ret;
}

804866e: push   %ebp
804866f: mov    %esp,%ebp
8048671: push   %ebx
8048672: lea    -0x1034(%esp),%esp
8048679: orl    $0x0,(%esp)
804867d: lea    0x1010(%esp),%esp
8048684: movl   $0x4,(%esp)         ; the size to allocate
804868b: call   80484e0 <_Znwj@plt> ; the new operator
8048690: mov    %eax,%ebx           ; return the 'this' reference
8048692: mov    %ebx,(%esp)
8048695: call   804876e <_ZN9TestClassC1Ev>
804869a: mov    %ebx,-0xc(%ebp)
804869d: mov    -0xc(%ebp),%eax
80486a0: mov    %eax,(%esp)
80486a3: call   80487ae <_ZN9TestClass8GetStuffEv>
80486a8: mov    %eax,-0x10(%ebp)
80486ab: mov    -0xc(%ebp),%ebx
80486ae: test   %ebx,%ebx                 ; If the reference is null
80486b0: je     80486c2 <_Z6OnHeapv+0x54> ; ...return
80486b2: mov    %ebx,(%esp)
80486b5: call   804878e <_ZN9TestClassD1Ev>
80486ba: mov    %ebx,(%esp)           ; Value of this as first parameter
80486bd: call   80484a0 <_ZdlPv@plt>  ; of the delete operator
80486c2: mov    -0x10(%ebp),%eax
80486c5: add    $0x24,%esp
80486c8: pop    %ebx
80486c9: pop    %ebp
80486ca: ret

Global Objects

Both static and global objects are initialized before the main is called,
by an initialization routine. The same routine saves the new references and
the destructor address into a global table, where they will be destroyed by
the __run_exit_handlers routine, executed after the main function.

// Global object with external linkage.
TestClass tg;
// Global object with internal linkage (visible only in this file).
static TestClass ts;

// Returns the sum of the values held by the global and the static object.
int GlobalTc()
{
  return tg.GetStuff() + ts.GetStuff();
}

08048793 <_Z41__static_initialization_and_destruction_0ii>:
 ...
 80487e3: movl   $0x804a00c,(%esp)
 80487ea: call   8048860 <_ZN9TestClassC1Ev> ; Global initialization
 80487ef: movl   $0x804a004,0x8(%esp)
 ...
 804880b: movl   $0x804a014,(%esp)
 8048812: call   8048860 <_ZN9TestClassC1Ev> ; Static initialization
 ...
 8048834: ret

Memory Layout

For a better understanding of the memory layout, let's use a class with a few more fields.

// Class with six fields of different types and access levels, used to
// study how fields are laid out in memory.
class WithFieldsClass
{
public:
  // _ZN15WithFieldsClassC1Ev
  // Constructor: assigns every field. Two of the pointer fields are made to
  // point at other fields of the same object.
  WithFieldsClass()
  {
    publicS1 = 1;
    publicS2 = &protectedS1;   // address of a field of this object
    protectedS1 = 'c';
    protectedS2 = 2;
    privateS1 = &publicS1;     // address of the first field
    privateS2 = 'd';
  }

  int publicS1;
  char* publicS2;

protected:
  char protectedS1;
  int protectedS2;

private:
  int* privateS1;
  char privateS2;
};

The constructor is the function to analyze in order to understand which fields are present in the object and, possibly, their types.

08048b02 <_ZN15WithFieldsClassC1Ev>:
 8048b02: push   %ebp
 8048b03: mov    %esp,%ebp
 8048b05: lea    -0x1010(%esp),%esp
 8048b0c: orl    $0x0,(%esp)
 8048b10: lea    0x1010(%esp),%esp
 8048b17: mov    0x8(%ebp),%eax
 8048b1a: movl   $0x1,(%eax)     ; this->publicS1 = 1
 8048b20: mov    0x8(%ebp),%eax
 8048b23: lea    0x8(%eax),%edx
 8048b26: mov    0x8(%ebp),%eax
 8048b29: mov    %edx,0x4(%eax)  ; this->publicS2 = &protectedS1
 8048b2c: mov    0x8(%ebp),%eax
 8048b2f: movb   $0x63,0x8(%eax) ; this->protectedS1 = 'c'
 8048b33: mov    0x8(%ebp),%eax
 8048b36: movl   $0x2,0xc(%eax)  ; this->protectedS2 = 2
 8048b3d: mov    0x8(%ebp),%edx
 8048b40: mov    0x8(%ebp),%eax
 8048b43: mov    %edx,0x10(%eax) ; this->privateS1 = &publicS1
 8048b46: mov    0x8(%ebp),%eax
 8048b49: movb   $0x64,0x14(%eax) ; this->privateS2 = 'd'
 8048b4d: pop    %ebp
 8048b4e: ret

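From this analysis, we can infer that the memory layout is sequential and follows the declaration order. Note that every field starts on a 4-byte boundary: protectedS1 is a single char at this+0x08, so the three bytes before this+0x0c are padding: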

  this+0x00: publicS1
  this+0x04: publicS2
  this+0x08: protectedS1
  this+0x0c: protectedS2
  this+0x10: privateS1
  this+0x14: privateS2

Wednesday, October 15, 2014

Reversing C++ binaries 1: name mangling and global/static functions

Binary reversing is an essential skill for malware analysis and for solving wargame challenges. Programs written in C are common and there are various tutorials about reversing them (calling conventions, dynamic libraries, stack, variables and so on). Once the assembly language is learned, it's just a matter of patience to reverse an application (anti-reversing techniques aside, of course).

However, the assembly generated from C++ code is harder to analyze, due to object-oriented constructs. These tutorials aim to study how high-level constructs, such as namespaces, operators, classes and their relationships, are converted into assembly code and how to reverse them when analyzing a binary.

PART 1: NAME MANGLING AND FUNCTIONS

First of all, function names are replaced in the binary by symbol names suitable for the compiler and the linker. This process is called name mangling (see below for references).

// Anonymous (unnamed) namespace: its members are local to this file. In the
// mangled name the namespace appears as _GLOBAL__N_1.
namespace
{
// _ZN12_GLOBAL__N_17ScroogeEv
// Returns the constant 5.
int Scrooge()
{
    return 5;
}
}

// _Z11GlobalPlutov
// Function in the global namespace with external linkage; returns the
// constant 4.
int GlobalPluto()
{
    return 4;
}

// _ZL11GoofyStaticv
// Static function in the global namespace; returns the constant 3. The 'L'
// in the mangled name marks its internal linkage.
static int GoofyStatic()
{
    return 3;
}

// Named namespace holding one function with external linkage and one
// file-local (static) function.
namespace Donald
{
// _ZN6Donald12GlobalDonaldEv
// Function with external linkage inside namespace Donald; returns 1.
int GlobalDonald()
{
    return 1;
}
// _ZN6DonaldL12StaticDonaldEv
// File-local function inside namespace Donald; returns 2. It has to be
// declared static: the 'L' in its mangled name marks internal linkage,
// exactly as in _ZL11GoofyStaticv.
static int StaticDonald()
{
    return 2;
}
}

Thus by reading the following code:

push   %ebp
mov    %esp,%ebp
lea    -0x1018(%esp),%esp
orl    $0x0,(%esp)
lea    0x1010(%esp),%esp
call   80487fd <_ZL11GoofyStaticv>
call   80487c1 <_Z11GlobalPlutov>
call   8048749 <_ZN6Donald12GlobalDonaldEv>
call   8048785 <_ZN6DonaldL12StaticDonaldEv>
call   804870d <_ZN12_GLOBAL__N_17ScroogeEv>
mov    $0x0,%eax
leave
ret

We can say that the function calls two functions outside any namespace, called GoofyStatic and GlobalPluto, two functions inside the 'Donald' namespace and finally a function residing in an anonymous namespace (_GLOBAL__N_1). Finally, GDB offers an automatic demangling utility:

gdb> set print asm-demangle on
gdb> disass main
Dump of assembler code for function main:
   0x0804873f <+0>: push   %ebp
   0x08048740 <+1>: mov    %esp,%ebp
   0x08048742 <+3>: and    $0xfffffff0,%esp
   0x08048745 <+6>: lea    -0x1010(%esp),%esp
   0x0804874c <+13>: orl    $0x0,(%esp)
   0x08048750 <+17>: lea    0x1010(%esp),%esp
   0x08048757 <+24>: call   0x8048659 <main2()>
   0x0804875c <+29>: call   0x80485e9 <Donald::GlobalDonald()>
   0x08048761 <+34>: call   0x8048707 <_ZN6DonaldL12StaticDonaldEv>
   0x08048766 <+39>: call   0x8048621 <GlobalPluto()>
   0x0804876b <+44>: call   0x8048723 <_ZL11GoofyStaticv>
   0x08048770 <+49>: mov    $0x0,%eax
   0x08048775 <+54>: leave
   0x08048776 <+55>: ret

References:
http://www.int0x80.gr/papers/name_mangling.pdf
http://www.ofb.net/gnu/gcc/gxxint_15.html
http://en.wikipedia.org/wiki/Name_mangling#Name_mangling_in_C.2B.2B
http://stackoverflow.com/a/1962381