Monday, October 20, 2014

Reversing C++ binaries 4: Inheritance


The previous tutorial was about virtual members. Now we can use that knowledge to analyze inheritance. We will use the usual TestClass, but now it extends BaseTestClass:

// Polymorphic base class of the single-inheritance example: one int field,
// a virtual destructor and a virtual getter.
// The "// _ZN..." comment above each member is its mangled symbol name.
class BaseTestClass
{
public:
  // _ZN13BaseTestClassC1Ev
  // Constructor: sets baseStuff to 1.
  BaseTestClass()
  {
    baseStuff = 1;
  }
  // _ZN13BaseTestClassD*Ev
  // Virtual destructor: resets baseStuff to 0. The '*' stands for the
  // destructor variants (D1 and D0) that appear in the vtable.
  virtual ~BaseTestClass()
  {
    baseStuff = 0;
  }
  // _ZN13BaseTestClass8GetStuffEv
  // Virtual getter: returns baseStuff. Overridden by TestClass.
  virtual int GetStuff()
  {
    return baseStuff;
  }
protected:
  // Protected so that derived classes can read it directly.
  int baseStuff;
};
// Derived class of the single-inheritance example. It adds the field 'stuff',
// overrides GetStuff() and introduces a new virtual method, AnotherMethod().
class TestClass : public BaseTestClass
{
public:
  // _ZN9TestClassC1Ev
  // Constructor: the base constructor runs first (baseStuff = 1), then
  // stuff is set to 2.
  TestClass()
  {
    stuff = 2;
  }
  // _ZN9TestClassD*Ev
  // Virtual destructor: resets stuff to 0.
  virtual ~TestClass()
  {
    stuff = 0;
  }
  // _ZN9TestClass8GetStuffEv
  // Overrides BaseTestClass::GetStuff(): returns the base value plus stuff.
  // The qualified call BaseTestClass::GetStuff() names the base implementation
  // explicitly.
  virtual int GetStuff()
  {
    return BaseTestClass::GetStuff() + stuff;
  }
  // _ZN9TestClass13AnotherMethodEv
  // New virtual method (not declared in the base class): returns the
  // inherited baseStuff field.
  virtual int AnotherMethod()
  {
    return baseStuff;
  }
private:
  int stuff;
};
// Calls GetStuff() through a base-class pointer and returns its result.
// GetStuff() is virtual, so the implementation that runs is the one of the
// object btc actually points to.
int DoIt(BaseTestClass* btc)
{
  return btc->GetStuff();
}
// Creates one BaseTestClass and one TestClass, calls GetStuff() on both
// through DoIt() plus AnotherMethod() on the derived object, deletes both
// objects and returns the sum.
int main()
{
  BaseTestClass* t1 = new BaseTestClass();
  TestClass* t2 = new TestClass();
  // DoIt(t1) -> 1, DoIt(t2) -> 1 + 2, t2->AnotherMethod() -> 1
  int a = DoIt(t1) + DoIt(t2) + t2->AnotherMethod();
  delete t1;
  delete t2;
  return a;
}

This time we'll go directly to the constructors to analyze the virtual tables:

080487ee <_ZN13BaseTestClassC1Ev>:
 80487ee: push   %ebp
 80487ef: mov    %esp,%ebp
 80487f1: lea    -0x1010(%esp),%esp
 80487f8: orl    $0x0,(%esp)
 80487fc: lea    0x1010(%esp),%esp
 8048803: mov    0x8(%ebp),%eax    ; this pointer
 8048806: movl   $0x8048ad8,(%eax) ; vtable = 0x8048ad8
 804880c: mov    0x8(%ebp),%eax
 804880f: movl   $0x1,0x4(%eax)    ; baseStuff = 1
 8048816: pop    %ebp
 8048817: ret

080488a2 <_ZN9TestClassC1Ev>:
 80488a2: push   %ebp
 80488a3: mov    %esp,%ebp
 80488a5: lea    -0x1028(%esp),%esp
 80488ac: orl    $0x0,(%esp)
 80488b0: lea    0x1010(%esp),%esp
 80488b7: mov    0x8(%ebp),%eax
 80488ba: mov    %eax,(%esp)         ; this pointer
 80488bd: call   80487ee <_ZN13BaseTestClassC1Ev> ; Call to base constructor
 80488c2: mov    0x8(%ebp),%eax
 80488c5: movl   $0x8048ac0,(%eax) ; vtable = 0x8048ac0 (overwrite)
 80488cb: mov    0x8(%ebp),%eax
 80488ce: movl   $0x2,0x8(%eax) ; stuff = 2
 80488d5: leave
 80488d6: ret
 80488d7: nop

Let's review what happened. The first action done by the TestClass constructor is to call the base constructor, which sets the virtual table and the field baseStuff:

  this+0x0: 0x8048ad8 (BaseTestClass vtable)
  this+0x4: 1 (BaseTestClass::baseStuff)

Then, the vtable pointer is overwritten and the field stuff is stored right after baseStuff:

  this+0x0: 0x8048ac0 (TestClass vtable)
  this+0x4: 1 (BaseTestClass::baseStuff)
  this+0x8: 2 (TestClass::stuff)

Vtables are:

Contents of section .rodata:
 8048ab0 03000000 01000200 00000000 f08a0408  ................
 8048ac0 4c890408 96890408 c4890408 ee890408  L...............
 8048ad0 00000000 0c8b0408 8c880408 ca880408  ................
 8048ae0 f8880408 39546573 74436c61 73730000  ....9TestClass..
 8048af0 68a00408 e48a0408 0c8b0408 31334261  h...........13Ba
 8048b00 73655465 7374436c 61737300 28a00408  seTestClass.(...
 8048b10 fc8a0408                             ....            

// BaseTestClass:
 0x8048ad8: _ZN13BaseTestClassD1Ev
 0x8048adc: _ZN13BaseTestClassD0Ev
 0x8048ae0: _ZN13BaseTestClass8GetStuffEv

// TestClass:
 0x8048ac0: _ZN9TestClassD1Ev
 0x8048ac4: _ZN9TestClassD0Ev
 0x8048ac8: _ZN9TestClass8GetStuffEv
 0x8048acc: _ZN9TestClass13AnotherMethodEv

Multiple Inheritance

C++ supports multiple inheritance, so, let's see how it's implemented:

// Second base class of the multiple-inheritance example: one int field,
// a virtual destructor and a virtual getter.
class Base2
{
public:
  // _ZN5Base2C1Ev
  // Constructor: sets b2 to 1.
  Base2()
  {
    b2 = 1;
  }
  // _ZN5Base2D*Ev
  // Virtual destructor: resets b2 to 0.
  virtual ~Base2()
  {
    b2 = 0;
  }
  // _ZN5Base29GetStuff2Ev
  // Virtual getter: returns b2. Overridden by TestClass.
  virtual int GetStuff2()
  {
    return b2;
  }
protected:
  // Protected so that derived classes can read it directly.
  int b2;

};

// First base class of the multiple-inheritance example: one int field,
// a virtual destructor and a virtual getter.
class Base1
{
public:
  // _ZN5Base1C1Ev
  // Constructor: sets baseStuff to 1.
  Base1()
  {
    baseStuff = 1;
  }
  // _ZN5Base1D*Ev
  // Virtual destructor: resets baseStuff to 0.
  virtual ~Base1()
  {
    baseStuff = 0;
  }
  // _ZN5Base18GetStuffEv
  // Virtual getter: returns baseStuff. Overridden by TestClass.
  virtual int GetStuff()
  {
    return baseStuff;
  }
protected:
  // Protected so that derived classes can read it directly.
  int baseStuff;

};

// Derived class of the multiple-inheritance example. It inherits from Base1
// and Base2 (in that order), overrides both getters and adds the field 'stuff'.
// Members reachable through a Base2* have a second mangled name (_ZThn8_...):
// a wrapper that subtracts 8 from 'this' and jumps to the regular
// implementation.
class TestClass : public Base1, public Base2
{
public:
  // _ZN9TestClassC1Ev
  // Constructor: both base constructors run first, then stuff is set to 2.
  TestClass()
  {
    stuff = 2;
  }
  // _ZN9TestClassD*Ev
  // _ZThn8_N9TestClassD*Ev
  // Virtual destructor: resets stuff to 0.
  virtual ~TestClass()
  {
    stuff = 0;
  }
  // _ZN9TestClass9GetStuff2Ev
  // _ZThn8_N9TestClass9GetStuff2Ev
  // Overrides Base2::GetStuff2(): returns the Base2 value plus stuff.
  virtual int GetStuff2()
  {
    return Base2::GetStuff2() + stuff;
  }
  // _ZN9TestClass8GetStuffEv
  // Overrides Base1::GetStuff(): returns the Base1 value plus stuff.
  virtual int GetStuff()
  {
    return Base1::GetStuff() + stuff;
  }

private:
  int stuff;
};

// _Z4DoItP5Base1P5Base2
// Calls GetStuff() through the Base1 pointer and GetStuff2() through the
// Base2 pointer, and returns the sum of the two results. Both calls are
// virtual, so they are dispatched through the vtable of each pointed-to
// subobject.
int DoIt(Base1* bt1, Base2* bt2)
{
  return bt1->GetStuff() + bt2->GetStuff2();
}

// Creates a single TestClass and passes it to DoIt() twice: once as Base1*
// and once as Base2*. The conversion to Base2* is implicit; in the compiled
// code it adds 8 to the pointer.
// NOTE(review): t1 is never deleted here, unlike in the single-inheritance
// example; the process exits right after, so the leak is harmless in
// practice.
int main()
{
  TestClass* t1 = new TestClass();
  // GetStuff() -> 1 + 2, GetStuff2() -> 1 + 2
  return DoIt(t1, t1);
}

For each method, I wrote its mangled name as a comment. As you may have noticed, the methods TestClass::GetStuff2() and TestClass::~TestClass() have two mangled names, that is, they are mapped into two different functions. The first is the usual method implementation, while the second moves the this reference and jumps to the right method:

08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
 8048a9b: subl   $0x8,0x4(%esp)
 8048aa0: jmp    8048a6e <_ZN9TestClass9GetStuff2Ev>

The reason is simple, but it will be clearer after we see the memory layout, so let's analyze the constructors:

080488ac <_ZN5Base1C1Ev>:
 80488ac: push   %ebp
 80488ad: mov    %esp,%ebp
 80488af: lea    -0x1010(%esp),%esp
 80488b6: orl    $0x0,(%esp)
 80488ba: lea    0x1010(%esp),%esp
 80488c1: mov    0x8(%ebp),%eax    ; this pointer
 80488c4: movl   $0x8048bd8,(%eax) ; Base1 vtable (0x8048bd8)
 80488ca: mov    0x8(%ebp),%eax
 80488cd: movl   $0x1,0x4(%eax)    ; baseStuff = 1
 80488d4: pop    %ebp
 80488d5: ret

080487f8 <_ZN5Base2C1Ev>:
 80487f8: push   %ebp
 80487f9: mov    %esp,%ebp
 80487fb: lea    -0x1010(%esp),%esp
 8048802: orl    $0x0,(%esp)
 8048806: lea    0x1010(%esp),%esp
 804880d: mov    0x8(%ebp),%eax    ; this pointer
 8048810: movl   $0x8048bf0,(%eax) ; Base2 vtable (0x8048bf0)
 8048816: mov    0x8(%ebp),%eax
 8048819: movl   $0x1,0x4(%eax)    ; b2 = 1
 8048820: pop    %ebp
 8048821: ret

08048982 <_ZN9TestClassC1Ev>:
 8048982: push   %ebp
 8048983: mov    %esp,%ebp
 8048985: lea    -0x1028(%esp),%esp
 804898c: orl    $0x0,(%esp)
 8048990: lea    0x1010(%esp),%esp
 8048997: mov    0x8(%ebp),%eax          ; this pointer
 804899a: mov    %eax,(%esp)
 804899d: call   80488ce <_ZN5Base1C1Ev> ; Base1 constructor
 80489a2: mov    0x8(%ebp),%eax
 80489a5: add    $0x8,%eax               ; this + 8
 80489a8: mov    %eax,(%esp)
 80489ab: call   8048838 <_ZN5Base2C1Ev> ; Base2 constructor
 80489b0: mov    0x8(%ebp),%eax
 80489b3: movl   $0x8048ba8,(%eax)       ; Base1 vtable overwrite (0x8048ba8)
 80489b9: mov    0x8(%ebp),%eax
 80489bc: movl   $0x8048bc0,0x8(%eax)    ; Base2 vtable overwrite (0x8048bc0)
 80489c3: mov    0x8(%ebp),%eax
 80489c6: movl   $0x2,0x10(%eax)         ; stuff = 2
 80489cd: leave
 80489ce: ret

Note the presence of two vtables. The memory layout is the following after calling the two base constructors:

  this+0x0: 0x8048bd8 (Base1 vtable)
  this+0x4: 1 (Base1::baseStuff)
  this+0x8: 0x8048bf0 (Base2 vtable)
  this+0xc: 1 (Base2::b2)

Then, the constructor overwrites both the vtable references and writes its variable:

  this+0x0: 0x8048ba8 (Base1 vtable overwrite)
  this+0x4: 1 (Base1::baseStuff)
  this+0x8: 0x8048bc0 (Base2 vtable overwrite)
  this+0xc: 1 (Base2::b2)
  this+0x10: 2 (TestClass::stuff)

Vtables:

Contents of section .rodata:
 8048b80 03000000 01000200 00000000 00000000  ................
 8048b90 00000000 00000000 00000000 00000000  ................
 8048ba0 00000000 208c0408 ce890408 3a8a0408  .... .......:...
 8048bb0 a28a0408 6e8a0408 f8ffffff 208c0408  ....n....... ...
 8048bc0 2f8a0408 678a0408 9b8a0408 00000000  /...g...........
 8048bd0 00000000 488c0408 f6880408 34890408  ....H.......4...
 8048be0 62890408 00000000 00000000 588c0408  b...........X...
 8048bf0 42880408 80880408 ae880408 39546573  B...........9Tes
 8048c00 74436c61 73730000 00000000 00000000  tClass..........
 8048c10 00000000 00000000 00000000 00000000  ................
 8048c20 68b00408 fc8b0408 00000000 02000000  h...............
 8048c30 488c0408 02000000 588c0408 02080000  H.......X.......
 8048c40 35426173 65310000 28b00408 408c0408  5Base1..(...@...
 8048c50 35426173 65320000 28b00408 508c0408  5Base2..(...P...


// Base1:
 0x8048bd8: _ZN5Base1D1Ev
 0x8048bdc: _ZN5Base1D0Ev
 0x8048be0: _ZN5Base18GetStuffEv

// Base2:
 0x8048bf0: _ZN5Base2D1Ev
 0x8048bf4: _ZN5Base2D0Ev
 0x8048bf8: _ZN5Base29GetStuff2Ev

// TestClass vtable1:
 0x8048ba8: _ZN9TestClassD1Ev
 0x8048bac: _ZN9TestClassD0Ev
 0x8048bb0: _ZN9TestClass8GetStuffEv
 0x8048bb4: _ZN9TestClass9GetStuff2Ev
// TestClass vtable2:
 0x8048bc0: _ZThn8_N9TestClassD1Ev
 0x8048bc4: _ZThn8_N9TestClassD0Ev
 0x8048bc8: _ZThn8_N9TestClass9GetStuff2Ev

By looking at the memory layout and the vtables, the reason for the double functions, such as _ZThn8_N9TestClass9GetStuff2Ev, and for the double vtable becomes obvious:

08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
 8048a9b: subl   $0x8,0x4(%esp)
 8048aa0: jmp    8048a6e <_ZN9TestClass9GetStuff2Ev>

The wrapper function adjusts the this reference, so that the compiler can reuse the implementation of TestClass::GetStuff2. If you cast TestClass* to Base2*, the compiler moves the this reference so that it points to the second vtable. However, since the method is virtual and can be overridden, by calling Base2::GetStuff2 you are actually calling TestClass::GetStuff2. But the this reference has been moved. The goal of the wrapper is to undo that move, restoring this to the original location.

For instance, let's see the main and DoIt disassembly:

08048741 <main>:
 ...
 804877b: mov    0x1c(%esp),%eax ; TestClass* t1 -> eax
 804877f: add    $0x8,%eax       ; Casting: this = this+0x8 (2nd vtable)
 ...
 8048789: mov    %eax,0x4(%esp)  ; Second parameter of DoIt
 804878d: mov    0x1c(%esp),%eax ; TestClass* t1 -> eax
 8048791: mov    %eax,(%esp)     ; First parameter of DoIt
 8048794: call   80486fd <_Z4DoItP5Base1P5Base2>
 ...

080486fd <_Z4DoItP5Base1P5Base2>:
 ...
 8048713: mov    0x8(%ebp),%eax ; Base1* bt1 -> eax
 8048716: mov    (%eax),%eax
 8048718: add    $0x8,%eax
 804871b: mov    (%eax),%eax    ; GetStuff from vtable
 804871d: mov    0x8(%ebp),%edx
 8048720: mov    %edx,(%esp)
 8048723: call   *%eax
 8048725: mov    %eax,%ebx
 8048727: mov    0xc(%ebp),%eax ; Base2* bt2 -> eax
 804872a: mov    (%eax),%eax
 804872c: add    $0x8,%eax
 804872f: mov    (%eax),%eax    ; _ZThn8_N9TestClass9GetStuff2Ev from vtable
 8048731: mov    0xc(%ebp),%edx
 8048734: mov    %edx,(%esp)
 8048737: call   *%eax          ; Actual call to _ZThn8_N9TestClass9GetStuff2Ev
 ...

08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
 8048a9b: subl   $0x8,0x4(%esp)  ; 'this' adjustment
 8048aa0: jmp    8048a6e <_ZN9TestClass9GetStuff2Ev>

Calling the wrapper will cause this to point to the original TestClass pointer, so the function _ZN9TestClass9GetStuff2Ev can be called with consistent data.

No comments: