The previous tutorial was about virtual members. Now we can use that knowledge to analyze inheritance. We will use the usual
TestClass, but now it extends
BaseTestClass:
// Polymorphic base class of the single-inheritance example.
// It carries one integer field and exposes it through a virtual getter.
class BaseTestClass
{
public:
    // Mangled name: _ZN13BaseTestClassC1Ev
    // Starts the object with baseStuff equal to 1.
    BaseTestClass() : baseStuff(1)
    {
    }

    // Mangled name: _ZN13BaseTestClassD*Ev
    // Clears the field on destruction.
    virtual ~BaseTestClass()
    {
        this->baseStuff = 0;
    }

    // Mangled name: _ZN13BaseTestClass8GetStuffEv
    // Returns the current value of baseStuff.
    virtual int GetStuff()
    {
        return this->baseStuff;
    }

protected:
    // Visible to derived classes, which read it directly.
    int baseStuff;
};
class TestClass : public BaseTestClass
{
public:
// _ZN9TestClassC1Ev
TestClass()
{
stuff = 2;
}
// _ZN9TestClassD*Ev
virtual ~TestClass()
{
stuff = 0;
}
// _ZN9TestClass8GetStuffEv
virtual int GetStuff()
{
return BaseTestClass::GetStuff() + stuff;
}
// _ZN9TestClass13AnotherMethodEv
virtual int AnotherMethod()
{
return baseStuff;
}
private:
int stuff;
};
// Calls the virtual GetStuff() through a base-class pointer and returns its result.
int DoIt(BaseTestClass* btc)
{
    const int value = btc->GetStuff();
    return value;
}
int main()
{
BaseTestClass* t1 = new BaseTestClass();
TestClass* t2 = new TestClass();
int a = DoIt(t1) + DoIt(t2) + t2->AnotherMethod();
delete t1;
delete t2;
return a;
}
This time we'll go directly to the constructors to analyze the virtual tables:
080487ee <_ZN13BaseTestClassC1Ev>:
80487ee: push %ebp
80487ef: mov %esp,%ebp
80487f1: lea -0x1010(%esp),%esp
80487f8: orl $0x0,(%esp)
80487fc: lea 0x1010(%esp),%esp
8048803: mov 0x8(%ebp),%eax ; this pointer
8048806: movl $0x8048ad8,(%eax) ; vtable = 0x8048ad8
804880c: mov 0x8(%ebp),%eax
804880f: movl $0x1,0x4(%eax) ; baseStuff = 1
8048816: pop %ebp
8048817: ret
080488a2 <_ZN9TestClassC1Ev>:
80488a2: push %ebp
80488a3: mov %esp,%ebp
80488a5: lea -0x1028(%esp),%esp
80488ac: orl $0x0,(%esp)
80488b0: lea 0x1010(%esp),%esp
80488b7: mov 0x8(%ebp),%eax
80488ba: mov %eax,(%esp) ; this pointer
80488bd: call 80487ee <_ZN13BaseTestClassC1Ev> ; Call to base constructor
80488c2: mov 0x8(%ebp),%eax
80488c5: movl $0x8048ac0,(%eax) ; vtable = 0x8048ac0 (overwrite)
80488cb: mov 0x8(%ebp),%eax
80488ce: movl $0x2,0x8(%eax) ; stuff = 2
80488d5: leave
80488d6: ret
80488d7: nop
Let's review what happened. The first action done by the
TestClass constructor is to call the base constructor, which sets the virtual table and the field
baseStuff:
this+0x0: 0x8048ad8 (BaseTestClass vtable)
this+0x4: 1 (BaseTestClass::baseStuff)
Then, the vtable pointer is overwritten and the field stuff is stored right after
baseStuff:
this+0x0: 0x8048ac0 (TestClass vtable)
this+0x4: 1 (BaseTestClass::baseStuff)
this+0x8: 2 (TestClass::stuff)
Vtables are:
Contents of section .rodata:
8048ab0 03000000 01000200 00000000 f08a0408 ................
8048ac0 4c890408 96890408 c4890408 ee890408 L...............
8048ad0 00000000 0c8b0408 8c880408 ca880408 ................
8048ae0 f8880408 39546573 74436c61 73730000 ....9TestClass..
8048af0 68a00408 e48a0408 0c8b0408 31334261 h...........13Ba
8048b00 73655465 7374436c 61737300 28a00408 seTestClass.(...
8048b10 fc8a0408 ....
// BaseTestClass:
0x8048ad8: _ZN13BaseTestClassD1Ev
0x8048adc: _ZN13BaseTestClassD0Ev
0x8048ae0: _ZN13BaseTestClass8GetStuffEv
// TestClass:
0x8048ac0: _ZN9TestClassD1Ev
0x8048ac4: _ZN9TestClassD0Ev
0x8048ac8: _ZN9TestClass8GetStuffEv
0x8048acc: _ZN9TestClass13AnotherMethodEv
Multiple Inheritance
C++ supports multiple inheritance, so, let's see how it's implemented:
// Second base class of the multiple-inheritance example.
// Holds one integer field exposed through the virtual GetStuff2().
class Base2
{
public:
    // Mangled name: _ZN5Base2C1Ev
    // Starts the object with b2 equal to 1.
    Base2() : b2(1)
    {
    }

    // Mangled name: _ZN5Base2D*Ev
    // Clears the field on destruction.
    virtual ~Base2()
    {
        this->b2 = 0;
    }

    // Mangled name: _ZN5Base29GetStuff2Ev
    // Returns the current value of b2.
    virtual int GetStuff2()
    {
        return this->b2;
    }

protected:
    int b2;
};
// First base class of the multiple-inheritance example.
// Holds one integer field exposed through the virtual GetStuff().
class Base1
{
public:
    // Mangled name: _ZN5Base1C1Ev
    // Starts the object with baseStuff equal to 1.
    Base1() : baseStuff(1)
    {
    }

    // Mangled name: _ZN5Base1D*Ev
    // Clears the field on destruction.
    virtual ~Base1()
    {
        this->baseStuff = 0;
    }

    // Mangled name: _ZN5Base18GetStuffEv
    // Returns the current value of baseStuff.
    virtual int GetStuff()
    {
        return this->baseStuff;
    }

protected:
    int baseStuff;
};
// Class of the multiple-inheritance example: derives from Base1 and Base2
// (in that order) and overrides the virtual getter of each base.
class TestClass : public Base1, public Base2
{
public:
    // Mangled name: _ZN9TestClassC1Ev
    // Both base constructors run first; afterwards stuff is set to 2.
    TestClass() : stuff(2)
    {
    }

    // Mangled names:
    //   _ZN9TestClassD*Ev
    //   _ZThn8_N9TestClassD*Ev
    // Clears the derived field on destruction.
    virtual ~TestClass()
    {
        this->stuff = 0;
    }

    // Mangled names:
    //   _ZN9TestClass9GetStuff2Ev
    //   _ZThn8_N9TestClass9GetStuff2Ev
    // Overrides Base2::GetStuff2: Base2's value plus the derived field.
    virtual int GetStuff2()
    {
        const int fromBase2 = Base2::GetStuff2();
        return fromBase2 + this->stuff;
    }

    // Mangled name: _ZN9TestClass8GetStuffEv
    // Overrides Base1::GetStuff: Base1's value plus the derived field.
    virtual int GetStuff()
    {
        const int fromBase1 = Base1::GetStuff();
        return fromBase1 + this->stuff;
    }

private:
    int stuff;
};
// Mangled name: _Z4DoItP5Base1P5Base2
// Calls one virtual getter through each base pointer and returns the sum.
int DoIt(Base1* bt1, Base2* bt2)
{
    const int fromBase1 = bt1->GetStuff();
    const int fromBase2 = bt2->GetStuff2();
    return fromBase1 + fromBase2;
}
int main()
{
TestClass* t1 = new TestClass();
return DoIt(t1, t1);
}
For each method, I wrote its mangled name as a comment. As you may have noticed, the methods TestClass::GetStuff2() and TestClass::~TestClass() have two mangled names, that is, they are mapped into two different functions. The first is the usual method implementation, while the second moves the this reference and jumps to the right method:
08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
8048a9b: subl $0x8,0x4(%esp)
8048aa0: jmp 8048a6e <_ZN9TestClass9GetStuff2Ev>
The reason is simple, but it will be more clear after we see the memory layout, so let's analyze the constructors:
080488ac <_ZN5Base1C1Ev>:
80488ac: push %ebp
80488ad: mov %esp,%ebp
80488af: lea -0x1010(%esp),%esp
80488b6: orl $0x0,(%esp)
80488ba: lea 0x1010(%esp),%esp
80488c1: mov 0x8(%ebp),%eax ; this pointer
80488c4: movl $0x8048bd8,(%eax) ; Base1 vtable (0x8048bd8)
80488ca: mov 0x8(%ebp),%eax
80488cd: movl $0x1,0x4(%eax) ; baseStuff = 1
80488d4: pop %ebp
80488d5: ret
080487f8 <_ZN5Base2C1Ev>:
80487f8: push %ebp
80487f9: mov %esp,%ebp
80487fb: lea -0x1010(%esp),%esp
8048802: orl $0x0,(%esp)
8048806: lea 0x1010(%esp),%esp
804880d: mov 0x8(%ebp),%eax ; this pointer
8048810: movl $0x8048bf0,(%eax) ; Base2 vtable (0x8048bf0)
8048816: mov 0x8(%ebp),%eax
8048819: movl $0x1,0x4(%eax) ; b2 = 1
8048820: pop %ebp
8048821: ret
08048982 <_ZN9TestClassC1Ev>:
8048982: push %ebp
8048983: mov %esp,%ebp
8048985: lea -0x1028(%esp),%esp
804898c: orl $0x0,(%esp)
8048990: lea 0x1010(%esp),%esp
8048997: mov 0x8(%ebp),%eax ; this pointer
804899a: mov %eax,(%esp)
804899d: call 80488ce <_ZN5Base1C1Ev> ; Base1 constructor
80489a2: mov 0x8(%ebp),%eax
80489a5: add $0x8,%eax ; this + 8
80489a8: mov %eax,(%esp)
80489ab: call 8048838 <_ZN5Base2C1Ev> ; Base2 constructor
80489b0: mov 0x8(%ebp),%eax
80489b3: movl $0x8048ba8,(%eax) ; Base1 vtable overwrite (0x8048ba8)
80489b9: mov 0x8(%ebp),%eax
80489bc: movl $0x8048bc0,0x8(%eax) ; Base2 vtable overwrite (0x8048bc0)
80489c3: mov 0x8(%ebp),%eax
80489c6: movl $0x2,0x10(%eax) ; stuff = 2
80489cd: leave
80489ce: ret
Note the presence of two vtables. The memory layout is the following after calling the two base constructors:
this+0x0: 0x8048bd8 (Base1 vtable)
this+0x4: 1 (Base1::baseStuff)
this+0x8: 0x8048bf0 (Base2 vtable)
this+0xc: 1 (Base2::b2)
Then, the constructor overwrites both the vtable references and writes its variable:
this+0x0: 0x8048ba8 (Base1 vtable overwrite)
this+0x4: 1 (Base1::baseStuff)
this+0x8: 0x8048bc0 (Base2 vtable overwrite)
this+0xc: 1 (Base2::b2)
this+0x10: 2 (TestClass::stuff)
Vtables:
Contents of section .rodata:
8048b80 03000000 01000200 00000000 00000000 ................
8048b90 00000000 00000000 00000000 00000000 ................
8048ba0 00000000 208c0408 ce890408 3a8a0408 .... .......:...
8048bb0 a28a0408 6e8a0408 f8ffffff 208c0408 ....n....... ...
8048bc0 2f8a0408 678a0408 9b8a0408 00000000 /...g...........
8048bd0 00000000 488c0408 f6880408 34890408 ....H.......4...
8048be0 62890408 00000000 00000000 588c0408 b...........X...
8048bf0 42880408 80880408 ae880408 39546573 B...........9Tes
8048c00 74436c61 73730000 00000000 00000000 tClass..........
8048c10 00000000 00000000 00000000 00000000 ................
8048c20 68b00408 fc8b0408 00000000 02000000 h...............
8048c30 488c0408 02000000 588c0408 02080000 H.......X.......
8048c40 35426173 65310000 28b00408 408c0408 5Base1..(...@...
8048c50 35426173 65320000 28b00408 508c0408 5Base2..(...P...
// Base1:
0x8048bd8: _ZN5Base1D1Ev
0x8048bdc: _ZN5Base1D0Ev
0x8048be0: _ZN5Base18GetStuffEv
// Base2:
0x8048bf0: _ZN5Base2D1Ev
0x8048bf4: _ZN5Base2D0Ev
0x8048bf8: _ZN5Base29GetStuff2Ev
// TestClass vtable1:
0x8048ba8: _ZN9TestClassD1Ev
0x8048bac: _ZN9TestClassD0Ev
0x8048bb0: _ZN9TestClass8GetStuffEv
0x8048bb4: _ZN9TestClass9GetStuff2Ev
// TestClass vtable2:
0x8048bc0: _ZThn8_N9TestClassD1Ev
0x8048bc4: _ZThn8_N9TestClassD0Ev
0x8048bc8: _ZThn8_N9TestClass9GetStuff2Ev
By looking at the memory layout and the vtables, the reason for the double functions, such as
_ZThn8_N9TestClass9GetStuff2Ev, and for the double vtable is obvious:
08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
8048a9b: subl $0x8,0x4(%esp)
8048aa0: jmp 8048a6e <_ZN9TestClass9GetStuff2Ev>
The wrapper function adjusts the
this reference, so that the compiler can reuse the implementation of
TestClass::GetStuff2. If you cast
TestClass* to
Base2*, the compiler moves the this reference so that it points to the second vtable. However, since the method is virtual and can be overridden, by calling Base2::GetStuff2, you are actually calling TestClass::GetStuff2. But the
this reference has been moved. The goal of the wrapper is to undo that move, restoring
this to the original location.
For instance, let's see the
main and
DoIt disassembly:
08048741 <main>:
...
804877b: mov 0x1c(%esp),%eax ; TestClass* tc -> eax
804877f: add $0x8,%eax ; Casting: this = this+0x8 (2nd vtable)
...
8048789: mov %eax,0x4(%esp) ; Second parameter of DoIt
804878d: mov 0x1c(%esp),%eax ; TestClass* tc -> eax
8048791: mov %eax,(%esp) ; First parameter of DoIt
8048794: call 80486fd <_Z4DoItP5Base1P5Base2>
...
080486fd <_Z4DoItP5Base1P5Base2>:
...
8048713: mov 0x8(%ebp),%eax ; Base1* bt1 -> eax
8048716: mov (%eax),%eax
8048718: add $0x8,%eax
804871b: mov (%eax),%eax ; GetStuff from vtable
804871d: mov 0x8(%ebp),%edx
8048720: mov %edx,(%esp)
8048723: call *%eax
8048725: mov %eax,%ebx
8048727: mov 0xc(%ebp),%eax ; Base2* bt2 -> eax
804872a: mov (%eax),%eax
804872c: add $0x8,%eax
804872f: mov (%eax),%eax ; _ZThn8_N9TestClass9GetStuff2Ev from vtable
8048731: mov 0xc(%ebp),%edx
8048734: mov %edx,(%esp)
8048737: call *%eax ; Actual call to _ZThn8_N9TestClass9GetStuff2Ev
...
08048a9b <_ZThn8_N9TestClass9GetStuff2Ev>:
8048a9b: subl $0x8,0x4(%esp) ; 'this' adjustment
8048aa0: jmp 8048a6e <_ZN9TestClass9GetStuff2Ev>
Calling the wrapper causes this to point back to the original
TestClass pointer, so the function
_ZN9TestClass9GetStuff2Ev can be called with consistent data.